massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
- massgen/api_params_handler/_claude_api_params_handler.py +4 -0
- massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
- massgen/api_params_handler/_response_api_params_handler.py +4 -0
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +4 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
- massgen/backend/claude_code.py +9 -1
- massgen/backend/docs/permissions_and_context_files.md +2 -2
- massgen/backend/gemini.py +35 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/backend/response.py +2 -0
- massgen/chat_agent.py +9 -3
- massgen/cli.py +291 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +69 -14
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +9 -0
- massgen/filesystem_manager/_path_permission_manager.py +148 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +283 -15
- massgen/orchestrator.py +335 -38
- massgen/tests/test_binary_file_blocking.py +274 -0
- massgen/tests/test_case_studies.md +12 -12
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_multimodal_size_limits.py +407 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/tool/__init__.py +4 -0
- massgen/tool/_manager.py +7 -2
- massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
- massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
- massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
- massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
- massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
- massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
- massgen/tool/_multimodal_tools/understand_audio.py +211 -0
- massgen/tool/_multimodal_tools/understand_file.py +555 -0
- massgen/tool/_multimodal_tools/understand_image.py +316 -0
- massgen/tool/_multimodal_tools/understand_video.py +340 -0
- massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
- massgen/tool/docs/multimodal_tools.md +1368 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# MassGen Configuration: Computer Use - Docker Environment
|
|
2
|
+
#
|
|
3
|
+
# This configuration uses a Docker container for OS-level automation.
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
# massgen --config @examples/tools/custom_tools/computer_use_docker_example "Open calculator and compute 123 + 456"
|
|
7
|
+
#
|
|
8
|
+
# Prerequisites:
|
|
9
|
+
# 1. Set OPENAI_API_KEY in your .env file
|
|
10
|
+
# 2. Have Docker installed and running
|
|
11
|
+
# 3. Start the Docker container (see README for instructions)
|
|
12
|
+
|
|
13
|
+
agents:
|
|
14
|
+
- id: "docker_automation_agent"
|
|
15
|
+
backend:
|
|
16
|
+
type: "openai"
|
|
17
|
+
model: "computer-use-preview"
|
|
18
|
+
custom_tools:
|
|
19
|
+
- name: ["computer_use"]
|
|
20
|
+
category: "automation"
|
|
21
|
+
path: "massgen/tool/_computer_use/computer_use_tool.py"
|
|
22
|
+
function: ["computer_use"]
|
|
23
|
+
# Default parameters for Docker environment
|
|
24
|
+
default_params:
|
|
25
|
+
environment: "ubuntu"
|
|
26
|
+
display_width: 1280
|
|
27
|
+
display_height: 800
|
|
28
|
+
max_iterations: 25
|
|
29
|
+
include_reasoning: true
|
|
30
|
+
environment_config:
|
|
31
|
+
container_name: "cua-container"
|
|
32
|
+
display: ":99"
|
|
33
|
+
|
|
34
|
+
system_message: |
|
|
35
|
+
You are a computer automation specialist with access to the computer_use tool.
|
|
36
|
+
|
|
37
|
+
You can control a virtual Ubuntu environment running in Docker to:
|
|
38
|
+
- Launch and use desktop applications
|
|
39
|
+
- Perform file operations
|
|
40
|
+
- Execute system-level tasks
|
|
41
|
+
- Automate GUI interactions
|
|
42
|
+
|
|
43
|
+
The environment includes:
|
|
44
|
+
- Ubuntu 22.04 desktop (Xfce)
|
|
45
|
+
- Firefox browser
|
|
46
|
+
- Standard desktop applications
|
|
47
|
+
- X11 display system
|
|
48
|
+
|
|
49
|
+
When using the computer_use tool:
|
|
50
|
+
1. Tasks are executed in a sandboxed Docker container
|
|
51
|
+
2. You have full desktop access via xdotool commands
|
|
52
|
+
3. Screenshots are captured after each action
|
|
53
|
+
4. Be specific about coordinates when clicking
|
|
54
|
+
|
|
55
|
+
Best practices:
|
|
56
|
+
- Allow time for applications to launch
|
|
57
|
+
- Use wait actions between steps
|
|
58
|
+
- Verify GUI elements are visible before clicking
|
|
59
|
+
- Consider screen resolution (1280x800)
|
|
60
|
+
|
|
61
|
+
ui:
|
|
62
|
+
display_type: "detailed"
|
|
63
|
+
logging_enabled: true
|
|
64
|
+
show_screenshots: true
|
|
65
|
+
show_reasoning: true
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# MassGen Configuration: Computer Use Tool Example
|
|
2
|
+
#
|
|
3
|
+
# This configuration demonstrates how to use the computer_use tool for automating
|
|
4
|
+
# browser and computer interactions using OpenAI's computer-use-preview model.
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# massgen --config @examples/tools/custom_tools/computer_use_example "Search for Python documentation on Google"
|
|
8
|
+
#
|
|
9
|
+
# Prerequisites:
|
|
10
|
+
# 1. Set OPENAI_API_KEY in your .env file
|
|
11
|
+
# 2. For browser environment: pip install playwright && playwright install
|
|
12
|
+
# 3. For Docker environment: Have Docker installed and running
|
|
13
|
+
|
|
14
|
+
agents:
|
|
15
|
+
- id: "computer_use_agent"
|
|
16
|
+
backend:
|
|
17
|
+
type: "openai"
|
|
18
|
+
model: "gpt-4.1" # You can also use "computer-use-preview" for the main model
|
|
19
|
+
custom_tools:
|
|
20
|
+
- name: ["computer_use"]
|
|
21
|
+
category: "automation"
|
|
22
|
+
path: "massgen/tool/_computer_use/computer_use_tool.py"
|
|
23
|
+
function: ["computer_use"]
|
|
24
|
+
|
|
25
|
+
system_message: |
|
|
26
|
+
You are an AI assistant with access to computer automation capabilities.
|
|
27
|
+
|
|
28
|
+
The computer_use tool is available to you. This tool allows you to:
|
|
29
|
+
- Control a web browser (click, type, scroll, etc.)
|
|
30
|
+
- Automate computer tasks
|
|
31
|
+
- Search the web, fill forms, navigate websites
|
|
32
|
+
- Perform multi-step workflows
|
|
33
|
+
|
|
34
|
+
When a user asks you to perform a task that requires browser or computer interaction,
|
|
35
|
+
use the computer_use tool with a clear task description.
|
|
36
|
+
|
|
37
|
+
Important:
|
|
38
|
+
- Always provide clear, specific task descriptions
|
|
39
|
+
- The tool will execute actions step-by-step
|
|
40
|
+
- You will receive screenshots and action logs
|
|
41
|
+
- Safety checks may be triggered - acknowledge them when appropriate
|
|
42
|
+
|
|
43
|
+
Example usage:
|
|
44
|
+
- "Search for the latest AI news on Google"
|
|
45
|
+
- "Navigate to example.com and fill out the contact form"
|
|
46
|
+
- "Find Python documentation and save the URL"
|
|
47
|
+
|
|
48
|
+
ui:
|
|
49
|
+
display_type: "simple"
|
|
50
|
+
logging_enabled: true
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# MassGen Configuration: Crawl4AI Web Scraping via Custom Tools
|
|
2
|
+
#
|
|
3
|
+
# Prerequisites:
|
|
4
|
+
# 1. Start crawl4ai Docker container (one-time setup):
|
|
5
|
+
# docker pull unclecode/crawl4ai:latest
|
|
6
|
+
# docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
|
|
7
|
+
#
|
|
8
|
+
# 2. Verify container is running:
|
|
9
|
+
# docker ps | grep crawl4ai
|
|
10
|
+
#
|
|
11
|
+
# 3. Test REST API endpoint (optional):
|
|
12
|
+
# curl -X POST http://localhost:11235/md -H "Content-Type: application/json" -d '{"url": "https://example.com", "f": "fit"}'
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# massgen --config @examples/tools/custom_tools/crawl4ai_example "Please search for the MassGen docs, take a screenshot of the website, and explain that screenshot"
|
|
16
|
+
#
|
|
17
|
+
# Available Tools (via Custom Tools):
|
|
18
|
+
# - crawl4ai_md: Generate markdown from web content
|
|
19
|
+
# - crawl4ai_html: Extract preprocessed HTML
|
|
20
|
+
# - crawl4ai_screenshot: Capture webpage screenshots
|
|
21
|
+
# - crawl4ai_pdf: Generate PDF documents
|
|
22
|
+
# - crawl4ai_execute_js: Run JavaScript on web pages
|
|
23
|
+
# - crawl4ai_crawl: Perform multi-URL crawling
|
|
24
|
+
# - crawl4ai_ask: Query the Crawl4AI library context
|
|
25
|
+
#
|
|
26
|
+
# Note: Multiple agents can use these tools concurrently.
|
|
27
|
+
# The server handles up to 5 concurrent crawls by default.
|
|
28
|
+
|
|
29
|
+
agents:
|
|
30
|
+
- id: "web_scraper_agent"
|
|
31
|
+
backend:
|
|
32
|
+
type: "openai" # Works with any backend: openai, gemini, claude_code, etc.
|
|
33
|
+
model: "gpt-5-mini"
|
|
34
|
+
cwd: "workspace1"
|
|
35
|
+
|
|
36
|
+
# Register crawl4ai custom tools
|
|
37
|
+
custom_tools:
|
|
38
|
+
- name: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
|
|
39
|
+
category: "web_scraping"
|
|
40
|
+
path: "massgen/tool/_web_tools/crawl4ai_tool.py"
|
|
41
|
+
function: ["crawl4ai_md", "crawl4ai_html", "crawl4ai_screenshot", "crawl4ai_pdf", "crawl4ai_execute_js", "crawl4ai_crawl"]
|
|
42
|
+
- name: ["understand_image"]
|
|
43
|
+
category: "multimodal"
|
|
44
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
45
|
+
function: ["understand_image"]
|
|
46
|
+
|
|
47
|
+
orchestrator:
|
|
48
|
+
snapshot_storage: "snapshots"
|
|
49
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
50
|
+
coordination:
|
|
51
|
+
max_orchestration_restarts: 2 # Default is 0 (no restarts); 2 allows 3 total attempts: initial + 2 restarts
|
|
52
|
+
|
|
53
|
+
ui:
|
|
54
|
+
display_type: "rich_terminal"
|
|
55
|
+
logging_enabled: true
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# MassGen Configuration: Text to File Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml "Please generate a comprehensive business presentation about Artificial Intelligence in Healthcare for our upcoming board meeting. The presentation should include the following slides: 1) Title slide with presentation title and date, 2) Executive Summary highlighting key findings, 3) Market Overview showing the current AI healthcare market size and growth trends, 4) Technology Applications including AI in diagnostics, drug discovery, and patient care, 5) Case Studies showcasing 3-4 successful implementations with metrics, 6) Competitive Landscape analyzing major players and their solutions, 7) Implementation Roadmap with timeline and milestones, 8) ROI Analysis with projected costs and benefits, 9) Risk Assessment and mitigation strategies, 10) Recommendations and next steps. Please make it professional with approximately 15-20 slides, use clear bullet points, include suggested visual elements for each slide, and save it as a PPTX file with a modern business layout."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_file_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_file_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_file_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
15
|
+
function: ["text_to_file_generation"]
|
|
16
|
+
- name: ["understand_file"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
19
|
+
function: ["understand_file"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
22
|
+
|
|
23
|
+
When generating PPTX presentations, format your content with:
|
|
24
|
+
- Use "# Title" or "## Title" for slide titles
|
|
25
|
+
- Use "---" to separate slides
|
|
26
|
+
- Use "- Item" for bullet points
|
|
27
|
+
- Use "  - Subitem" for sub-bullets (two-space indent)
|
|
28
|
+
- Structure content in a slide-friendly format with clear, concise points
|
|
29
|
+
|
|
30
|
+
- id: "text_to_file_generation_tool2"
|
|
31
|
+
backend:
|
|
32
|
+
type: "openai"
|
|
33
|
+
model: "gpt-4o"
|
|
34
|
+
cwd: "workspace2"
|
|
35
|
+
enable_file_generation: true
|
|
36
|
+
custom_tools:
|
|
37
|
+
- name: ["text_to_file_generation"]
|
|
38
|
+
category: "multimodal"
|
|
39
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
40
|
+
function: ["text_to_file_generation"]
|
|
41
|
+
- name: ["understand_file"]
|
|
42
|
+
category: "multimodal"
|
|
43
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
44
|
+
function: ["understand_file"]
|
|
45
|
+
system_message: |
|
|
46
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
47
|
+
|
|
48
|
+
When generating PPTX presentations, format your content with:
|
|
49
|
+
- Use "# Title" or "## Title" for slide titles
|
|
50
|
+
- Use "---" to separate slides
|
|
51
|
+
- Use "- Item" for bullet points
|
|
52
|
+
- Use "  - Subitem" for sub-bullets (two-space indent)
|
|
53
|
+
- Structure content in a slide-friendly format with clear, concise points
|
|
54
|
+
|
|
55
|
+
orchestrator:
|
|
56
|
+
snapshot_storage: "snapshots"
|
|
57
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
58
|
+
|
|
59
|
+
ui:
|
|
60
|
+
display_type: "rich_terminal"
|
|
61
|
+
logging_enabled: true
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# MassGen Configuration: Text to File Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs) and Generative AI. The report should include the following sections: 1) Executive Summary, 2) Introduction to LLMs and their architecture, 3) Recent breakthroughs in 2024-2025, 4) Applications in industry including healthcare, finance, and education, 5) Ethical considerations and limitations, 6) Future directions and research opportunities. Please make the report approximately 10-15 pages long with proper citations and references, and save it as a PDF file with a professional layout."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_file_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_file_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_file_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
15
|
+
function: ["text_to_file_generation"]
|
|
16
|
+
- name: ["understand_file"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
19
|
+
function: ["understand_file"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
22
|
+
|
|
23
|
+
orchestrator:
|
|
24
|
+
snapshot_storage: "snapshots"
|
|
25
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
26
|
+
|
|
27
|
+
ui:
|
|
28
|
+
display_type: "simple"
|
|
29
|
+
logging_enabled: true
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Image Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml "Please generate an image of a cat in space."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_image_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_image_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_image_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
15
|
+
function: ["text_to_image_generation"]
|
|
16
|
+
- name: ["understand_image"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
19
|
+
function: ["understand_image"]
|
|
20
|
+
- name: ["image_to_image_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
|
|
23
|
+
function: ["image_to_image_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
26
|
+
|
|
27
|
+
- id: "text_to_image_generation_tool2"
|
|
28
|
+
backend:
|
|
29
|
+
type: "openai"
|
|
30
|
+
model: "gpt-4o"
|
|
31
|
+
cwd: "workspace2"
|
|
32
|
+
enable_image_generation: true
|
|
33
|
+
custom_tools:
|
|
34
|
+
- name: ["text_to_image_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
37
|
+
function: ["text_to_image_generation"]
|
|
38
|
+
- name: ["understand_image"]
|
|
39
|
+
category: "multimodal"
|
|
40
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
41
|
+
function: ["understand_image"]
|
|
42
|
+
system_message: |
|
|
43
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
44
|
+
|
|
45
|
+
orchestrator:
|
|
46
|
+
snapshot_storage: "snapshots"
|
|
47
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
48
|
+
|
|
49
|
+
ui:
|
|
50
|
+
display_type: "rich_terminal"
|
|
51
|
+
logging_enabled: true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Image Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml "Please generate an image of a cat in space."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_image_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_image_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_image_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
15
|
+
function: ["text_to_image_generation"]
|
|
16
|
+
- name: ["understand_image"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
19
|
+
function: ["understand_image"]
|
|
20
|
+
- name: ["image_to_image_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
|
|
23
|
+
function: ["image_to_image_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
26
|
+
|
|
27
|
+
orchestrator:
|
|
28
|
+
snapshot_storage: "snapshots"
|
|
29
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
30
|
+
|
|
31
|
+
ui:
|
|
32
|
+
display_type: "simple"
|
|
33
|
+
logging_enabled: true
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Speech Continue Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_speech_continue_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_audio_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
15
|
+
function: ["text_to_speech_transcription_generation"]
|
|
16
|
+
- name: ["understand_audio"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
19
|
+
function: ["understand_audio"]
|
|
20
|
+
- name: ["text_to_speech_continue_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
23
|
+
function: ["text_to_speech_continue_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
26
|
+
|
|
27
|
+
- id: "text_to_speech_continue_generation_tool2"
|
|
28
|
+
backend:
|
|
29
|
+
type: "openai"
|
|
30
|
+
model: "gpt-4o"
|
|
31
|
+
cwd: "workspace2"
|
|
32
|
+
enable_audio_generation: true
|
|
33
|
+
custom_tools:
|
|
34
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
37
|
+
function: ["text_to_speech_transcription_generation"]
|
|
38
|
+
- name: ["understand_audio"]
|
|
39
|
+
category: "multimodal"
|
|
40
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
41
|
+
function: ["understand_audio"]
|
|
42
|
+
- name: ["text_to_speech_continue_generation"]
|
|
43
|
+
category: "multimodal"
|
|
44
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
45
|
+
function: ["text_to_speech_continue_generation"]
|
|
46
|
+
system_message: |
|
|
47
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
48
|
+
|
|
49
|
+
orchestrator:
|
|
50
|
+
snapshot_storage: "snapshots"
|
|
51
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
52
|
+
|
|
53
|
+
ui:
|
|
54
|
+
display_type: "rich_terminal"
|
|
55
|
+
logging_enabled: true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Speech Continue Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_speech_continue_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_audio_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
15
|
+
function: ["text_to_speech_transcription_generation"]
|
|
16
|
+
- name: ["understand_audio"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
19
|
+
function: ["understand_audio"]
|
|
20
|
+
- name: ["text_to_speech_continue_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
23
|
+
function: ["text_to_speech_continue_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
26
|
+
|
|
27
|
+
orchestrator:
|
|
28
|
+
snapshot_storage: "snapshots"
|
|
29
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
30
|
+
|
|
31
|
+
ui:
|
|
32
|
+
display_type: "simple"
|
|
33
|
+
logging_enabled: true
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Video Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml "Generate a 4-second video of a neon-lit alley at night, light rain, slow push-in, cinematic."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_video_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_video_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["understand_video"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
15
|
+
function: ["understand_video"]
|
|
16
|
+
- name: ["text_to_video_generation"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
|
|
19
|
+
function: ["text_to_video_generation"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-video generation capabilities.
|
|
22
|
+
|
|
23
|
+
- id: "text_to_video_generation_tool2"
|
|
24
|
+
backend:
|
|
25
|
+
type: "openai"
|
|
26
|
+
model: "gpt-4o"
|
|
27
|
+
cwd: "workspace2"
|
|
28
|
+
enable_video_generation: true
|
|
29
|
+
custom_tools:
|
|
30
|
+
- name: ["understand_video"]
|
|
31
|
+
category: "multimodal"
|
|
32
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
33
|
+
function: ["understand_video"]
|
|
34
|
+
- name: ["text_to_video_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
|
|
37
|
+
function: ["text_to_video_generation"]
|
|
38
|
+
system_message: |
|
|
39
|
+
You are an AI assistant with access to text-to-video generation capabilities.
|
|
40
|
+
|
|
41
|
+
orchestrator:
|
|
42
|
+
snapshot_storage: "snapshots"
|
|
43
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
44
|
+
|
|
45
|
+
ui:
|
|
46
|
+
display_type: "rich_terminal"
|
|
47
|
+
logging_enabled: true
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Video Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml "Generate a 4-second video of a neon-lit alley at night, light rain, slow push-in, cinematic."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_video_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_video_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["understand_video"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
15
|
+
function: ["understand_video"]
|
|
16
|
+
- name: ["text_to_video_generation"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
|
|
19
|
+
function: ["text_to_video_generation"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-video generation capabilities.
|
|
22
|
+
|
|
23
|
+
orchestrator:
|
|
24
|
+
snapshot_storage: "snapshots"
|
|
25
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
26
|
+
|
|
27
|
+
ui:
|
|
28
|
+
display_type: "simple"
|
|
29
|
+
logging_enabled: true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# MassGen Configuration: Understand Audio Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml "Please summarize the content in this audio."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "understand_audio_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-5-nano"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
custom_tools:
|
|
11
|
+
- name: ["understand_audio"]
|
|
12
|
+
category: "multimodal"
|
|
13
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
14
|
+
function: ["understand_audio"]
|
|
15
|
+
system_message: |
|
|
16
|
+
You are an AI assistant with access to audio transcription capabilities.
|
|
17
|
+
|
|
18
|
+
The understand_audio tool is available to transcribe audio files to text using OpenAI's Transcription API.
|
|
19
|
+
Do not output tool call syntax or function declarations. Focus on answering the user's question clearly.
|
|
20
|
+
|
|
21
|
+
When users ask about transcribing or understanding audio files, use the understand_audio tool to
|
|
22
|
+
process the audio and provide the transcription.
|
|
23
|
+
|
|
24
|
+
orchestrator:
|
|
25
|
+
snapshot_storage: "snapshots"
|
|
26
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
27
|
+
context_paths:
|
|
28
|
+
- path: "massgen/configs/resources/v0.1.3-example/Sherlock_Holmes.mp3"
|
|
29
|
+
permission: "read"
|
|
30
|
+
|
|
31
|
+
ui:
|
|
32
|
+
display_type: "rich_terminal"
|
|
33
|
+
logging_enabled: true
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# MassGen Configuration: Understand File Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml "Please summarize the content in this file."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "understand_file_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-5-nano"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
custom_tools:
|
|
11
|
+
- name: ["understand_file"]
|
|
12
|
+
category: "multimodal"
|
|
13
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
14
|
+
function: ["understand_file"]
|
|
15
|
+
system_message: |
|
|
16
|
+
You are an AI assistant with access to file understanding capabilities.
|
|
17
|
+
|
|
18
|
+
The understand_file tool is available to analyze and understand file contents using OpenAI's gpt-4.1 API.
|
|
19
|
+
It supports text files, PDF, DOCX, XLSX, PPTX, and more.
|
|
20
|
+
Do not output tool call syntax or function declarations. Focus on answering the user's question clearly.
|
|
21
|
+
|
|
22
|
+
When users ask about analyzing or understanding files, use the understand_file tool to process
|
|
23
|
+
the file and provide detailed descriptions or answers to their questions.
|
|
24
|
+
|
|
25
|
+
orchestrator:
|
|
26
|
+
snapshot_storage: "snapshots"
|
|
27
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
28
|
+
context_paths:
|
|
29
|
+
- path: "massgen/configs/resources/v0.1.3-example/TUMIX.pdf"
|
|
30
|
+
permission: "read"
|
|
31
|
+
|
|
32
|
+
ui:
|
|
33
|
+
display_type: "rich_terminal"
|
|
34
|
+
logging_enabled: true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# MassGen Configuration: Understand Image Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml "Please summarize the content in this image."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "understand_image_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-5-nano"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
custom_tools:
|
|
11
|
+
- name: ["understand_image"]
|
|
12
|
+
category: "multimodal"
|
|
13
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
14
|
+
function: ["understand_image"]
|
|
15
|
+
system_message: |
|
|
16
|
+
You are an AI assistant with access to image understanding capabilities.
|
|
17
|
+
|
|
18
|
+
The understand_image tool is available to analyze and understand images using OpenAI's gpt-4.1 API.
|
|
19
|
+
Do not output tool call syntax or function declarations. Focus on answering the user's question clearly.
|
|
20
|
+
|
|
21
|
+
When users ask about analyzing or understanding images, use the understand_image tool to process
|
|
22
|
+
the image and provide detailed descriptions or answers to their questions.
|
|
23
|
+
|
|
24
|
+
orchestrator:
|
|
25
|
+
snapshot_storage: "snapshots"
|
|
26
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
27
|
+
context_paths:
|
|
28
|
+
- path: "massgen/configs/resources/v0.1.3-example/multimodality.jpg"
|
|
29
|
+
permission: "read"
|
|
30
|
+
|
|
31
|
+
ui:
|
|
32
|
+
display_type: "rich_terminal"
|
|
33
|
+
logging_enabled: true
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# MassGen Configuration: Understand Video Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml "What's happening in this video?"
|
|
4
|
+
agents:
|
|
5
|
+
- id: "understand_video_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-5-nano"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
custom_tools:
|
|
11
|
+
- name: ["understand_video"]
|
|
12
|
+
category: "multimodal"
|
|
13
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
14
|
+
function: ["understand_video"]
|
|
15
|
+
system_message: |
|
|
16
|
+
You are an AI assistant with access to video understanding capabilities.
|
|
17
|
+
|
|
18
|
+
The understand_video tool is available to analyze and understand videos by extracting key frames
|
|
19
|
+
and using OpenAI's gpt-4.1 API.
|
|
20
|
+
Do not output tool call syntax or function declarations. Focus on answering the user's question clearly.
|
|
21
|
+
|
|
22
|
+
When users ask about analyzing or understanding videos, use the understand_video tool to process
|
|
23
|
+
the video and provide detailed descriptions or answers to their questions.
|
|
24
|
+
|
|
25
|
+
orchestrator:
|
|
26
|
+
snapshot_storage: "snapshots"
|
|
27
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
28
|
+
context_paths:
|
|
29
|
+
- path: "massgen/configs/resources/v0.1.3-example/oppenheimer_trailer_1920.mp4"
|
|
30
|
+
permission: "read"
|
|
31
|
+
|
|
32
|
+
ui:
|
|
33
|
+
display_type: "rich_terminal"
|
|
34
|
+
logging_enabled: true
|