massgen 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
- massgen/api_params_handler/_claude_api_params_handler.py +4 -0
- massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
- massgen/api_params_handler/_response_api_params_handler.py +4 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
- massgen/backend/docs/permissions_and_context_files.md +2 -2
- massgen/backend/response.py +2 -0
- massgen/chat_agent.py +340 -20
- massgen/cli.py +326 -19
- massgen/configs/README.md +92 -41
- massgen/configs/memory/gpt5mini_gemini_baseline_research_to_implementation.yaml +94 -0
- massgen/configs/memory/gpt5mini_gemini_context_window_management.yaml +187 -0
- massgen/configs/memory/gpt5mini_gemini_research_to_implementation.yaml +127 -0
- massgen/configs/memory/gpt5mini_high_reasoning_gemini.yaml +107 -0
- massgen/configs/memory/single_agent_compression_test.yaml +64 -0
- massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +1 -1
- massgen/filesystem_manager/_filesystem_manager.py +1 -0
- massgen/filesystem_manager/_path_permission_manager.py +148 -0
- massgen/memory/README.md +277 -0
- massgen/memory/__init__.py +26 -0
- massgen/memory/_base.py +193 -0
- massgen/memory/_compression.py +237 -0
- massgen/memory/_context_monitor.py +211 -0
- massgen/memory/_conversation.py +255 -0
- massgen/memory/_fact_extraction_prompts.py +333 -0
- massgen/memory/_mem0_adapters.py +257 -0
- massgen/memory/_persistent.py +687 -0
- massgen/memory/docker-compose.qdrant.yml +36 -0
- massgen/memory/docs/DESIGN.md +388 -0
- massgen/memory/docs/QUICKSTART.md +409 -0
- massgen/memory/docs/SUMMARY.md +319 -0
- massgen/memory/docs/agent_use_memory.md +408 -0
- massgen/memory/docs/orchestrator_use_memory.md +586 -0
- massgen/memory/examples.py +237 -0
- massgen/message_templates.py +160 -12
- massgen/orchestrator.py +223 -7
- massgen/tests/memory/test_agent_compression.py +174 -0
- massgen/{configs/tools → tests}/memory/test_context_window_management.py +30 -30
- massgen/tests/memory/test_force_compression.py +154 -0
- massgen/tests/memory/test_simple_compression.py +147 -0
- massgen/tests/test_agent_memory.py +534 -0
- massgen/tests/test_binary_file_blocking.py +274 -0
- massgen/tests/test_case_studies.md +12 -12
- massgen/tests/test_conversation_memory.py +382 -0
- massgen/tests/test_multimodal_size_limits.py +407 -0
- massgen/tests/test_orchestrator_memory.py +620 -0
- massgen/tests/test_persistent_memory.py +435 -0
- massgen/token_manager/token_manager.py +6 -0
- massgen/tool/_manager.py +7 -2
- massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
- massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
- massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
- massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
- massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
- massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
- massgen/tool/_multimodal_tools/understand_audio.py +19 -1
- massgen/tool/_multimodal_tools/understand_file.py +6 -1
- massgen/tool/_multimodal_tools/understand_image.py +112 -8
- massgen/tool/_multimodal_tools/understand_video.py +32 -5
- massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
- massgen/tool/docs/multimodal_tools.md +589 -0
- massgen/tools/__init__.py +8 -0
- massgen/tools/_planning_mcp_server.py +520 -0
- massgen/tools/planning_dataclasses.py +434 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/METADATA +142 -82
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/RECORD +84 -41
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +0 -67
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +0 -68
- massgen/configs/tools/memory/README.md +0 -199
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +0 -131
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +0 -133
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +0 -97
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/WHEEL +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# MassGen Configuration: Text to File Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml "Please generate a comprehensive business presentation about Artificial Intelligence in Healthcare for our upcoming board meeting. The presentation should include the following slides: 1) Title slide with presentation title and date, 2) Executive Summary highlighting key findings, 3) Market Overview showing the current AI healthcare market size and growth trends, 4) Technology Applications including AI in diagnostics, drug discovery, and patient care, 5) Case Studies showcasing 3-4 successful implementations with metrics, 6) Competitive Landscape analyzing major players and their solutions, 7) Implementation Roadmap with timeline and milestones, 8) ROI Analysis with projected costs and benefits, 9) Risk Assessment and mitigation strategies, 10) Recommendations and next steps. Please make it professional with approximately 15-20 slides, use clear bullet points, include suggested visual elements for each slide, and save it as a PPTX file with a modern business layout."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_file_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_file_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_file_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
15
|
+
function: ["text_to_file_generation"]
|
|
16
|
+
- name: ["understand_file"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
19
|
+
function: ["understand_file"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
22
|
+
|
|
23
|
+
When generating PPTX presentations, format your content with:
|
|
24
|
+
- Use "# Title" or "## Title" for slide titles
|
|
25
|
+
- Use "---" to separate slides
|
|
26
|
+
- Use "- Item" for bullet points
|
|
27
|
+
- Use " - Subitem" for sub-bullets (two spaces indent)
|
|
28
|
+
- Structure content in a slide-friendly format with clear, concise points
|
|
29
|
+
|
|
30
|
+
- id: "text_to_file_generation_tool2"
|
|
31
|
+
backend:
|
|
32
|
+
type: "openai"
|
|
33
|
+
model: "gpt-4o"
|
|
34
|
+
cwd: "workspace2"
|
|
35
|
+
enable_file_generation: true
|
|
36
|
+
custom_tools:
|
|
37
|
+
- name: ["text_to_file_generation"]
|
|
38
|
+
category: "multimodal"
|
|
39
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
40
|
+
function: ["text_to_file_generation"]
|
|
41
|
+
- name: ["understand_file"]
|
|
42
|
+
category: "multimodal"
|
|
43
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
44
|
+
function: ["understand_file"]
|
|
45
|
+
system_message: |
|
|
46
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
47
|
+
|
|
48
|
+
When generating PPTX presentations, format your content with:
|
|
49
|
+
- Use "# Title" or "## Title" for slide titles
|
|
50
|
+
- Use "---" to separate slides
|
|
51
|
+
- Use "- Item" for bullet points
|
|
52
|
+
- Use " - Subitem" for sub-bullets (two spaces indent)
|
|
53
|
+
- Structure content in a slide-friendly format with clear, concise points
|
|
54
|
+
|
|
55
|
+
orchestrator:
|
|
56
|
+
snapshot_storage: "snapshots"
|
|
57
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
58
|
+
|
|
59
|
+
ui:
|
|
60
|
+
display_type: "rich_terminal"
|
|
61
|
+
logging_enabled: true
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# MassGen Configuration: Text to File Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml "Please generate a comprehensive technical report about the latest developments in Large Language Models (LLMs) and Generative AI. The report should include the following sections: 1) Executive Summary, 2) Introduction to LLMs and their architecture, 3) Recent breakthroughs in 2024-2025, 4) Applications in industry including healthcare, finance, and education, 5) Ethical considerations and limitations, 6) Future directions and research opportunities. Please make the report approximately 10-15 pages long with proper citations and references, and save it as a PDF file with a professional layout."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_file_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_file_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_file_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_file_generation.py"
|
|
15
|
+
function: ["text_to_file_generation"]
|
|
16
|
+
- name: ["understand_file"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_file.py"
|
|
19
|
+
function: ["understand_file"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-file generation capabilities.
|
|
22
|
+
|
|
23
|
+
orchestrator:
|
|
24
|
+
snapshot_storage: "snapshots"
|
|
25
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
26
|
+
|
|
27
|
+
ui:
|
|
28
|
+
display_type: "simple"
|
|
29
|
+
logging_enabled: true
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Image Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml "Please generate an image of a cat in space."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_image_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_image_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_image_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
15
|
+
function: ["text_to_image_generation"]
|
|
16
|
+
- name: ["understand_image"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
19
|
+
function: ["understand_image"]
|
|
20
|
+
- name: ["image_to_image_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
|
|
23
|
+
function: ["image_to_image_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
26
|
+
|
|
27
|
+
- id: "text_to_image_generation_tool2"
|
|
28
|
+
backend:
|
|
29
|
+
type: "openai"
|
|
30
|
+
model: "gpt-4o"
|
|
31
|
+
cwd: "workspace2"
|
|
32
|
+
enable_image_generation: true
|
|
33
|
+
custom_tools:
|
|
34
|
+
- name: ["text_to_image_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
37
|
+
function: ["text_to_image_generation"]
|
|
38
|
+
- name: ["understand_image"]
|
|
39
|
+
category: "multimodal"
|
|
40
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
41
|
+
function: ["understand_image"]
|
|
42
|
+
system_message: |
|
|
43
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
44
|
+
|
|
45
|
+
orchestrator:
|
|
46
|
+
snapshot_storage: "snapshots"
|
|
47
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
48
|
+
|
|
49
|
+
ui:
|
|
50
|
+
display_type: "rich_terminal"
|
|
51
|
+
logging_enabled: true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Image Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml "Please generate an image of a cat in space."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_image_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_image_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_image_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_image_generation.py"
|
|
15
|
+
function: ["text_to_image_generation"]
|
|
16
|
+
- name: ["understand_image"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_image.py"
|
|
19
|
+
function: ["understand_image"]
|
|
20
|
+
- name: ["image_to_image_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/image_to_image_generation.py"
|
|
23
|
+
function: ["image_to_image_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-image generation capabilities.
|
|
26
|
+
|
|
27
|
+
orchestrator:
|
|
28
|
+
snapshot_storage: "snapshots"
|
|
29
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
30
|
+
|
|
31
|
+
ui:
|
|
32
|
+
display_type: "simple"
|
|
33
|
+
logging_enabled: true
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Speech Continue Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_speech_continue_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_audio_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
15
|
+
function: ["text_to_speech_transcription_generation"]
|
|
16
|
+
- name: ["understand_audio"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
19
|
+
function: ["understand_audio"]
|
|
20
|
+
- name: ["text_to_speech_continue_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
23
|
+
function: ["text_to_speech_continue_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
26
|
+
|
|
27
|
+
- id: "text_to_speech_continue_generation_tool2"
|
|
28
|
+
backend:
|
|
29
|
+
type: "openai"
|
|
30
|
+
model: "gpt-4o"
|
|
31
|
+
cwd: "workspace2"
|
|
32
|
+
enable_audio_generation: true
|
|
33
|
+
custom_tools:
|
|
34
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
37
|
+
function: ["text_to_speech_transcription_generation"]
|
|
38
|
+
- name: ["understand_audio"]
|
|
39
|
+
category: "multimodal"
|
|
40
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
41
|
+
function: ["understand_audio"]
|
|
42
|
+
- name: ["text_to_speech_continue_generation"]
|
|
43
|
+
category: "multimodal"
|
|
44
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
45
|
+
function: ["text_to_speech_continue_generation"]
|
|
46
|
+
system_message: |
|
|
47
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
48
|
+
|
|
49
|
+
orchestrator:
|
|
50
|
+
snapshot_storage: "snapshots"
|
|
51
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
52
|
+
|
|
53
|
+
ui:
|
|
54
|
+
display_type: "rich_terminal"
|
|
55
|
+
logging_enabled: true
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Speech Continue Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml "I want you to tell me a very short introduction about Sherlock Holmes in one sentence, and I want you to use an emotional voice to read it out loud."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_speech_continue_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_audio_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["text_to_speech_transcription_generation"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py"
|
|
15
|
+
function: ["text_to_speech_transcription_generation"]
|
|
16
|
+
- name: ["understand_audio"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/understand_audio.py"
|
|
19
|
+
function: ["understand_audio"]
|
|
20
|
+
- name: ["text_to_speech_continue_generation"]
|
|
21
|
+
category: "multimodal"
|
|
22
|
+
path: "massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py"
|
|
23
|
+
function: ["text_to_speech_continue_generation"]
|
|
24
|
+
system_message: |
|
|
25
|
+
You are an AI assistant with access to text-to-speech generation capabilities.
|
|
26
|
+
|
|
27
|
+
orchestrator:
|
|
28
|
+
snapshot_storage: "snapshots"
|
|
29
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
30
|
+
|
|
31
|
+
ui:
|
|
32
|
+
display_type: "simple"
|
|
33
|
+
logging_enabled: true
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Video Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml "Generate a 4-second video of a neon-lit alley at night, light rain, slow push-in, cinematic."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_video_generation_tool1"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_video_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["understand_video"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
15
|
+
function: ["understand_video"]
|
|
16
|
+
- name: ["text_to_video_generation"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
|
|
19
|
+
function: ["text_to_video_generation"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-video generation capabilities.
|
|
22
|
+
|
|
23
|
+
- id: "text_to_video_generation_tool2"
|
|
24
|
+
backend:
|
|
25
|
+
type: "openai"
|
|
26
|
+
model: "gpt-4o"
|
|
27
|
+
cwd: "workspace2"
|
|
28
|
+
enable_video_generation: true
|
|
29
|
+
custom_tools:
|
|
30
|
+
- name: ["understand_video"]
|
|
31
|
+
category: "multimodal"
|
|
32
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
33
|
+
function: ["understand_video"]
|
|
34
|
+
- name: ["text_to_video_generation"]
|
|
35
|
+
category: "multimodal"
|
|
36
|
+
path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
|
|
37
|
+
function: ["text_to_video_generation"]
|
|
38
|
+
system_message: |
|
|
39
|
+
You are an AI assistant with access to text-to-video generation capabilities.
|
|
40
|
+
|
|
41
|
+
orchestrator:
|
|
42
|
+
snapshot_storage: "snapshots"
|
|
43
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
44
|
+
|
|
45
|
+
ui:
|
|
46
|
+
display_type: "rich_terminal"
|
|
47
|
+
logging_enabled: true
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# MassGen Configuration: Text to Video Generation Tool
|
|
2
|
+
# Usage:
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml "Generate a 4-second video of a neon-lit alley at night, light rain, slow push-in, cinematic."
|
|
4
|
+
agents:
|
|
5
|
+
- id: "text_to_video_generation_tool"
|
|
6
|
+
backend:
|
|
7
|
+
type: "openai"
|
|
8
|
+
model: "gpt-4o"
|
|
9
|
+
cwd: "workspace1"
|
|
10
|
+
enable_video_generation: true
|
|
11
|
+
custom_tools:
|
|
12
|
+
- name: ["understand_video"]
|
|
13
|
+
category: "multimodal"
|
|
14
|
+
path: "massgen/tool/_multimodal_tools/understand_video.py"
|
|
15
|
+
function: ["understand_video"]
|
|
16
|
+
- name: ["text_to_video_generation"]
|
|
17
|
+
category: "multimodal"
|
|
18
|
+
path: "massgen/tool/_multimodal_tools/text_to_video_generation.py"
|
|
19
|
+
function: ["text_to_video_generation"]
|
|
20
|
+
system_message: |
|
|
21
|
+
You are an AI assistant with access to text-to-video generation capabilities.
|
|
22
|
+
|
|
23
|
+
orchestrator:
|
|
24
|
+
snapshot_storage: "snapshots"
|
|
25
|
+
agent_temporary_workspace: "temp_workspaces"
|
|
26
|
+
|
|
27
|
+
ui:
|
|
28
|
+
display_type: "simple"
|
|
29
|
+
logging_enabled: true
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# MassGen Configuration: Understand Audio Tool
|
|
2
2
|
# Usage:
|
|
3
|
-
# massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml "Please summarize the content in this audio."
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml "Please summarize the content in this audio."
|
|
4
4
|
agents:
|
|
5
5
|
- id: "understand_audio_tool"
|
|
6
6
|
backend:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# MassGen Configuration: Understand File Tool
|
|
2
2
|
# Usage:
|
|
3
|
-
# massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml "Please summarize the content in this file."
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml "Please summarize the content in this file."
|
|
4
4
|
agents:
|
|
5
5
|
- id: "understand_file_tool"
|
|
6
6
|
backend:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# MassGen Configuration: Understand Image Tool
|
|
2
2
|
# Usage:
|
|
3
|
-
# massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml "Please summarize the content in this image."
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml "Please summarize the content in this image."
|
|
4
4
|
agents:
|
|
5
5
|
- id: "understand_image_tool"
|
|
6
6
|
backend:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# MassGen Configuration: Understand Video Tool
|
|
2
2
|
# Usage:
|
|
3
|
-
# massgen --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml "What's happening in this video?"
|
|
3
|
+
# uv run python -m massgen.cli --config massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml "What's happening in this video?"
|
|
4
4
|
agents:
|
|
5
5
|
- id: "understand_video_tool"
|
|
6
6
|
backend:
|
|
@@ -90,6 +90,68 @@ class PathPermissionManager:
|
|
|
90
90
|
"massgen_logs",
|
|
91
91
|
]
|
|
92
92
|
|
|
93
|
+
# Lower-case file suffixes (leading dot included) that are treated as binary.
# Text-based read tools must not open these; specialized tools
# (understand_image, understand_video, etc.) are expected to handle them.
BINARY_FILE_EXTENSIONS = {
    # Images
    ".jpg", ".jpeg", ".png", ".gif", ".bmp",
    ".ico", ".svg", ".webp", ".tiff", ".tif",
    # Videos
    ".mp4", ".avi", ".mov", ".mkv", ".flv",
    ".wmv", ".webm", ".m4v", ".mpg", ".mpeg",
    # Audio
    ".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma",
    # Archives
    ".zip", ".tar", ".gz", ".bz2", ".7z", ".rar", ".xz",
    # Executables and compiled artifacts
    ".exe", ".bin", ".dll", ".so", ".dylib",
    ".o", ".a", ".pyc", ".class", ".jar",
    # Legacy Office formats (not supported by understand_file)
    ".doc", ".xls", ".ppt",
    # PDF and modern Office formats (supported by understand_file via
    # PyPDF2, python-docx, openpyxl and python-pptx respectively)
    ".pdf", ".docx", ".xlsx", ".pptx",
}
|
|
154
|
+
|
|
93
155
|
def __init__(
|
|
94
156
|
self,
|
|
95
157
|
context_write_access_enabled: bool = False,
|
|
@@ -440,6 +502,12 @@ class PathPermissionManager:
|
|
|
440
502
|
- allowed: Whether the tool call should proceed
|
|
441
503
|
- reason: Explanation if blocked (None if allowed)
|
|
442
504
|
"""
|
|
505
|
+
# Check if read tool is trying to read binary files (images, videos, etc.)
|
|
506
|
+
if self._is_text_read_tool(tool_name):
|
|
507
|
+
binary_check_result = self._validate_binary_file_access(tool_name, tool_args)
|
|
508
|
+
if not binary_check_result[0]:
|
|
509
|
+
return binary_check_result
|
|
510
|
+
|
|
443
511
|
# Track read operations for read-before-delete enforcement
|
|
444
512
|
if self._is_read_tool(tool_name):
|
|
445
513
|
self._track_read_operation(tool_name, tool_args)
|
|
@@ -495,6 +563,33 @@ class PathPermissionManager:
|
|
|
495
563
|
|
|
496
564
|
return False
|
|
497
565
|
|
|
566
|
+
def _is_text_read_tool(self, tool_name: str) -> bool:
|
|
567
|
+
"""
|
|
568
|
+
Check if a tool is a text-based read operation that should not access binary files.
|
|
569
|
+
|
|
570
|
+
These tools are designed for reading text files and should be blocked from
|
|
571
|
+
reading binary files (images, videos, audio, etc.) to prevent context pollution.
|
|
572
|
+
|
|
573
|
+
Tools that read text file contents:
|
|
574
|
+
- Read: Claude Code read tool
|
|
575
|
+
- read_text_file: MCP filesystem read tool
|
|
576
|
+
- read_file: Generic read operations
|
|
577
|
+
"""
|
|
578
|
+
# Use lowercase for case-insensitive matching
|
|
579
|
+
tool_lower = tool_name.lower()
|
|
580
|
+
|
|
581
|
+
# Check if tool name contains any text read operation keywords
|
|
582
|
+
text_read_keywords = [
|
|
583
|
+
"read_text_file", # MCP filesystem: read_text_file
|
|
584
|
+
"read_file", # Generic read operations
|
|
585
|
+
]
|
|
586
|
+
|
|
587
|
+
# Also check for exact "Read" match (Claude Code tool)
|
|
588
|
+
if tool_name == "Read":
|
|
589
|
+
return True
|
|
590
|
+
|
|
591
|
+
return any(keyword in tool_lower for keyword in text_read_keywords)
|
|
592
|
+
|
|
498
593
|
def _is_read_tool(self, tool_name: str) -> bool:
|
|
499
594
|
"""
|
|
500
595
|
Check if a tool is a read operation that should be tracked.
|
|
@@ -518,6 +613,59 @@ class PathPermissionManager:
|
|
|
518
613
|
|
|
519
614
|
return any(keyword in tool_lower for keyword in read_keywords)
|
|
520
615
|
|
|
616
|
+
def _validate_binary_file_access(self, tool_name: str, tool_args: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
    """
    Reject attempts by a text-based read tool to open a binary file.

    The decision is made purely on the file suffix (compared in lower case)
    against ``BINARY_FILE_EXTENSIONS``. When a file is rejected, the returned
    reason points at a more suitable tool for that kind of file.

    Args:
        tool_name: Name of the tool being called
        tool_args: Arguments passed to the tool

    Returns:
        Tuple of (allowed, reason):
        - allowed: False when the target has a binary suffix, True otherwise
          (including when no path can be extracted or resolved)
        - reason: Explanation when blocked, None when allowed
    """
    requested = self._extract_file_path(tool_args)
    if not requested:
        # No path in the arguments - nothing to check, so let the call through.
        return (True, None)

    try:
        target = Path(self._resolve_path_against_workspace(requested))
    except Exception:
        # Resolution problems are not this check's concern; an invalid path
        # is left to fail elsewhere.
        return (True, None)

    ext = target.suffix.lower()
    if ext not in self.BINARY_FILE_EXTENSIONS:
        return (True, None)

    # Ordered (suffix group, hint) pairs; the first group containing the
    # suffix supplies the suggestion shown to the caller.
    hints = (
        (
            {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif"},
            "For images, use understand_image tool",
        ),
        (
            {".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"},
            "For videos, use understand_video tool",
        ),
        (
            {".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma"},
            "For audio files, use generate_text_with_input_audio tool",
        ),
        (
            {".pdf"},
            "For PDF files, use understand_file tool",
        ),
        (
            {".docx", ".xlsx", ".pptx"},
            "For Office documents, use understand_file tool",
        ),
    )

    # Fallback for binary suffixes without a dedicated hint (archives,
    # executables, legacy Office formats).
    suggestion = "Use appropriate specialized tool for this file type"
    for extensions, hint in hints:
        if ext in extensions:
            suggestion = hint
            break

    reason = f"Cannot read binary file '{target.name}' with {tool_name}. {suggestion}."
    logger.warning(f"[PathPermissionManager] Blocked {tool_name} from reading binary file: {target}")
    return (False, reason)
|
|
668
|
+
|
|
521
669
|
def _is_delete_tool(self, tool_name: str) -> bool:
|
|
522
670
|
"""
|
|
523
671
|
Check if a tool is a delete operation.
|