massgen-0.1.2-py3-none-any.whl → massgen-0.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (82)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,274 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Tests for binary file blocking in PathPermissionManager.
5
+
6
+ These tests ensure that text-based read tools (Read, read_text_file, etc.)
7
+ are blocked from reading binary files (images, videos, audio, etc.) to prevent
8
+ context pollution with binary data.
9
+ """
10
+
11
+ from pathlib import Path
12
+
13
+ import pytest
14
+
15
+ from massgen.filesystem_manager._base import Permission
16
+ from massgen.filesystem_manager._path_permission_manager import PathPermissionManager
17
+
18
+
19
+ @pytest.fixture
20
+ def permission_manager():
21
+ """Create a PathPermissionManager instance for testing."""
22
+ manager = PathPermissionManager(
23
+ context_write_access_enabled=False,
24
+ enforce_read_before_delete=True,
25
+ )
26
+ # Add a workspace path for testing
27
+ test_workspace = Path("/tmp/test_workspace").resolve()
28
+ manager.add_path(test_workspace, Permission.WRITE, "workspace")
29
+ return manager
30
+
31
+
32
+ class TestBinaryFileBlocking:
33
+ """Test suite for binary file blocking functionality."""
34
+
35
+ @pytest.mark.asyncio
36
+ async def test_block_read_image_with_read_tool(self, permission_manager):
37
+ """Test that Read tool is blocked from reading image files."""
38
+ tool_name = "Read"
39
+ tool_args = {"file_path": "/tmp/test_workspace/photo.jpg"}
40
+
41
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
42
+
43
+ assert not allowed, "Read should be blocked from reading .jpg files"
44
+ assert reason is not None
45
+ assert "understand_image" in reason.lower()
46
+ assert "photo.jpg" in reason
47
+
48
+ @pytest.mark.asyncio
49
+ async def test_block_read_text_file_image(self, permission_manager):
50
+ """Test that read_text_file (MCP) is blocked from reading image files."""
51
+ tool_name = "mcp__filesystem__read_text_file"
52
+ tool_args = {"path": "/tmp/test_workspace/diagram.png"}
53
+
54
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
55
+
56
+ assert not allowed, "read_text_file should be blocked from reading .png files"
57
+ assert reason is not None
58
+ assert "understand_image" in reason.lower()
59
+
60
+ @pytest.mark.asyncio
61
+ async def test_block_read_video(self, permission_manager):
62
+ """Test that Read tool is blocked from reading video files."""
63
+ tool_name = "Read"
64
+ tool_args = {"file_path": "/tmp/test_workspace/demo.mp4"}
65
+
66
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
67
+
68
+ assert not allowed, "Read should be blocked from reading .mp4 files"
69
+ assert reason is not None
70
+ assert "understand_video" in reason.lower()
71
+
72
+ @pytest.mark.asyncio
73
+ async def test_block_read_audio(self, permission_manager):
74
+ """Test that Read tool is blocked from reading audio files."""
75
+ tool_name = "Read"
76
+ tool_args = {"file_path": "/tmp/test_workspace/recording.mp3"}
77
+
78
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
79
+
80
+ assert not allowed, "Read should be blocked from reading .mp3 files"
81
+ assert reason is not None
82
+ assert "audio" in reason.lower()
83
+
84
+ @pytest.mark.asyncio
85
+ async def test_allow_read_text_file(self, permission_manager):
86
+ """Test that Read tool is allowed to read text files."""
87
+ tool_name = "Read"
88
+ tool_args = {"file_path": "/tmp/test_workspace/document.txt"}
89
+
90
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
91
+
92
+ assert allowed, "Read should be allowed to read .txt files"
93
+ assert reason is None
94
+
95
+ @pytest.mark.asyncio
96
+ async def test_allow_read_code_file(self, permission_manager):
97
+ """Test that Read tool is allowed to read code files."""
98
+ test_cases = [
99
+ "script.py",
100
+ "app.js",
101
+ "component.tsx",
102
+ "main.go",
103
+ "app.rs",
104
+ ]
105
+
106
+ for filename in test_cases:
107
+ tool_name = "Read"
108
+ tool_args = {"file_path": f"/tmp/test_workspace/{filename}"}
109
+
110
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
111
+
112
+ assert allowed, f"Read should be allowed to read {filename}"
113
+ assert reason is None
114
+
115
+ @pytest.mark.asyncio
116
+ async def test_block_all_image_formats(self, permission_manager):
117
+ """Test that all image formats are blocked."""
118
+ image_extensions = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif"]
119
+
120
+ for ext in image_extensions:
121
+ tool_name = "Read"
122
+ tool_args = {"file_path": f"/tmp/test_workspace/image{ext}"}
123
+
124
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
125
+
126
+ assert not allowed, f"Read should be blocked from reading {ext} files"
127
+ assert reason is not None
128
+
129
+ @pytest.mark.asyncio
130
+ async def test_block_all_video_formats(self, permission_manager):
131
+ """Test that all video formats are blocked."""
132
+ video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"]
133
+
134
+ for ext in video_extensions:
135
+ tool_name = "Read"
136
+ tool_args = {"file_path": f"/tmp/test_workspace/video{ext}"}
137
+
138
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
139
+
140
+ assert not allowed, f"Read should be blocked from reading {ext} files"
141
+ assert reason is not None
142
+
143
+ @pytest.mark.asyncio
144
+ async def test_block_all_audio_formats(self, permission_manager):
145
+ """Test that all audio formats are blocked."""
146
+ audio_extensions = [".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma"]
147
+
148
+ for ext in audio_extensions:
149
+ tool_name = "Read"
150
+ tool_args = {"file_path": f"/tmp/test_workspace/audio{ext}"}
151
+
152
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
153
+
154
+ assert not allowed, f"Read should be blocked from reading {ext} files"
155
+ assert reason is not None
156
+
157
+ @pytest.mark.asyncio
158
+ async def test_block_archive_formats(self, permission_manager):
159
+ """Test that archive formats are blocked."""
160
+ archive_extensions = [".zip", ".tar", ".gz", ".bz2", ".7z", ".rar", ".xz"]
161
+
162
+ for ext in archive_extensions:
163
+ tool_name = "Read"
164
+ tool_args = {"file_path": f"/tmp/test_workspace/archive{ext}"}
165
+
166
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
167
+
168
+ assert not allowed, f"Read should be blocked from reading {ext} files"
169
+ assert reason is not None
170
+
171
+ @pytest.mark.asyncio
172
+ async def test_block_executable_formats(self, permission_manager):
173
+ """Test that executable/binary formats are blocked."""
174
+ binary_extensions = [".exe", ".bin", ".dll", ".so", ".dylib", ".o", ".a", ".pyc", ".class", ".jar"]
175
+
176
+ for ext in binary_extensions:
177
+ tool_name = "Read"
178
+ tool_args = {"file_path": f"/tmp/test_workspace/binary{ext}"}
179
+
180
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
181
+
182
+ assert not allowed, f"Read should be blocked from reading {ext} files"
183
+ assert reason is not None
184
+
185
+ @pytest.mark.asyncio
186
+ async def test_block_old_office_formats(self, permission_manager):
187
+ """Test that old Office formats are blocked (use understand_file instead)."""
188
+ old_office_extensions = [".doc", ".xls", ".ppt"]
189
+
190
+ for ext in old_office_extensions:
191
+ tool_name = "Read"
192
+ tool_args = {"file_path": f"/tmp/test_workspace/document{ext}"}
193
+
194
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
195
+
196
+ assert not allowed, f"Read should be blocked from reading {ext} files"
197
+ assert reason is not None
198
+
199
+ @pytest.mark.asyncio
200
+ async def test_block_office_formats(self, permission_manager):
201
+ """Test that PDF and Office document formats are blocked from Read (must use understand_file).
202
+
203
+ These are binary formats that should be handled by understand_file tool,
204
+ which can properly extract text from them using specialized libraries.
205
+ """
206
+ office_extensions = [".pdf", ".docx", ".xlsx", ".pptx"]
207
+
208
+ for ext in office_extensions:
209
+ tool_name = "Read"
210
+ tool_args = {"file_path": f"/tmp/test_workspace/document{ext}"}
211
+
212
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
213
+
214
+ assert not allowed, f"Read should be blocked from reading {ext} files (use understand_file)"
215
+ assert reason is not None
216
+ assert "understand_file" in reason.lower()
217
+
218
+ @pytest.mark.asyncio
219
+ async def test_case_insensitive_extension_check(self, permission_manager):
220
+ """Test that extension checking is case-insensitive."""
221
+ test_cases = [
222
+ "/tmp/test_workspace/PHOTO.JPG",
223
+ "/tmp/test_workspace/Video.MP4",
224
+ "/tmp/test_workspace/Audio.MP3",
225
+ "/tmp/test_workspace/Image.PNG",
226
+ ]
227
+
228
+ for file_path in test_cases:
229
+ tool_name = "Read"
230
+ tool_args = {"file_path": file_path}
231
+
232
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
233
+
234
+ assert not allowed, f"Read should be blocked from reading {file_path} (case-insensitive)"
235
+ assert reason is not None
236
+
237
+ @pytest.mark.asyncio
238
+ async def test_non_text_read_tools_not_affected(self, permission_manager):
239
+ """Test that non-text-read tools are not affected by binary file blocking."""
240
+ # Non-read tools such as Write, Edit, and Grep should not be affected
241
+ test_cases = [
242
+ ("Write", {"file_path": "/tmp/test_workspace/image.jpg"}),
243
+ ("Edit", {"file_path": "/tmp/test_workspace/video.mp4"}),
244
+ ("Grep", {"pattern": "test"}), # No file_path, should pass
245
+ ]
246
+
247
+ for tool_name, tool_args in test_cases:
248
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
249
+
250
+ # These tools have their own validation, but shouldn't be blocked by binary check
251
+ # (they may be blocked for other reasons like permissions)
252
+ # The key is that _validate_binary_file_access is not called for these
253
+ assert isinstance(allowed, bool) # Should complete without binary file error
254
+
255
+ @pytest.mark.asyncio
256
+ async def test_helpful_error_messages(self, permission_manager):
257
+ """Test that error messages provide helpful suggestions for blocked binary files."""
258
+ test_cases = [
259
+ (".jpg", "understand_image"),
260
+ (".mp4", "understand_video"),
261
+ (".mp3", "audio"),
262
+ (".pdf", "understand_file"),
263
+ (".docx", "understand_file"),
264
+ ]
265
+
266
+ for ext, expected_suggestion in test_cases:
267
+ tool_name = "Read"
268
+ tool_args = {"file_path": f"/tmp/test_workspace/file{ext}"}
269
+
270
+ allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)
271
+
272
+ assert not allowed, f"File with {ext} extension should be blocked"
273
+ assert reason is not None
274
+ assert expected_suggestion.lower() in reason.lower(), f"Error message should suggest {expected_suggestion} for {ext} files"
@@ -1,12 +1,12 @@
1
1
  # MassGen Case Study Test Commands
2
2
 
3
- This document contains commands to test all the case studies from `docs/case_studies/` using the three agents default configuration.
3
+ This document contains commands to test all the case studies from `docs/source/examples/case_studies/` using the three agents default configuration.
4
4
 
5
5
  ## Quick Commands
6
6
 
7
7
  All tests use the `three_agents_default.yaml` configuration with:
8
8
  - **Gemini 2.5 Flash** (web search enabled)
9
- - **GPT-4o-mini** (web search + code interpreter)
9
+ - **GPT-4o-mini** (web search + code interpreter)
10
10
  - **Grok 3 mini** (web search with citations)
11
11
 
12
12
  ### 1. Collaborative Creative Writing
@@ -17,8 +17,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "Write
17
17
  # From tests directory:
18
18
  python ../cli.py --config ../configs/three_agents_default.yaml "Write a short story about a robot who discovers music."
19
19
  ```
20
- **Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
21
- **Current:** gemini2.5flash, 4omini, grok3mini with builtin tools
20
+ **Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
21
+ **Current:** gemini2.5flash, 4omini, grok3mini with builtin tools
22
22
 
23
23
  ### 2. AI News Synthesis
24
24
  ```bash
@@ -28,8 +28,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "find b
28
28
  # From tests directory:
29
29
  python ../cli.py --config ../configs/three_agents_default.yaml "find big AI news this week"
30
30
  ```
31
- **Original:** gpt-4.1, gemini-2.5-flash, grok-3-mini
32
- **Current:** gemini2.5flash, 4omini, grok3mini with web search
31
+ **Original:** gpt-4.1, gemini-2.5-flash, grok-3-mini
32
+ **Current:** gemini2.5flash, 4omini, grok3mini with web search
33
33
 
34
34
  ### 3. Grok HLE Cost Estimation
35
35
  ```bash
@@ -39,8 +39,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "How mu
39
39
  # From tests directory:
40
40
  python ../cli.py --config ../configs/three_agents_default.yaml "How much does it cost to run HLE benchmark with Grok-4"
41
41
  ```
42
- **Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
43
- **Current:** gemini2.5flash, 4omini, grok3mini with web search
42
+ **Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
43
+ **Current:** gemini2.5flash, 4omini, grok3mini with web search
44
44
 
45
45
  ### 4. IMO 2025 Winner
46
46
  ```bash
@@ -50,8 +50,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "Which
50
50
  # From tests directory:
51
51
  python ../cli.py --config ../configs/three_agents_default.yaml "Which AI won IMO 2025?"
52
52
  ```
53
- **Original:** gemini-2.5-flash, gpt-4.1 (2 agents)
54
- **Current:** gemini2.5flash, 4omini, grok3mini (3 agents with web search)
53
+ **Original:** gemini-2.5-flash, gpt-4.1 (2 agents)
54
+ **Current:** gemini2.5flash, 4omini, grok3mini (3 agents with web search)
55
55
 
56
56
  ### 5. Stockholm Travel Guide
57
57
  ```bash
@@ -61,7 +61,7 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "what's
61
61
  # From tests directory:
62
62
  python ../cli.py --config ../configs/three_agents_default.yaml "what's best to do in Stockholm in October 2025"
63
63
  ```
64
- **Original:** gemini-2.5-flash, gpt-4o (2 agents)
64
+ **Original:** gemini-2.5-flash, gpt-4o (2 agents)
65
65
  **Current:** gemini2.5flash, 4omini, grok3mini with web search for current info
66
66
 
67
67
  ## Configuration Details
@@ -70,7 +70,7 @@ The `three_agents_default.yaml` configuration provides:
70
70
 
71
71
  ### Agent Capabilities
72
72
  - **gemini2.5flash**: Gemini 2.5 Flash with web search
73
- - **4omini**: GPT-4o-mini with web search + code interpreter
73
+ - **4omini**: GPT-4o-mini with web search + code interpreter
74
74
  - **grok3mini**: Grok 3 mini with web search and citations
75
75
 
76
76
  ### UI Features
@@ -154,6 +154,112 @@ class TestCommandSanitization:
154
154
  _sanitize_command(cmd)
155
155
 
156
156
 
157
+ class TestSudoSanitization:
158
+ """Test sudo sanitization respects enable_sudo flag."""
159
+
160
+ def test_sudo_blocked_by_default(self):
161
+ """Test that sudo is blocked when enable_sudo=False (default)."""
162
+ from massgen.filesystem_manager._code_execution_server import _sanitize_command
163
+
164
+ sudo_commands = [
165
+ "sudo apt-get update",
166
+ "sudo apt-get install -y ffmpeg",
167
+ "sudo pip install tensorflow",
168
+ "sudo npm install -g typescript",
169
+ "sudo chmod 755 file.txt",
170
+ "echo 'test' && sudo apt update",
171
+ ]
172
+
173
+ for cmd in sudo_commands:
174
+ with pytest.raises(ValueError, match="sudo.*not allowed"):
175
+ _sanitize_command(cmd, enable_sudo=False)
176
+
177
+ def test_sudo_allowed_when_enabled(self):
178
+ """Test that sudo is allowed when enable_sudo=True."""
179
+ from massgen.filesystem_manager._code_execution_server import _sanitize_command
180
+
181
+ sudo_commands = [
182
+ "sudo apt-get update",
183
+ "sudo apt-get install -y ffmpeg",
184
+ "sudo pip install tensorflow",
185
+ "sudo npm install -g typescript",
186
+ "sudo chown user:group file.txt", # chown allowed with sudo enabled
187
+ "sudo chmod 755 file.txt", # chmod allowed with sudo enabled
188
+ ]
189
+
190
+ for cmd in sudo_commands:
191
+ # Should not raise when enable_sudo=True
192
+ _sanitize_command(cmd, enable_sudo=True)
193
+
194
+ def test_other_dangerous_patterns_still_blocked_with_sudo(self):
195
+ """Test that other dangerous patterns are still blocked even with sudo enabled."""
196
+ from massgen.filesystem_manager._code_execution_server import _sanitize_command
197
+
198
+ # These should ALWAYS be blocked, regardless of enable_sudo
199
+ dangerous_commands = [
200
+ "sudo rm -rf /", # Still blocked - root deletion
201
+ "rm -rf /", # Still blocked
202
+ "dd if=/dev/zero of=/dev/sda", # Still blocked - dd command
203
+ "sudo dd if=/dev/zero of=/dev/sda", # Still blocked
204
+ ":(){ :|:& };:", # Still blocked - fork bomb
205
+ "mv file /dev/null", # Still blocked
206
+ "sudo mv file /dev/null", # Still blocked
207
+ "echo test > /dev/sda1", # Still blocked - writing to disk
208
+ ]
209
+
210
+ for cmd in dangerous_commands:
211
+ with pytest.raises(ValueError, match="dangerous|not allowed"):
212
+ _sanitize_command(cmd, enable_sudo=True)
213
+
214
+ def test_su_chown_chmod_blocked_without_sudo_flag(self):
215
+ """Test that su, chown, chmod are blocked when enable_sudo=False."""
216
+ from massgen.filesystem_manager._code_execution_server import _sanitize_command
217
+
218
+ commands = [
219
+ "su root",
220
+ "su - postgres",
221
+ "chown root:root file.txt",
222
+ "chmod 777 file.txt",
223
+ "chmod +x script.sh",
224
+ ]
225
+
226
+ for cmd in commands:
227
+ with pytest.raises(ValueError, match="not allowed"):
228
+ _sanitize_command(cmd, enable_sudo=False)
229
+
230
+ def test_su_chown_chmod_allowed_with_sudo_flag(self):
231
+ """Test that su, chown, chmod are allowed when enable_sudo=True (Docker sudo mode)."""
232
+ from massgen.filesystem_manager._code_execution_server import _sanitize_command
233
+
234
+ # In Docker sudo mode, these are safe because they're confined to container
235
+ commands = [
236
+ "su postgres",
237
+ "chown user:group file.txt",
238
+ "chmod 755 file.txt",
239
+ "chmod +x script.sh",
240
+ ]
241
+
242
+ for cmd in commands:
243
+ # Should not raise when enable_sudo=True
244
+ _sanitize_command(cmd, enable_sudo=True)
245
+
246
+ def test_local_mode_blocks_sudo(self):
247
+ """Test that local mode (non-Docker) blocks sudo commands."""
248
+ from massgen.filesystem_manager._code_execution_server import _sanitize_command
249
+
250
+ # In local mode (enable_sudo=False), sudo should be blocked for safety
251
+ with pytest.raises(ValueError, match="sudo.*not allowed"):
252
+ _sanitize_command("sudo apt-get install malicious-package", enable_sudo=False)
253
+
254
+ def test_docker_sudo_mode_allows_sudo(self):
255
+ """Test that Docker sudo mode allows sudo commands."""
256
+ from massgen.filesystem_manager._code_execution_server import _sanitize_command
257
+
258
+ # In Docker mode with enable_sudo=True, sudo should be allowed
259
+ # (safe because it's inside container)
260
+ _sanitize_command("sudo apt-get install gh", enable_sudo=True)
261
+
262
+
157
263
  class TestOutputHandling:
158
264
  """Test output capture and size limits."""
159
265
 
@@ -674,6 +780,78 @@ class TestDockerExecution:
674
780
  # Cleanup
675
781
  manager.cleanup("test_context")
676
782
 
783
+ @pytest.mark.docker
784
+ def test_docker_sudo_enabled_image_selection(self):
785
+ """Test that enabling sudo automatically selects the sudo image variant."""
786
+ from massgen.filesystem_manager._docker_manager import DockerManager
787
+
788
+ # Test 1: Default image with sudo=False should use regular image
789
+ manager_no_sudo = DockerManager(enable_sudo=False)
790
+ assert manager_no_sudo.image == "massgen/mcp-runtime:latest"
791
+ assert manager_no_sudo.enable_sudo is False
792
+
793
+ # Test 2: Default image with sudo=True should auto-switch to sudo variant
794
+ manager_with_sudo = DockerManager(enable_sudo=True)
795
+ assert manager_with_sudo.image == "massgen/mcp-runtime-sudo:latest"
796
+ assert manager_with_sudo.enable_sudo is True
797
+
798
+ # Test 3: Custom image with sudo=True should keep custom image
799
+ manager_custom = DockerManager(
800
+ image="my-custom-image:latest",
801
+ enable_sudo=True,
802
+ )
803
+ assert manager_custom.image == "my-custom-image:latest"
804
+ assert manager_custom.enable_sudo is True
805
+
806
+ @pytest.mark.docker
807
+ def test_docker_sudo_functionality(self, tmp_path):
808
+ """Test that sudo commands work in sudo-enabled container."""
809
+ from massgen.filesystem_manager._docker_manager import DockerManager
810
+
811
+ # Skip if sudo image not built
812
+ manager = DockerManager(enable_sudo=True)
813
+ try:
814
+ manager.ensure_image_exists()
815
+ except RuntimeError:
816
+ pytest.skip("Sudo Docker image not built. Run: bash massgen/docker/build.sh --sudo")
817
+
818
+ workspace = tmp_path / "workspace_sudo"
819
+ workspace.mkdir()
820
+
821
+ # Create container with sudo enabled
822
+ manager.create_container(
823
+ agent_id="test_sudo",
824
+ workspace_path=workspace,
825
+ )
826
+
827
+ # Test 1: Verify whoami returns 'massgen' (non-root user)
828
+ result_whoami = manager.exec_command(
829
+ agent_id="test_sudo",
830
+ command="whoami",
831
+ )
832
+ assert result_whoami["success"] is True
833
+ assert "massgen" in result_whoami["stdout"]
834
+
835
+ # Test 2: Verify sudo whoami returns 'root' (sudo works)
836
+ result_sudo_whoami = manager.exec_command(
837
+ agent_id="test_sudo",
838
+ command="sudo whoami",
839
+ )
840
+ assert result_sudo_whoami["success"] is True
841
+ assert "root" in result_sudo_whoami["stdout"]
842
+
843
+ # Test 3: Verify sudo apt-get update works (package installation capability)
844
+ result_apt = manager.exec_command(
845
+ agent_id="test_sudo",
846
+ command="sudo apt-get update",
847
+ timeout=60,
848
+ )
849
+ # This should succeed in sudo image (may fail in network=none, but command should run)
850
+ assert result_apt["exit_code"] is not None
851
+
852
+ # Cleanup
853
+ manager.cleanup("test_sudo")
854
+
677
855
 
678
856
  if __name__ == "__main__":
679
857
  pytest.main([__file__, "-v"])