massgen 0.0.3__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (268) hide show
  1. massgen/__init__.py +142 -8
  2. massgen/adapters/__init__.py +29 -0
  3. massgen/adapters/ag2_adapter.py +483 -0
  4. massgen/adapters/base.py +183 -0
  5. massgen/adapters/tests/__init__.py +0 -0
  6. massgen/adapters/tests/test_ag2_adapter.py +439 -0
  7. massgen/adapters/tests/test_agent_adapter.py +128 -0
  8. massgen/adapters/utils/__init__.py +2 -0
  9. massgen/adapters/utils/ag2_utils.py +236 -0
  10. massgen/adapters/utils/tests/__init__.py +0 -0
  11. massgen/adapters/utils/tests/test_ag2_utils.py +138 -0
  12. massgen/agent_config.py +329 -55
  13. massgen/api_params_handler/__init__.py +10 -0
  14. massgen/api_params_handler/_api_params_handler_base.py +99 -0
  15. massgen/api_params_handler/_chat_completions_api_params_handler.py +176 -0
  16. massgen/api_params_handler/_claude_api_params_handler.py +113 -0
  17. massgen/api_params_handler/_response_api_params_handler.py +130 -0
  18. massgen/backend/__init__.py +39 -4
  19. massgen/backend/azure_openai.py +385 -0
  20. massgen/backend/base.py +341 -69
  21. massgen/backend/base_with_mcp.py +1102 -0
  22. massgen/backend/capabilities.py +386 -0
  23. massgen/backend/chat_completions.py +577 -130
  24. massgen/backend/claude.py +1033 -537
  25. massgen/backend/claude_code.py +1203 -0
  26. massgen/backend/cli_base.py +209 -0
  27. massgen/backend/docs/BACKEND_ARCHITECTURE.md +126 -0
  28. massgen/backend/{CLAUDE_API_RESEARCH.md → docs/CLAUDE_API_RESEARCH.md} +18 -18
  29. massgen/backend/{GEMINI_API_DOCUMENTATION.md → docs/GEMINI_API_DOCUMENTATION.md} +9 -9
  30. massgen/backend/docs/Gemini MCP Integration Analysis.md +1050 -0
  31. massgen/backend/docs/MCP_IMPLEMENTATION_CLAUDE_BACKEND.md +177 -0
  32. massgen/backend/docs/MCP_INTEGRATION_RESPONSE_BACKEND.md +352 -0
  33. massgen/backend/docs/OPENAI_GPT5_MODELS.md +211 -0
  34. massgen/backend/{OPENAI_RESPONSES_API_FORMAT.md → docs/OPENAI_RESPONSE_API_TOOL_CALLS.md} +3 -3
  35. massgen/backend/docs/OPENAI_response_streaming.md +20654 -0
  36. massgen/backend/docs/inference_backend.md +257 -0
  37. massgen/backend/docs/permissions_and_context_files.md +1085 -0
  38. massgen/backend/external.py +126 -0
  39. massgen/backend/gemini.py +1850 -241
  40. massgen/backend/grok.py +40 -156
  41. massgen/backend/inference.py +156 -0
  42. massgen/backend/lmstudio.py +171 -0
  43. massgen/backend/response.py +1095 -322
  44. massgen/chat_agent.py +131 -113
  45. massgen/cli.py +1560 -275
  46. massgen/config_builder.py +2396 -0
  47. massgen/configs/BACKEND_CONFIGURATION.md +458 -0
  48. massgen/configs/README.md +559 -216
  49. massgen/configs/ag2/ag2_case_study.yaml +27 -0
  50. massgen/configs/ag2/ag2_coder.yaml +34 -0
  51. massgen/configs/ag2/ag2_coder_case_study.yaml +36 -0
  52. massgen/configs/ag2/ag2_gemini.yaml +27 -0
  53. massgen/configs/ag2/ag2_groupchat.yaml +108 -0
  54. massgen/configs/ag2/ag2_groupchat_gpt.yaml +118 -0
  55. massgen/configs/ag2/ag2_single_agent.yaml +21 -0
  56. massgen/configs/basic/multi/fast_timeout_example.yaml +37 -0
  57. massgen/configs/basic/multi/gemini_4o_claude.yaml +31 -0
  58. massgen/configs/basic/multi/gemini_gpt5nano_claude.yaml +36 -0
  59. massgen/configs/{gemini_4o_claude.yaml → basic/multi/geminicode_4o_claude.yaml} +3 -3
  60. massgen/configs/basic/multi/geminicode_gpt5nano_claude.yaml +36 -0
  61. massgen/configs/basic/multi/glm_gemini_claude.yaml +25 -0
  62. massgen/configs/basic/multi/gpt4o_audio_generation.yaml +30 -0
  63. massgen/configs/basic/multi/gpt4o_image_generation.yaml +31 -0
  64. massgen/configs/basic/multi/gpt5nano_glm_qwen.yaml +26 -0
  65. massgen/configs/basic/multi/gpt5nano_image_understanding.yaml +26 -0
  66. massgen/configs/{three_agents_default.yaml → basic/multi/three_agents_default.yaml} +8 -4
  67. massgen/configs/basic/multi/three_agents_opensource.yaml +27 -0
  68. massgen/configs/basic/multi/three_agents_vllm.yaml +20 -0
  69. massgen/configs/basic/multi/two_agents_gemini.yaml +19 -0
  70. massgen/configs/{two_agents.yaml → basic/multi/two_agents_gpt5.yaml} +14 -6
  71. massgen/configs/basic/multi/two_agents_opensource_lmstudio.yaml +31 -0
  72. massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml +28 -0
  73. massgen/configs/{single_agent.yaml → basic/single/single_agent.yaml} +1 -1
  74. massgen/configs/{single_flash2.5.yaml → basic/single/single_flash2.5.yaml} +1 -2
  75. massgen/configs/basic/single/single_gemini2.5pro.yaml +16 -0
  76. massgen/configs/basic/single/single_gpt4o_audio_generation.yaml +22 -0
  77. massgen/configs/basic/single/single_gpt4o_image_generation.yaml +22 -0
  78. massgen/configs/basic/single/single_gpt4o_video_generation.yaml +24 -0
  79. massgen/configs/basic/single/single_gpt5nano.yaml +20 -0
  80. massgen/configs/basic/single/single_gpt5nano_file_search.yaml +18 -0
  81. massgen/configs/basic/single/single_gpt5nano_image_understanding.yaml +17 -0
  82. massgen/configs/basic/single/single_gptoss120b.yaml +15 -0
  83. massgen/configs/basic/single/single_openrouter_audio_understanding.yaml +15 -0
  84. massgen/configs/basic/single/single_qwen_video_understanding.yaml +15 -0
  85. massgen/configs/debug/code_execution/command_filtering_blacklist.yaml +29 -0
  86. massgen/configs/debug/code_execution/command_filtering_whitelist.yaml +28 -0
  87. massgen/configs/debug/code_execution/docker_verification.yaml +29 -0
  88. massgen/configs/debug/skip_coordination_test.yaml +27 -0
  89. massgen/configs/debug/test_sdk_migration.yaml +17 -0
  90. massgen/configs/docs/DISCORD_MCP_SETUP.md +208 -0
  91. massgen/configs/docs/TWITTER_MCP_ENESCINAR_SETUP.md +82 -0
  92. massgen/configs/providers/azure/azure_openai_multi.yaml +21 -0
  93. massgen/configs/providers/azure/azure_openai_single.yaml +19 -0
  94. massgen/configs/providers/claude/claude.yaml +14 -0
  95. massgen/configs/providers/gemini/gemini_gpt5nano.yaml +28 -0
  96. massgen/configs/providers/local/lmstudio.yaml +11 -0
  97. massgen/configs/providers/openai/gpt5.yaml +46 -0
  98. massgen/configs/providers/openai/gpt5_nano.yaml +46 -0
  99. massgen/configs/providers/others/grok_single_agent.yaml +19 -0
  100. massgen/configs/providers/others/zai_coding_team.yaml +108 -0
  101. massgen/configs/providers/others/zai_glm45.yaml +12 -0
  102. massgen/configs/{creative_team.yaml → teams/creative/creative_team.yaml} +16 -6
  103. massgen/configs/{travel_planning.yaml → teams/creative/travel_planning.yaml} +16 -6
  104. massgen/configs/{news_analysis.yaml → teams/research/news_analysis.yaml} +16 -6
  105. massgen/configs/{research_team.yaml → teams/research/research_team.yaml} +15 -7
  106. massgen/configs/{technical_analysis.yaml → teams/research/technical_analysis.yaml} +16 -6
  107. massgen/configs/tools/code-execution/basic_command_execution.yaml +25 -0
  108. massgen/configs/tools/code-execution/code_execution_use_case_simple.yaml +41 -0
  109. massgen/configs/tools/code-execution/docker_claude_code.yaml +32 -0
  110. massgen/configs/tools/code-execution/docker_multi_agent.yaml +32 -0
  111. massgen/configs/tools/code-execution/docker_simple.yaml +29 -0
  112. massgen/configs/tools/code-execution/docker_with_resource_limits.yaml +32 -0
  113. massgen/configs/tools/code-execution/multi_agent_playwright_automation.yaml +57 -0
  114. massgen/configs/tools/filesystem/cc_gpt5_gemini_filesystem.yaml +34 -0
  115. massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +68 -0
  116. massgen/configs/tools/filesystem/claude_code_flash2.5.yaml +43 -0
  117. massgen/configs/tools/filesystem/claude_code_flash2.5_gptoss.yaml +49 -0
  118. massgen/configs/tools/filesystem/claude_code_gpt5nano.yaml +31 -0
  119. massgen/configs/tools/filesystem/claude_code_single.yaml +40 -0
  120. massgen/configs/tools/filesystem/fs_permissions_test.yaml +87 -0
  121. massgen/configs/tools/filesystem/gemini_gemini_workspace_cleanup.yaml +54 -0
  122. massgen/configs/tools/filesystem/gemini_gpt5_filesystem_casestudy.yaml +30 -0
  123. massgen/configs/tools/filesystem/gemini_gpt5nano_file_context_path.yaml +43 -0
  124. massgen/configs/tools/filesystem/gemini_gpt5nano_protected_paths.yaml +45 -0
  125. massgen/configs/tools/filesystem/gpt5mini_cc_fs_context_path.yaml +31 -0
  126. massgen/configs/tools/filesystem/grok4_gpt5_gemini_filesystem.yaml +32 -0
  127. massgen/configs/tools/filesystem/multiturn/grok4_gpt5_claude_code_filesystem_multiturn.yaml +58 -0
  128. massgen/configs/tools/filesystem/multiturn/grok4_gpt5_gemini_filesystem_multiturn.yaml +58 -0
  129. massgen/configs/tools/filesystem/multiturn/two_claude_code_filesystem_multiturn.yaml +47 -0
  130. massgen/configs/tools/filesystem/multiturn/two_gemini_flash_filesystem_multiturn.yaml +48 -0
  131. massgen/configs/tools/mcp/claude_code_discord_mcp_example.yaml +27 -0
  132. massgen/configs/tools/mcp/claude_code_simple_mcp.yaml +35 -0
  133. massgen/configs/tools/mcp/claude_code_twitter_mcp_example.yaml +32 -0
  134. massgen/configs/tools/mcp/claude_mcp_example.yaml +24 -0
  135. massgen/configs/tools/mcp/claude_mcp_test.yaml +27 -0
  136. massgen/configs/tools/mcp/five_agents_travel_mcp_test.yaml +157 -0
  137. massgen/configs/tools/mcp/five_agents_weather_mcp_test.yaml +103 -0
  138. massgen/configs/tools/mcp/gemini_mcp_example.yaml +24 -0
  139. massgen/configs/tools/mcp/gemini_mcp_filesystem_test.yaml +23 -0
  140. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_sharing.yaml +23 -0
  141. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_single_agent.yaml +17 -0
  142. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_with_claude_code.yaml +24 -0
  143. massgen/configs/tools/mcp/gemini_mcp_test.yaml +27 -0
  144. massgen/configs/tools/mcp/gemini_notion_mcp.yaml +52 -0
  145. massgen/configs/tools/mcp/gpt5_nano_mcp_example.yaml +24 -0
  146. massgen/configs/tools/mcp/gpt5_nano_mcp_test.yaml +27 -0
  147. massgen/configs/tools/mcp/gpt5mini_claude_code_discord_mcp_example.yaml +38 -0
  148. massgen/configs/tools/mcp/gpt_oss_mcp_example.yaml +25 -0
  149. massgen/configs/tools/mcp/gpt_oss_mcp_test.yaml +28 -0
  150. massgen/configs/tools/mcp/grok3_mini_mcp_example.yaml +24 -0
  151. massgen/configs/tools/mcp/grok3_mini_mcp_test.yaml +27 -0
  152. massgen/configs/tools/mcp/multimcp_gemini.yaml +111 -0
  153. massgen/configs/tools/mcp/qwen_api_mcp_example.yaml +25 -0
  154. massgen/configs/tools/mcp/qwen_api_mcp_test.yaml +28 -0
  155. massgen/configs/tools/mcp/qwen_local_mcp_example.yaml +24 -0
  156. massgen/configs/tools/mcp/qwen_local_mcp_test.yaml +27 -0
  157. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +140 -0
  158. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +151 -0
  159. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +151 -0
  160. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +155 -0
  161. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +73 -0
  162. massgen/configs/tools/web-search/claude_streamable_http_test.yaml +43 -0
  163. massgen/configs/tools/web-search/gemini_streamable_http_test.yaml +43 -0
  164. massgen/configs/tools/web-search/gpt5_mini_streamable_http_test.yaml +43 -0
  165. massgen/configs/tools/web-search/gpt_oss_streamable_http_test.yaml +44 -0
  166. massgen/configs/tools/web-search/grok3_mini_streamable_http_test.yaml +43 -0
  167. massgen/configs/tools/web-search/qwen_api_streamable_http_test.yaml +44 -0
  168. massgen/configs/tools/web-search/qwen_local_streamable_http_test.yaml +43 -0
  169. massgen/coordination_tracker.py +708 -0
  170. massgen/docker/README.md +462 -0
  171. massgen/filesystem_manager/__init__.py +21 -0
  172. massgen/filesystem_manager/_base.py +9 -0
  173. massgen/filesystem_manager/_code_execution_server.py +545 -0
  174. massgen/filesystem_manager/_docker_manager.py +477 -0
  175. massgen/filesystem_manager/_file_operation_tracker.py +248 -0
  176. massgen/filesystem_manager/_filesystem_manager.py +813 -0
  177. massgen/filesystem_manager/_path_permission_manager.py +1261 -0
  178. massgen/filesystem_manager/_workspace_tools_server.py +1815 -0
  179. massgen/formatter/__init__.py +10 -0
  180. massgen/formatter/_chat_completions_formatter.py +284 -0
  181. massgen/formatter/_claude_formatter.py +235 -0
  182. massgen/formatter/_formatter_base.py +156 -0
  183. massgen/formatter/_response_formatter.py +263 -0
  184. massgen/frontend/__init__.py +1 -2
  185. massgen/frontend/coordination_ui.py +471 -286
  186. massgen/frontend/displays/base_display.py +56 -11
  187. massgen/frontend/displays/create_coordination_table.py +1956 -0
  188. massgen/frontend/displays/rich_terminal_display.py +1259 -619
  189. massgen/frontend/displays/simple_display.py +9 -4
  190. massgen/frontend/displays/terminal_display.py +27 -68
  191. massgen/logger_config.py +681 -0
  192. massgen/mcp_tools/README.md +232 -0
  193. massgen/mcp_tools/__init__.py +105 -0
  194. massgen/mcp_tools/backend_utils.py +1035 -0
  195. massgen/mcp_tools/circuit_breaker.py +195 -0
  196. massgen/mcp_tools/client.py +894 -0
  197. massgen/mcp_tools/config_validator.py +138 -0
  198. massgen/mcp_tools/docs/circuit_breaker.md +646 -0
  199. massgen/mcp_tools/docs/client.md +950 -0
  200. massgen/mcp_tools/docs/config_validator.md +478 -0
  201. massgen/mcp_tools/docs/exceptions.md +1165 -0
  202. massgen/mcp_tools/docs/security.md +854 -0
  203. massgen/mcp_tools/exceptions.py +338 -0
  204. massgen/mcp_tools/hooks.py +212 -0
  205. massgen/mcp_tools/security.py +780 -0
  206. massgen/message_templates.py +342 -64
  207. massgen/orchestrator.py +1515 -241
  208. massgen/stream_chunk/__init__.py +35 -0
  209. massgen/stream_chunk/base.py +92 -0
  210. massgen/stream_chunk/multimodal.py +237 -0
  211. massgen/stream_chunk/text.py +162 -0
  212. massgen/tests/mcp_test_server.py +150 -0
  213. massgen/tests/multi_turn_conversation_design.md +0 -8
  214. massgen/tests/test_azure_openai_backend.py +156 -0
  215. massgen/tests/test_backend_capabilities.py +262 -0
  216. massgen/tests/test_backend_event_loop_all.py +179 -0
  217. massgen/tests/test_chat_completions_refactor.py +142 -0
  218. massgen/tests/test_claude_backend.py +15 -28
  219. massgen/tests/test_claude_code.py +268 -0
  220. massgen/tests/test_claude_code_context_sharing.py +233 -0
  221. massgen/tests/test_claude_code_orchestrator.py +175 -0
  222. massgen/tests/test_cli_backends.py +180 -0
  223. massgen/tests/test_code_execution.py +679 -0
  224. massgen/tests/test_external_agent_backend.py +134 -0
  225. massgen/tests/test_final_presentation_fallback.py +237 -0
  226. massgen/tests/test_gemini_planning_mode.py +351 -0
  227. massgen/tests/test_grok_backend.py +7 -10
  228. massgen/tests/test_http_mcp_server.py +42 -0
  229. massgen/tests/test_integration_simple.py +198 -0
  230. massgen/tests/test_mcp_blocking.py +125 -0
  231. massgen/tests/test_message_context_building.py +29 -47
  232. massgen/tests/test_orchestrator_final_presentation.py +48 -0
  233. massgen/tests/test_path_permission_manager.py +2087 -0
  234. massgen/tests/test_rich_terminal_display.py +14 -13
  235. massgen/tests/test_timeout.py +133 -0
  236. massgen/tests/test_v3_3agents.py +11 -12
  237. massgen/tests/test_v3_simple.py +8 -13
  238. massgen/tests/test_v3_three_agents.py +11 -18
  239. massgen/tests/test_v3_two_agents.py +8 -13
  240. massgen/token_manager/__init__.py +7 -0
  241. massgen/token_manager/token_manager.py +400 -0
  242. massgen/utils.py +52 -16
  243. massgen/v1/agent.py +45 -91
  244. massgen/v1/agents.py +18 -53
  245. massgen/v1/backends/gemini.py +50 -153
  246. massgen/v1/backends/grok.py +21 -54
  247. massgen/v1/backends/oai.py +39 -111
  248. massgen/v1/cli.py +36 -93
  249. massgen/v1/config.py +8 -12
  250. massgen/v1/logging.py +43 -127
  251. massgen/v1/main.py +18 -32
  252. massgen/v1/orchestrator.py +68 -209
  253. massgen/v1/streaming_display.py +62 -163
  254. massgen/v1/tools.py +8 -12
  255. massgen/v1/types.py +9 -23
  256. massgen/v1/utils.py +5 -23
  257. massgen-0.1.0.dist-info/METADATA +1245 -0
  258. massgen-0.1.0.dist-info/RECORD +273 -0
  259. massgen-0.1.0.dist-info/entry_points.txt +2 -0
  260. massgen/frontend/logging/__init__.py +0 -9
  261. massgen/frontend/logging/realtime_logger.py +0 -197
  262. massgen-0.0.3.dist-info/METADATA +0 -568
  263. massgen-0.0.3.dist-info/RECORD +0 -76
  264. massgen-0.0.3.dist-info/entry_points.txt +0 -2
  265. /massgen/backend/{Function calling openai responses.md → docs/Function calling openai responses.md} +0 -0
  266. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/WHEEL +0 -0
  267. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/licenses/LICENSE +0 -0
  268. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,679 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Unit tests for code execution MCP server.
4
+ """
5
+ import subprocess
6
+ import sys
7
+
8
+ import pytest
9
+
10
+
11
+ # Test utilities
12
def run_command_directly(command: str, cwd: "str | None" = None, timeout: int = 10) -> "tuple[int, str, str]":
    """Run a shell command and return its exit status and captured output.

    Args:
        command: Complete command line, handed to the system shell.
        cwd: Directory to run the command in. ``None`` keeps the working
            directory of the calling process.
        timeout: Number of seconds ``subprocess.run`` waits before giving up.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple. Both streams are captured
        and decoded as text.

    Raises:
        subprocess.TimeoutExpired: If the command runs longer than ``timeout``.
        OSError: If the process cannot be started, e.g. ``cwd`` does not exist.
    """
    # shell=True is intentional: callers pass whole command strings, not argv lists.
    completed = subprocess.run(
        command,
        shell=True,
        cwd=cwd,
        timeout=timeout,
        capture_output=True,
        text=True,
    )
    return completed.returncode, completed.stdout, completed.stderr
23
+
24
+
25
class TestCodeExecutionBasics:
    """Smoke tests for running commands through ``run_command_directly``."""

    def test_simple_python_command(self, tmp_path):
        """A one-line ``python -c`` call exits cleanly and prints its text."""
        command = f'{sys.executable} -c "print(\\"Hello, World!\\")"'
        exit_code, stdout, _ = run_command_directly(command, cwd=str(tmp_path))
        assert exit_code == 0
        assert "Hello, World!" in stdout

    def test_python_script_execution(self, tmp_path):
        """A script file placed in the working directory can be executed."""
        # Write the script the command below will run.
        script = tmp_path / "test_script.py"
        script.write_text("print('Script executed')\nprint('Success')")

        command = f"{sys.executable} test_script.py"
        exit_code, stdout, _ = run_command_directly(command, cwd=str(tmp_path))
        assert exit_code == 0
        for expected_line in ("Script executed", "Success"):
            assert expected_line in stdout

    def test_command_with_error(self, tmp_path):
        """A non-zero exit status is reported back to the caller."""
        command = f'{sys.executable} -c "import sys; sys.exit(1)"'
        exit_code, _, _ = run_command_directly(command, cwd=str(tmp_path))
        assert exit_code == 1

    def test_command_timeout(self, tmp_path):
        """A command that outlives its timeout raises ``TimeoutExpired``."""
        slow_command = f'{sys.executable} -c "import time; time.sleep(10)"'
        with pytest.raises(subprocess.TimeoutExpired):
            run_command_directly(slow_command, cwd=str(tmp_path), timeout=1)

    def test_working_directory(self, tmp_path):
        """The command sees the files of the directory passed as ``cwd``."""
        # Drop a marker file into the directory the command will list.
        marker = tmp_path / "test.txt"
        marker.write_text("test content")

        listing_command = f'{sys.executable} -c "import os; print(os.listdir())"'
        exit_code, stdout, _ = run_command_directly(listing_command, cwd=str(tmp_path))
        assert exit_code == 0
        assert "test.txt" in stdout
81
+
82
+
83
class TestPathValidation:
    """Test path validation and security."""

    def test_path_exists_validation(self, tmp_path):
        """Test that a non-existent working directory is rejected."""
        non_existent = tmp_path / "does_not_exist"
        # A missing cwd surfaces as FileNotFoundError on POSIX, but Windows
        # reports it as NotADirectoryError (WinError 267), so accept both.
        with pytest.raises((FileNotFoundError, NotADirectoryError)):
            run_command_directly(
                'echo "test"',
                cwd=str(non_existent),
            )

    def test_relative_path_resolution(self, tmp_path):
        """Test that relative paths are resolved against the working directory."""
        # Create subdirectory
        subdir = tmp_path / "subdir"
        subdir.mkdir()

        # Create file in subdir
        test_file = subdir / "test.txt"
        test_file.write_text("content")

        # Look the file up from the parent directory using a relative path
        exit_code, stdout, stderr = run_command_directly(
            f"{sys.executable} -c \"import os; print(os.path.exists('subdir/test.txt'))\"",
            cwd=str(tmp_path),
        )
        assert exit_code == 0
        assert "True" in stdout
113
+
114
+
115
class TestCommandSanitization:
    """Checks of the command filter exposed as ``_sanitize_command``."""

    def test_dangerous_command_patterns(self):
        """Each command in the rejected set must raise ``ValueError``."""
        from massgen.filesystem_manager._code_execution_server import _sanitize_command

        rejected = (
            "rm -rf /",
            "dd if=/dev/zero of=/dev/sda",
            ":(){ :|:& };:",  # Fork bomb
            "mv file /dev/null",
            "sudo apt install something",
            "su root",
            "chown root file.txt",
            "chmod 777 file.txt",
        )

        for command in rejected:
            with pytest.raises(ValueError, match="dangerous|not allowed"):
                _sanitize_command(command)

    def test_safe_commands_pass(self):
        """Ordinary commands go through the filter without an exception."""
        from massgen.filesystem_manager._code_execution_server import _sanitize_command

        accepted = (
            "python script.py",
            "pytest tests/",
            "npm run build",
            "ls -la",
            "rm file.txt",  # Specific file, not rm -rf /
            "git submodule update",  # Contains "su" but not "su " command
            "echo 'summary'",  # Contains "su" substring
            "python -m pip install --user requests",  # Contains "user" not "su"
        )

        # Any exception raised here fails the test.
        for command in accepted:
            _sanitize_command(command)
155
+
156
+
157
class TestOutputHandling:
    """Checks that both output streams of a command are captured."""

    def test_stdout_capture(self, tmp_path):
        """Everything the command prints ends up in ``stdout``."""
        command = f'{sys.executable} -c "print(\\"line1\\"); print(\\"line2\\")"'
        exit_code, stdout, _ = run_command_directly(command, cwd=str(tmp_path))
        assert exit_code == 0
        for expected_line in ("line1", "line2"):
            assert expected_line in stdout

    def test_stderr_capture(self, tmp_path):
        """Text written to the error stream ends up in ``stderr``."""
        command = f'{sys.executable} -c "import sys; sys.stderr.write(\\"error message\\\\n\\")"'
        _, _, stderr = run_command_directly(command, cwd=str(tmp_path))
        assert "error message" in stderr

    def test_large_output_handling(self, tmp_path):
        """A command printing 1000 lines still returns captured output."""
        command = f'{sys.executable} -c "for i in range(1000): print(i)"'
        exit_code, stdout, _ = run_command_directly(command, cwd=str(tmp_path))
        assert exit_code == 0
        assert stdout  # Output was captured
187
+
188
+
189
class TestCrossPlatform:
    """Checks that the interpreter and pip respond when run as commands."""

    def test_python_version_check(self, tmp_path):
        """``python --version`` succeeds and names Python on either stream."""
        command = f"{sys.executable} --version"
        exit_code, stdout, stderr = run_command_directly(command, cwd=str(tmp_path))
        assert exit_code == 0
        # The version banner may be written to stderr instead of stdout.
        assert any("Python" in stream for stream in (stdout, stderr))

    def test_pip_install(self, tmp_path):
        """``python -m pip --version`` succeeds; nothing is installed."""
        command = f"{sys.executable} -m pip --version"
        exit_code, stdout, stderr = run_command_directly(command, cwd=str(tmp_path))
        assert exit_code == 0
        assert any("pip" in stream for stream in (stdout, stderr))
210
+
211
+
212
class TestAutoGeneratedFiles:
    """Checks of the read-before-delete rule for auto-generated files."""

    @staticmethod
    def _strict_tracker():
        """Build a tracker with read-before-delete enforcement switched on."""
        from massgen.filesystem_manager._file_operation_tracker import (
            FileOperationTracker,
        )

        return FileOperationTracker(enforce_read_before_delete=True)

    def test_pycache_deletion_allowed(self, tmp_path):
        """A file inside ``__pycache__`` may be deleted without being read."""
        tracker = self._strict_tracker()

        # Fake bytecode file inside a __pycache__ directory.
        cache_dir = tmp_path / "__pycache__"
        cache_dir.mkdir()
        bytecode = cache_dir / "test.cpython-313.pyc"
        bytecode.write_text("fake bytecode")

        allowed, reason = tracker.can_delete(bytecode)
        assert allowed
        assert reason is None

    def test_pyc_file_deletion_allowed(self, tmp_path):
        """A ``.pyc`` file may be deleted without being read."""
        tracker = self._strict_tracker()

        # Fake bytecode file directly in the temp directory.
        bytecode = tmp_path / "module.pyc"
        bytecode.write_text("fake bytecode")

        allowed, reason = tracker.can_delete(bytecode)
        assert allowed
        assert reason is None

    def test_pytest_cache_deletion_allowed(self, tmp_path):
        """A file under ``.pytest_cache`` may be deleted without being read."""
        tracker = self._strict_tracker()

        # Fake cache entry at .pytest_cache/v/cache/nodeids.
        nested_dir = tmp_path / ".pytest_cache" / "v" / "cache"
        nested_dir.mkdir(parents=True)
        cache_entry = nested_dir / "nodeids"
        cache_entry.write_text("test data")

        allowed, reason = tracker.can_delete(cache_entry)
        assert allowed
        assert reason is None

    def test_regular_file_requires_read(self, tmp_path):
        """An ordinary source file is refused until it has been read."""
        tracker = self._strict_tracker()

        # Regular Python source file that was never read.
        source_file = tmp_path / "module.py"
        source_file.write_text("print('hello')")

        allowed, reason = tracker.can_delete(source_file)
        assert not allowed
        assert reason is not None
        assert "must be read before deletion" in reason

    def test_directory_with_pycache_allowed(self, tmp_path):
        """A directory holding only ``__pycache__`` content may be deleted."""
        tracker = self._strict_tracker()

        # Package directory whose only content is a __pycache__ folder.
        package_dir = tmp_path / "mymodule"
        package_dir.mkdir()
        cache_dir = package_dir / "__pycache__"
        cache_dir.mkdir()
        bytecode = cache_dir / "test.pyc"
        bytecode.write_text("fake bytecode")

        allowed, reason = tracker.can_delete_directory(package_dir)
        assert allowed
        assert reason is None
309
+
310
+
311
class TestVirtualEnvironment:
    """Checks of the environment built by ``_prepare_environment``."""

    def test_auto_detect_venv(self, tmp_path):
        """A ``.venv`` directory in the workspace shows up in the environment."""
        from massgen.filesystem_manager._code_execution_server import (
            _prepare_environment,
        )

        # Minimal fake layout: <tmp>/.venv/bin
        venv_root = tmp_path / ".venv"
        venv_bin = venv_root / "bin"
        venv_bin.mkdir(parents=True, exist_ok=True)

        prepared = _prepare_environment(tmp_path)

        assert "PATH" in prepared
        assert str(venv_bin) in prepared["PATH"]
        assert "VIRTUAL_ENV" in prepared
        assert str(venv_root) in prepared["VIRTUAL_ENV"]

    def test_no_venv_fallback(self, tmp_path):
        """Without a ``.venv`` directory the PATH equals the system PATH."""
        import os

        from massgen.filesystem_manager._code_execution_server import (
            _prepare_environment,
        )

        # tmp_path contains no .venv directory here.
        prepared = _prepare_environment(tmp_path)

        system_path = os.environ["PATH"]
        assert prepared["PATH"] == system_path
346
+
347
+
348
+ class TestDockerExecution:
349
+ """Test Docker-based command execution."""
350
+
351
@pytest.fixture(autouse=True)
def check_docker(self):
    """Skip the test unless Docker responds and the runtime image is present."""
    try:
        import docker

        daemon = docker.from_env()
        daemon.ping()
        # The tests need the prebuilt runtime image; skip when it is missing.
        try:
            daemon.images.get("massgen/mcp-runtime:latest")
        except docker.errors.ImageNotFound:
            pytest.skip("Docker image 'massgen/mcp-runtime:latest' not found. Run: bash massgen/docker/build.sh")
    except ImportError:
        pytest.skip("Docker library not installed. Install with: pip install docker")
    except Exception as exc:
        pytest.skip(f"Docker not available: {exc}")
368
+
369
def test_docker_manager_initialization(self):
    """A new ``DockerManager`` keeps its settings and tracks no containers."""
    from massgen.filesystem_manager._docker_manager import DockerManager

    docker_manager = DockerManager(image="massgen/mcp-runtime:latest", network_mode="none")

    assert docker_manager.image == "massgen/mcp-runtime:latest"
    assert docker_manager.network_mode == "none"
    assert docker_manager.containers == {}
380
+
381
def test_docker_container_creation(self, tmp_path):
    """Test creating a Docker container for an agent workspace."""
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager()

    # Create workspace
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # Create container
    container_id = manager.create_container(
        agent_id="test_agent",
        workspace_path=workspace,
    )

    try:
        assert container_id is not None
        assert "test_agent" in manager.containers
    finally:
        # Always clean up, so a failed assertion does not leave the container behind.
        manager.cleanup("test_agent")
402
+
403
def test_docker_command_execution(self, tmp_path):
    """Test executing a command inside a Docker container."""
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager()

    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # Create container
    manager.create_container(
        agent_id="test_exec",
        workspace_path=workspace,
    )

    try:
        # Execute simple command
        result = manager.exec_command(
            agent_id="test_exec",
            command="echo 'Hello from Docker'",
        )

        assert result["success"] is True
        assert result["exit_code"] == 0
        assert "Hello from Docker" in result["stdout"]
    finally:
        # Always clean up, so a failed assertion does not leave the container behind.
        manager.cleanup("test_exec")
430
+
431
def test_docker_container_persistence(self, tmp_path):
    """Test that container state persists across commands."""
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager()

    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # Create container
    manager.create_container(
        agent_id="test_persist",
        workspace_path=workspace,
    )

    try:
        # Install a package that's NOT pre-installed (pytest, numpy, pandas, requests are in the image)
        result1 = manager.exec_command(
            agent_id="test_persist",
            command="pip install --quiet click",
        )
        assert result1["success"] is True

        # Verify package is still installed (container persisted)
        result2 = manager.exec_command(
            agent_id="test_persist",
            command="python -c 'import click; print(click.__version__)'",
        )
        assert result2["success"] is True
        assert len(result2["stdout"].strip()) > 0  # Should have version output
    finally:
        # Always clean up, so a failed assertion does not leave the container behind.
        manager.cleanup("test_persist")
463
+
464
def test_docker_workspace_mounting(self, tmp_path):
    """Test that workspace is mounted correctly (with path transparency).

    A file written on the host must be readable inside the container under
    the same host path, and a file written from inside the container must
    appear in the host workspace.
    """
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager()

    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # Create a test file in workspace
    test_file = workspace / "test.txt"
    test_file.write_text("Hello from host")

    # Create container with workspace mounted
    manager.create_container(
        agent_id="test_mount",
        workspace_path=workspace,
    )

    # Clean up in ``finally`` so a failing step does not leak the container.
    try:
        # Read file from inside container using host path (path transparency)
        result = manager.exec_command(
            agent_id="test_mount",
            command=f"cat {workspace}/test.txt",
        )

        assert result["success"] is True
        assert "Hello from host" in result["stdout"]

        # Write file from container using host path
        result2 = manager.exec_command(
            agent_id="test_mount",
            command=f"echo 'Hello from container' > {workspace}/from_container.txt",
        )
        assert result2["success"] is True

        # Verify file exists on host
        from_container = workspace / "from_container.txt"
        assert from_container.exists()
        assert "Hello from container" in from_container.read_text()
    finally:
        manager.cleanup("test_mount")
506
+
507
def test_docker_container_isolation(self, tmp_path):
    """Test that containers are isolated from each other.

    Two agents get separate workspaces; a file written by ``agent1`` into
    its own workspace must not show up when ``agent2`` lists its workspace.
    """
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager()

    workspace1 = tmp_path / "workspace1"
    workspace1.mkdir()
    workspace2 = tmp_path / "workspace2"
    workspace2.mkdir()

    # Create two containers. The nested ``try/finally`` blocks guarantee that
    # each container that was created is cleaned up, even if creating the
    # second one or any later step fails.
    manager.create_container(agent_id="agent1", workspace_path=workspace1)
    try:
        manager.create_container(agent_id="agent2", workspace_path=workspace2)
        try:
            # Create file in agent1's workspace using host path
            result1 = manager.exec_command(
                agent_id="agent1",
                command=f"echo 'agent1 data' > {workspace1}/data.txt",
            )
            assert result1["success"] is True

            # Agent2 should not see agent1's file (isolated workspaces)
            result2 = manager.exec_command(
                agent_id="agent2",
                command=f"ls {workspace2}/",
            )
            assert result2["success"] is True
            assert "data.txt" not in result2["stdout"]  # Isolated
        finally:
            manager.cleanup("agent2")
    finally:
        manager.cleanup("agent1")
540
+
541
def test_docker_resource_limits(self, tmp_path):
    """Test that resource limits are applied.

    Builds a manager with ``memory_limit="512m"`` and ``cpu_limit=1.0`` and
    checks that a container can be created and retrieved with those
    settings.
    """
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager(
        memory_limit="512m",
        cpu_limit=1.0,
    )

    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # Create container with limits
    container_id = manager.create_container(
        agent_id="test_limits",
        workspace_path=workspace,
    )

    # Clean up in ``finally`` so a failing assertion does not leak the container.
    try:
        assert container_id is not None

        # Verify container was created (limits are applied at Docker level)
        # NOTE(review): this only checks that the container exists; the
        # configured memory/CPU limits themselves are not asserted here.
        container = manager.get_container("test_limits")
        assert container is not None
    finally:
        manager.cleanup("test_limits")
567
+
568
def test_docker_network_isolation(self, tmp_path):
    """Test that network isolation works.

    With ``network_mode="none"`` an outbound ``ping`` from inside the
    container is expected to fail.
    """
    from massgen.filesystem_manager._docker_manager import DockerManager

    # Create manager with no network
    manager = DockerManager(network_mode="none")

    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # Create container
    manager.create_container(
        agent_id="test_network",
        workspace_path=workspace,
    )

    # Clean up in ``finally`` so a failing assertion does not leak the container.
    try:
        # Try to ping (should fail with network_mode="none")
        result = manager.exec_command(
            agent_id="test_network",
            command="ping -c 1 google.com",
        )

        # Should fail due to network isolation
        assert result["success"] is False or "Network is unreachable" in result["stdout"]
    finally:
        manager.cleanup("test_network")
595
+
596
def test_docker_command_timeout(self, tmp_path):
    """Test that Docker commands can timeout.

    Runs ``sleep 10`` with a one-second timeout and checks that the result
    reports failure, an exit code of ``-1``, a "timed out" message in
    ``stderr`` and an ``execution_time`` of at least one second.
    """
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager()

    workspace = tmp_path / "workspace"
    workspace.mkdir()

    # Create container
    manager.create_container(
        agent_id="test_timeout",
        workspace_path=workspace,
    )

    # Clean up in ``finally`` so a failing assertion does not leak the container.
    try:
        # Execute command that sleeps longer than timeout
        result = manager.exec_command(
            agent_id="test_timeout",
            command="sleep 10",
            timeout=1,  # 1 second timeout
        )

        # Should timeout
        assert result["success"] is False
        assert result["exit_code"] == -1
        assert "timed out" in result["stderr"].lower()
        assert result["execution_time"] >= 1.0  # Should have waited at least 1 second
    finally:
        manager.cleanup("test_timeout")
626
+
627
def test_docker_context_path_mounting(self, tmp_path):
    """Test that context paths are mounted correctly with proper read-only enforcement.

    A context directory is passed with ``"permission": "read"``. Reading a
    file from it inside the container must succeed; writing into it must
    fail and must not create anything on the host.
    """
    from massgen.filesystem_manager._docker_manager import DockerManager

    manager = DockerManager()

    workspace = tmp_path / "workspace"
    workspace.mkdir()

    context_dir = tmp_path / "context"
    context_dir.mkdir()
    context_file = context_dir / "context.txt"
    context_file.write_text("Context data")

    # Create container with READ-ONLY context path
    context_paths = [
        {"path": str(context_dir), "permission": "read", "name": "my_context"},
    ]
    manager.create_container(
        agent_id="test_context",
        workspace_path=workspace,
        context_paths=context_paths,
    )

    # Clean up in ``finally`` so a failing assertion does not leak the container.
    try:
        # Test 1: Read should succeed (path transparency)
        result = manager.exec_command(
            agent_id="test_context",
            command=f"cat {context_dir}/context.txt",
        )

        assert result["success"] is True
        assert "Context data" in result["stdout"]

        # Test 2: Write should FAIL (read-only mount)
        result_write = manager.exec_command(
            agent_id="test_context",
            command=f"echo 'should fail' > {context_dir}/new_file.txt",
        )

        # Write should fail due to read-only mount
        assert result_write["success"] is False
        # NOTE(review): the error text is looked up in ``stdout``; presumably
        # exec_command surfaces the shell's error output there - confirm.
        assert "Read-only file system" in result_write["stdout"]

        # Verify file was NOT created on host
        new_file = context_dir / "new_file.txt"
        assert not new_file.exists()
    finally:
        manager.cleanup("test_context")
676
+
677
+
678
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly reports
    # failures to the calling shell instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))