massgen 0.1.0a3__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (120)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +17 -0
  3. massgen/api_params_handler/_api_params_handler_base.py +1 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +15 -2
  5. massgen/api_params_handler/_claude_api_params_handler.py +8 -1
  6. massgen/api_params_handler/_gemini_api_params_handler.py +73 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +8 -1
  8. massgen/backend/base.py +83 -0
  9. massgen/backend/{base_with_mcp.py → base_with_custom_tool_and_mcp.py} +286 -15
  10. massgen/backend/capabilities.py +6 -6
  11. massgen/backend/chat_completions.py +200 -103
  12. massgen/backend/claude.py +115 -18
  13. massgen/backend/claude_code.py +378 -14
  14. massgen/backend/docs/CLAUDE_API_RESEARCH.md +3 -3
  15. massgen/backend/gemini.py +1333 -1629
  16. massgen/backend/gemini_mcp_manager.py +545 -0
  17. massgen/backend/gemini_trackers.py +344 -0
  18. massgen/backend/gemini_utils.py +43 -0
  19. massgen/backend/grok.py +39 -6
  20. massgen/backend/response.py +147 -81
  21. massgen/cli.py +605 -110
  22. massgen/config_builder.py +376 -27
  23. massgen/configs/README.md +123 -80
  24. massgen/configs/basic/multi/three_agents_default.yaml +3 -3
  25. massgen/configs/basic/single/single_agent.yaml +1 -1
  26. massgen/configs/providers/openai/gpt5_nano.yaml +3 -3
  27. massgen/configs/tools/custom_tools/claude_code_custom_tool_example.yaml +32 -0
  28. massgen/configs/tools/custom_tools/claude_code_custom_tool_example_no_path.yaml +28 -0
  29. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_mcp_example.yaml +40 -0
  30. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_wrong_mcp_example.yaml +38 -0
  31. massgen/configs/tools/custom_tools/claude_code_wrong_custom_tool_with_mcp_example.yaml +38 -0
  32. massgen/configs/tools/custom_tools/claude_custom_tool_example.yaml +24 -0
  33. massgen/configs/tools/custom_tools/claude_custom_tool_example_no_path.yaml +22 -0
  34. massgen/configs/tools/custom_tools/claude_custom_tool_with_mcp_example.yaml +35 -0
  35. massgen/configs/tools/custom_tools/claude_custom_tool_with_wrong_mcp_example.yaml +33 -0
  36. massgen/configs/tools/custom_tools/claude_wrong_custom_tool_with_mcp_example.yaml +33 -0
  37. massgen/configs/tools/custom_tools/gemini_custom_tool_example.yaml +24 -0
  38. massgen/configs/tools/custom_tools/gemini_custom_tool_example_no_path.yaml +22 -0
  39. massgen/configs/tools/custom_tools/gemini_custom_tool_with_mcp_example.yaml +35 -0
  40. massgen/configs/tools/custom_tools/gemini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  41. massgen/configs/tools/custom_tools/gemini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  42. massgen/configs/tools/custom_tools/github_issue_market_analysis.yaml +94 -0
  43. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example.yaml +24 -0
  44. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example_no_path.yaml +22 -0
  45. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_mcp_example.yaml +35 -0
  46. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_wrong_mcp_example.yaml +33 -0
  47. massgen/configs/tools/custom_tools/gpt5_nano_wrong_custom_tool_with_mcp_example.yaml +33 -0
  48. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example.yaml +25 -0
  49. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example_no_path.yaml +23 -0
  50. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_mcp_example.yaml +34 -0
  51. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_wrong_mcp_example.yaml +34 -0
  52. massgen/configs/tools/custom_tools/gpt_oss_wrong_custom_tool_with_mcp_example.yaml +34 -0
  53. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example.yaml +24 -0
  54. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example_no_path.yaml +22 -0
  55. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_mcp_example.yaml +35 -0
  56. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  57. massgen/configs/tools/custom_tools/grok3_mini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  58. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example.yaml +25 -0
  59. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example_no_path.yaml +23 -0
  60. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_mcp_example.yaml +36 -0
  61. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_wrong_mcp_example.yaml +34 -0
  62. massgen/configs/tools/custom_tools/qwen_api_wrong_custom_tool_with_mcp_example.yaml +34 -0
  63. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example.yaml +24 -0
  64. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example_no_path.yaml +22 -0
  65. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_mcp_example.yaml +35 -0
  66. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_wrong_mcp_example.yaml +33 -0
  67. massgen/configs/tools/custom_tools/qwen_local_wrong_custom_tool_with_mcp_example.yaml +33 -0
  68. massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +1 -1
  69. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
  70. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
  71. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
  72. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
  73. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
  74. massgen/configs/voting/gemini_gpt_voting_sensitivity.yaml +67 -0
  75. massgen/formatter/_chat_completions_formatter.py +104 -0
  76. massgen/formatter/_claude_formatter.py +120 -0
  77. massgen/formatter/_gemini_formatter.py +448 -0
  78. massgen/formatter/_response_formatter.py +88 -0
  79. massgen/frontend/coordination_ui.py +4 -2
  80. massgen/logger_config.py +35 -3
  81. massgen/message_templates.py +56 -6
  82. massgen/orchestrator.py +512 -16
  83. massgen/stream_chunk/base.py +3 -0
  84. massgen/tests/custom_tools_example.py +392 -0
  85. massgen/tests/mcp_test_server.py +17 -7
  86. massgen/tests/test_config_builder.py +423 -0
  87. massgen/tests/test_custom_tools.py +401 -0
  88. massgen/tests/test_intelligent_planning_mode.py +643 -0
  89. massgen/tests/test_tools.py +127 -0
  90. massgen/token_manager/token_manager.py +13 -4
  91. massgen/tool/README.md +935 -0
  92. massgen/tool/__init__.py +39 -0
  93. massgen/tool/_async_helpers.py +70 -0
  94. massgen/tool/_basic/__init__.py +8 -0
  95. massgen/tool/_basic/_two_num_tool.py +24 -0
  96. massgen/tool/_code_executors/__init__.py +10 -0
  97. massgen/tool/_code_executors/_python_executor.py +74 -0
  98. massgen/tool/_code_executors/_shell_executor.py +61 -0
  99. massgen/tool/_exceptions.py +39 -0
  100. massgen/tool/_file_handlers/__init__.py +10 -0
  101. massgen/tool/_file_handlers/_file_operations.py +218 -0
  102. massgen/tool/_manager.py +634 -0
  103. massgen/tool/_registered_tool.py +88 -0
  104. massgen/tool/_result.py +66 -0
  105. massgen/tool/_self_evolution/_github_issue_analyzer.py +369 -0
  106. massgen/tool/docs/builtin_tools.md +681 -0
  107. massgen/tool/docs/exceptions.md +794 -0
  108. massgen/tool/docs/execution_results.md +691 -0
  109. massgen/tool/docs/manager.md +887 -0
  110. massgen/tool/docs/workflow_toolkits.md +529 -0
  111. massgen/tool/workflow_toolkits/__init__.py +57 -0
  112. massgen/tool/workflow_toolkits/base.py +55 -0
  113. massgen/tool/workflow_toolkits/new_answer.py +126 -0
  114. massgen/tool/workflow_toolkits/vote.py +167 -0
  115. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/METADATA +87 -129
  116. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/RECORD +120 -44
  117. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/WHEEL +0 -0
  118. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/entry_points.txt +0 -0
  119. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/licenses/LICENSE +0 -0
  120. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,643 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Test intelligent planning mode that analyzes questions for irreversibility.
4
+
5
+ This test verifies that the orchestrator can:
6
+ 1. Analyze user questions to determine if they involve irreversible MCP operations
7
+ 2. Automatically enable planning mode for irreversible operations (e.g., send Discord message)
8
+ 3. Automatically disable planning mode for reversible operations (e.g., read Discord messages)
9
+ 4. All analysis happens silently - users don't see the internal analysis
10
+ """
11
+
12
+ import sys
13
+ from pathlib import Path
14
+ from unittest.mock import AsyncMock, MagicMock, patch
15
+
16
+ import pytest
17
+
18
+ from massgen.agent_config import AgentConfig
19
+ from massgen.backend.base import StreamChunk
20
+ from massgen.backend.response import ResponseBackend
21
+ from massgen.chat_agent import ConfigurableAgent
22
+ from massgen.orchestrator import Orchestrator
23
+
24
+ # Add parent directory to path for imports
25
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
26
+
27
+
28
@pytest.fixture
def mock_backend():
    """Return a ``ResponseBackend``-spec'd mock with planning-mode hooks stubbed.

    Planning mode reports as disabled, streaming is an ``AsyncMock`` that the
    individual tests overwrite, and no filesystem manager is attached.
    """
    stub = MagicMock(spec=ResponseBackend)
    stub.filesystem_manager = None
    stub.stream_with_tools = AsyncMock()
    stub.is_planning_mode_enabled = MagicMock(return_value=False)
    stub.set_planning_mode = MagicMock()
    return stub
37
+
38
+
39
@pytest.fixture
def orchestrator_with_agents(mock_backend):
    """Build an orchestrator over two agents and hand back ``(orchestrator, mock_backend)``.

    Both agents are wired to the *same* ``mock_backend`` instance, so any call
    count observed on the backend is the sum over both agents. Planning mode is
    switched on in the orchestrator's coordination config.
    """
    from massgen.agent_config import CoordinationConfig

    # Two identical OpenAI-style configs, one per agent.
    agent_configs = [AgentConfig.create_openai_config(model="gpt-4") for _ in range(2)]

    # agent1 / agent2, each backed by the shared mock backend.
    agents = {
        f"agent{index}": ConfigurableAgent(config=agent_config, backend=mock_backend)
        for index, agent_config in enumerate(agent_configs, start=1)
    }

    # Orchestrator config with planning mode enabled.
    orchestrator_config = AgentConfig.create_openai_config()
    orchestrator_config.coordination_config = CoordinationConfig()
    orchestrator_config.coordination_config.enable_planning_mode = True

    return (
        Orchestrator(
            agents=agents,
            orchestrator_id="test_orchestrator",
            config=orchestrator_config,
        ),
        mock_backend,
    )
69
+
70
+
71
@pytest.mark.asyncio
async def test_irreversible_operation_enables_planning_mode(orchestrator_with_agents):
    """An "IRREVERSIBLE: YES" analysis reply must yield ``has_irreversible`` == True."""
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        # Canned analysis reply flagging the Discord send tool.
        yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send")

    mock_backend.stream_with_tools = fake_analysis

    # A request that sends a Discord message.
    question = "Send a message to the #general channel saying 'Hello everyone!'"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    assert analysis["has_irreversible"] is True, "Should detect sending Discord message as irreversible"
98
+
99
+
100
@pytest.mark.asyncio
async def test_reversible_operation_disables_planning_mode(orchestrator_with_agents):
    """An "IRREVERSIBLE: NO" analysis reply must yield ``has_irreversible`` == False."""
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        # Canned analysis reply: nothing irreversible, no tools to block.
        yield StreamChunk(type="content", content="IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")

    mock_backend.stream_with_tools = fake_analysis

    # A read-only request.
    question = "Show me the last 10 messages from the #general channel"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    assert analysis["has_irreversible"] is False, "Should detect reading Discord messages as reversible"
127
+
128
+
129
@pytest.mark.asyncio
async def test_planning_mode_set_on_all_agents(orchestrator_with_agents):
    """``chat()`` must enable planning mode and set blocked tools for every agent."""
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        # Analysis reply marking the request irreversible.
        yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__filesystem__delete_file")

    async def fake_coordination(*args, **kwargs):
        # Stand-in for the real coordination so the test stays lightweight.
        yield StreamChunk(type="content", content="Coordinated response")
        yield StreamChunk(type="done")

    mock_backend.stream_with_tools = fake_analysis
    # The fixture does not stub this method, so add it here.
    mock_backend.set_planning_mode_blocked_tools = MagicMock()

    with patch.object(orchestrator, "_coordinate_agents_with_timeout", fake_coordination):
        messages = [{"role": "user", "content": "Delete all files in the temp directory"}]

        # Drain the chat stream; the chunks themselves are not inspected.
        chunks = [chunk async for chunk in orchestrator.chat(messages)]

    # Both agents share one backend, so each setter is expected to be hit twice.
    assert mock_backend.set_planning_mode.call_count == 2
    # The most recent call must have enabled planning mode.
    mock_backend.set_planning_mode.assert_called_with(True)

    assert mock_backend.set_planning_mode_blocked_tools.call_count == 2
166
+
167
+
168
@pytest.mark.asyncio
async def test_error_defaults_to_safe_mode(orchestrator_with_agents):
    """Test that errors during analysis default to safe mode (planning enabled)."""
    orchestrator, mock_backend = orchestrator_with_agents

    # Mock the analysis stream so that iterating it raises.
    # The other tests in this module replace ``stream_with_tools`` with an async
    # generator, so the failing mock must be one too. A plain ``async def``
    # returns a coroutine; if the caller iterates it with ``async for`` the
    # result is an unrelated TypeError and a "never awaited" warning instead
    # of the intended "Analysis failed" error.
    async def mock_analysis_error(*args, **kwargs):
        raise Exception("Analysis failed")
        yield  # unreachable; its presence makes this an async generator

    mock_backend.stream_with_tools = mock_analysis_error

    # Test with any question
    user_question = "Test question"
    conversation_context = {
        "current_message": user_question,
        "conversation_history": [],
        "full_messages": [{"role": "user", "content": user_question}],
    }

    # Run the analysis
    result = await orchestrator._analyze_question_irreversibility(
        user_question,
        conversation_context,
    )

    # Verify that it defaulted to safe mode (True = planning enabled)
    assert result["has_irreversible"] is True, "Should default to planning mode on error"
195
+
196
+
197
@pytest.mark.asyncio
async def test_analysis_uses_random_agent():
    """Smoke-test the analysis with three interchangeable agents.

    Every backend answers "NO", so the outcome is the same whichever agent the
    orchestrator picks; the selection itself is not asserted here.
    """

    async def reply_no(*args, **kwargs):
        # Bare "NO" reply, identical for every backend.
        yield StreamChunk(type="content", content="NO")

    agents = {}
    for name in ("agent1", "agent2", "agent3"):
        stub_backend = MagicMock(spec=ResponseBackend)
        stub_backend.set_planning_mode = MagicMock()
        stub_backend.filesystem_manager = None
        stub_backend.stream_with_tools = reply_no

        agents[name] = ConfigurableAgent(
            config=AgentConfig.create_openai_config(),
            backend=stub_backend,
        )

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Test question"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    # A single run: random selection is hard to test deterministically, so
    # only the result of the analysis is checked.
    outcome = await orchestrator._analyze_question_irreversibility(question, context)

    assert outcome["has_irreversible"] is False, "Should return False for NO response"
243
+
244
+
245
@pytest.mark.asyncio
async def test_mixed_responses_parsed_correctly(orchestrator_with_agents):
    """Check parsing of both a YES reply (with a tool) and a NO reply (no tools)."""
    orchestrator, mock_backend = orchestrator_with_agents

    def stream_returning(text):
        """Build an async-generator stub that emits *text* as one content chunk."""

        async def _stream(*args, **kwargs):
            yield StreamChunk(type="content", content=text)

        return _stream

    question = "Test question"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    # YES reply naming one blocked tool.
    mock_backend.stream_with_tools = stream_returning("IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send")
    yes_result = await orchestrator._analyze_question_irreversibility(question, context)
    assert yes_result["has_irreversible"] is True, "Should parse YES from formatted response"
    assert "mcp__discord__discord_send" in yes_result["blocked_tools"], "Should extract blocked tools"

    # NO reply with an empty tool list.
    mock_backend.stream_with_tools = stream_returning("IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")
    no_result = await orchestrator._analyze_question_irreversibility(question, context)
    assert no_result["has_irreversible"] is False, "Should parse NO from formatted response"
    assert len(no_result["blocked_tools"]) == 0, "Should have empty blocked tools for reversible operations"
276
+
277
+
278
@pytest.mark.asyncio
async def test_selective_blocking_multiple_tools(orchestrator_with_agents):
    """A comma-separated BLOCKED_TOOLS line must produce one entry per tool."""
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        # Reply listing three tools to block.
        yield StreamChunk(
            type="content",
            content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send, mcp__twitter__post_tweet, mcp__filesystem__delete_file",
        )

    mock_backend.stream_with_tools = fake_analysis

    question = "Send a Discord message, post a tweet, and delete a file"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)
    blocked = analysis["blocked_tools"]

    assert analysis["has_irreversible"] is True, "Should detect irreversible operations"
    assert len(blocked) == 3, "Should identify 3 blocked tools"
    for tool_name in (
        "mcp__discord__discord_send",
        "mcp__twitter__post_tweet",
        "mcp__filesystem__delete_file",
    ):
        assert tool_name in blocked
306
+
307
+
308
@pytest.mark.asyncio
async def test_selective_blocking_with_whitespace(orchestrator_with_agents):
    """Whitespace around tool names in BLOCKED_TOOLS must not affect parsing."""
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        # Tool list padded with stray spaces around the separator and at the end.
        yield StreamChunk(
            type="content",
            content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send , mcp__twitter__post_tweet ",
        )

    mock_backend.stream_with_tools = fake_analysis

    question = "Test question"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)
    blocked = analysis["blocked_tools"]

    assert analysis["has_irreversible"] is True
    assert len(blocked) == 2, "Should parse tools correctly despite whitespace"
    for tool_name in ("mcp__discord__discord_send", "mcp__twitter__post_tweet"):
        assert tool_name in blocked
335
+
336
+
337
@pytest.mark.asyncio
async def test_isolated_workspace_detection():
    """Run the analysis with backends whose cwd paths contain "workspace".

    Only the shape of the result is asserted; the test verifies that the
    analysis completes and returns both expected keys.
    """

    class MockFilesystemManager:
        """Minimal filesystem-manager stand-in exposing ``cwd`` and no-op hooks."""

        def __init__(self, cwd):
            self.cwd = cwd

        def setup_orchestration_paths(self, **kwargs):
            """No-op hook so initialization does not fail."""

        def update_backend_mcp_config(self, config):
            """No-op hook so initialization does not fail."""

    class MockBackendWithWorkspace:
        """Hand-rolled backend stub that always answers "IRREVERSIBLE: NO"."""

        def __init__(self, cwd):
            async def mock_stream(*args, **kwargs):
                yield StreamChunk(type="content", content="IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")

            self.config = {}
            self.filesystem_manager = MockFilesystemManager(cwd)
            self.set_planning_mode = MagicMock()
            self.set_planning_mode_blocked_tools = MagicMock()
            self.stream_with_tools = mock_stream

    # Backends first, then configs, then agents.
    workspace_dirs = ("/tmp/massgen_workspace_agent1", "/tmp/workspace_agent2")
    backends = [MockBackendWithWorkspace(path) for path in workspace_dirs]
    configs = [AgentConfig.create_openai_config() for _ in backends]
    agents = {
        f"agent{index}": ConfigurableAgent(config=agent_config, backend=backend)
        for index, (agent_config, backend) in enumerate(zip(configs, backends), start=1)
    }

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Create a file and write some data"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    # Only verify that the analysis ran and produced a well-formed result.
    assert analysis is not None
    assert "has_irreversible" in analysis
    assert "blocked_tools" in analysis
398
+
399
+
400
@pytest.mark.asyncio
async def test_no_isolated_workspace_detection():
    """Run the analysis with backends whose cwd paths are ordinary directories.

    The stubbed reply flags ``mcp__filesystem__write_file`` as irreversible and
    the test checks that this is reflected in the result.
    """

    class MockFilesystemManager:
        """Minimal filesystem-manager stand-in exposing ``cwd`` and no-op hooks."""

        def __init__(self, cwd):
            self.cwd = cwd

        def setup_orchestration_paths(self, **kwargs):
            """No-op hook so initialization does not fail."""

        def update_backend_mcp_config(self, config):
            """No-op hook so initialization does not fail."""

    class MockBackendNoWorkspace:
        """Hand-rolled backend stub that always answers "IRREVERSIBLE: YES"."""

        def __init__(self, cwd):
            async def mock_stream(*args, **kwargs):
                yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__filesystem__write_file")

            self.config = {}
            self.filesystem_manager = MockFilesystemManager(cwd)
            self.set_planning_mode = MagicMock()
            self.set_planning_mode_blocked_tools = MagicMock()
            self.stream_with_tools = mock_stream

    # Backends first, then configs, then agents.
    plain_dirs = ("/home/user/project", "/tmp/mydir")
    backends = [MockBackendNoWorkspace(path) for path in plain_dirs]
    configs = [AgentConfig.create_openai_config() for _ in backends]
    agents = {
        f"agent{index}": ConfigurableAgent(config=agent_config, backend=backend)
        for index, (agent_config, backend) in enumerate(zip(configs, backends), start=1)
    }

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Write a file to /tmp/test.txt"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    assert analysis is not None
    assert analysis["has_irreversible"] is True
    assert "mcp__filesystem__write_file" in analysis["blocked_tools"]
459
+
460
+
461
@pytest.mark.asyncio
async def test_backend_selective_blocking_logic():
    """Exercise ``LLMBackend.is_mcp_tool_blocked`` across the planning-mode states."""
    from massgen.backend.base import LLMBackend

    class TestBackend(LLMBackend):
        """Bare-bones backend that sets only the planning-mode attributes."""

        def __init__(self):
            # Deliberately skips the parent initializer.
            self._planning_mode_enabled = False
            self._planning_mode_blocked_tools = set()

        async def stream_with_tools(self, messages, tools, **kwargs):
            pass

        def get_provider_name(self):
            return "test"

    backend = TestBackend()
    discord_send = "mcp__discord__discord_send"
    discord_read = "mcp__discord__discord_read"
    post_tweet = "mcp__twitter__post_tweet"

    # 1) Planning mode off: nothing is blocked.
    backend.set_planning_mode(False)
    for tool_name in (discord_send, "any_tool"):
        assert backend.is_mcp_tool_blocked(tool_name) is False

    # 2) Planning mode on with an empty blocked set: everything is blocked.
    backend.set_planning_mode(True)
    backend.set_planning_mode_blocked_tools(set())
    for tool_name in (discord_send, discord_read, "any_tool"):
        assert backend.is_mcp_tool_blocked(tool_name) is True

    # 3) Planning mode on with specific tools: only those are blocked.
    backend.set_planning_mode(True)
    backend.set_planning_mode_blocked_tools({discord_send, post_tweet})
    for tool_name in (discord_send, post_tweet):
        assert backend.is_mcp_tool_blocked(tool_name) is True
    for tool_name in (discord_read, "mcp__twitter__search_tweets"):
        assert backend.is_mcp_tool_blocked(tool_name) is False

    # 4) The getter reports exactly the two tools configured above.
    blocked = backend.get_planning_mode_blocked_tools()
    assert len(blocked) == 2
    assert discord_send in blocked
    assert post_tweet in blocked
507
+
508
+
509
@pytest.mark.asyncio
async def test_chat_sets_blocked_tools_on_agents(orchestrator_with_agents):
    """``chat()`` must push both the planning flag and the blocked tools to the agents."""
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        # Analysis reply naming two tools to block.
        yield StreamChunk(
            type="content",
            content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send, mcp__twitter__post_tweet",
        )

    async def fake_coordination(*args, **kwargs):
        # Stand-in for the real coordination so the test stays lightweight.
        yield StreamChunk(type="content", content="Coordinated response")
        yield StreamChunk(type="done")

    # The fixture does not stub this method, so add it here.
    mock_backend.set_planning_mode_blocked_tools = MagicMock()
    mock_backend.stream_with_tools = fake_analysis

    with patch.object(orchestrator, "_coordinate_agents_with_timeout", fake_coordination):
        messages = [{"role": "user", "content": "Send a Discord message and post a tweet"}]

        # Drain the chat stream; the chunks themselves are not inspected.
        chunks = [chunk async for chunk in orchestrator.chat(messages)]

    # Planning mode: two calls (both agents share the backend), last one with True.
    assert mock_backend.set_planning_mode.call_count == 2
    mock_backend.set_planning_mode.assert_called_with(True)

    # Blocked tools: two calls, and the last one carried both tool names.
    blocked_setter = mock_backend.set_planning_mode_blocked_tools
    assert blocked_setter.call_count == 2
    last_blocked = blocked_setter.call_args[0][0]
    assert "mcp__discord__discord_send" in last_blocked
    assert "mcp__twitter__post_tweet" in last_blocked
550
+
551
+
552
@pytest.mark.asyncio
async def test_empty_blocked_tools_list(orchestrator_with_agents):
    """A YES reply with an empty BLOCKED_TOOLS line must give an empty tool collection."""
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        # Irreversible, but no specific tools named.
        yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: ")

    mock_backend.stream_with_tools = fake_analysis

    question = "Do something risky"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    assert analysis["has_irreversible"] is True
    # Empty set means block ALL MCP tools (backward compatible)
    assert len(analysis["blocked_tools"]) == 0, "Should have empty blocked tools set"
575
+
576
+
577
@pytest.mark.asyncio
async def test_case_insensitive_workspace_detection():
    """Run the analysis with cwd paths spelling "workspace" in assorted letter cases.

    Only the shape of the result is asserted; the test verifies that the
    analysis completes and returns both expected keys.
    """

    class MockFilesystemManager:
        """Minimal filesystem-manager stand-in exposing ``cwd`` and no-op hooks."""

        def __init__(self, cwd):
            self.cwd = cwd

        def setup_orchestration_paths(self, **kwargs):
            """No-op hook so initialization does not fail."""

        def update_backend_mcp_config(self, config):
            """No-op hook so initialization does not fail."""

    class MockBackendCaseTest:
        """Hand-rolled backend stub that always answers "IRREVERSIBLE: NO"."""

        def __init__(self, cwd):
            async def mock_stream(*args, **kwargs):
                yield StreamChunk(type="content", content="IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")

            self.config = {}
            self.filesystem_manager = MockFilesystemManager(cwd)
            self.set_planning_mode = MagicMock()
            self.set_planning_mode_blocked_tools = MagicMock()
            self.stream_with_tools = mock_stream

    # One agent per casing variant of the workspace directory name.
    cased_paths = (
        "/tmp/WORKSPACE_agent1",
        "/tmp/WorkSpace_agent2",
        "/tmp/workspace_AGENT3",
        "/tmp/WoRkSpAcE_agent4",
    )

    agents = {}
    for number, path in enumerate(cased_paths, start=1):
        stub_backend = MockBackendCaseTest(path)
        agent_config = AgentConfig.create_openai_config()
        agents[f"agent{number}"] = ConfigurableAgent(config=agent_config, backend=stub_backend)

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Create some files"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    # Only verify that the analysis ran and produced a well-formed result.
    assert analysis is not None
    assert "has_irreversible" in analysis
    assert "blocked_tools" in analysis
639
+
640
+
641
if __name__ == "__main__":
    # Run this module's tests. pytest.main() returns the exit status rather
    # than exiting, so pass it to sys.exit(); otherwise a failing run would
    # still exit with status 0 when the file is executed directly.
    sys.exit(pytest.main([__file__, "-v"]))