massgen 0.1.0a3__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +17 -0
- massgen/api_params_handler/_api_params_handler_base.py +1 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +15 -2
- massgen/api_params_handler/_claude_api_params_handler.py +8 -1
- massgen/api_params_handler/_gemini_api_params_handler.py +73 -0
- massgen/api_params_handler/_response_api_params_handler.py +8 -1
- massgen/backend/base.py +83 -0
- massgen/backend/{base_with_mcp.py → base_with_custom_tool_and_mcp.py} +286 -15
- massgen/backend/capabilities.py +6 -6
- massgen/backend/chat_completions.py +200 -103
- massgen/backend/claude.py +115 -18
- massgen/backend/claude_code.py +378 -14
- massgen/backend/docs/CLAUDE_API_RESEARCH.md +3 -3
- massgen/backend/gemini.py +1333 -1629
- massgen/backend/gemini_mcp_manager.py +545 -0
- massgen/backend/gemini_trackers.py +344 -0
- massgen/backend/gemini_utils.py +43 -0
- massgen/backend/grok.py +39 -6
- massgen/backend/response.py +147 -81
- massgen/cli.py +605 -110
- massgen/config_builder.py +376 -27
- massgen/configs/README.md +123 -80
- massgen/configs/basic/multi/three_agents_default.yaml +3 -3
- massgen/configs/basic/single/single_agent.yaml +1 -1
- massgen/configs/providers/openai/gpt5_nano.yaml +3 -3
- massgen/configs/tools/custom_tools/claude_code_custom_tool_example.yaml +32 -0
- massgen/configs/tools/custom_tools/claude_code_custom_tool_example_no_path.yaml +28 -0
- massgen/configs/tools/custom_tools/claude_code_custom_tool_with_mcp_example.yaml +40 -0
- massgen/configs/tools/custom_tools/claude_code_custom_tool_with_wrong_mcp_example.yaml +38 -0
- massgen/configs/tools/custom_tools/claude_code_wrong_custom_tool_with_mcp_example.yaml +38 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/claude_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gemini_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/github_issue_market_analysis.yaml +94 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gpt5_nano_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example.yaml +25 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example_no_path.yaml +23 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_wrong_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/gpt_oss_wrong_custom_tool_with_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/grok3_mini_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_example.yaml +25 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_example_no_path.yaml +23 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_mcp_example.yaml +36 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_wrong_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/qwen_api_wrong_custom_tool_with_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/qwen_local_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +1 -1
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
- massgen/configs/voting/gemini_gpt_voting_sensitivity.yaml +67 -0
- massgen/formatter/_chat_completions_formatter.py +104 -0
- massgen/formatter/_claude_formatter.py +120 -0
- massgen/formatter/_gemini_formatter.py +448 -0
- massgen/formatter/_response_formatter.py +88 -0
- massgen/frontend/coordination_ui.py +4 -2
- massgen/logger_config.py +35 -3
- massgen/message_templates.py +56 -6
- massgen/orchestrator.py +512 -16
- massgen/stream_chunk/base.py +3 -0
- massgen/tests/custom_tools_example.py +392 -0
- massgen/tests/mcp_test_server.py +17 -7
- massgen/tests/test_config_builder.py +423 -0
- massgen/tests/test_custom_tools.py +401 -0
- massgen/tests/test_intelligent_planning_mode.py +643 -0
- massgen/tests/test_tools.py +127 -0
- massgen/token_manager/token_manager.py +13 -4
- massgen/tool/README.md +935 -0
- massgen/tool/__init__.py +39 -0
- massgen/tool/_async_helpers.py +70 -0
- massgen/tool/_basic/__init__.py +8 -0
- massgen/tool/_basic/_two_num_tool.py +24 -0
- massgen/tool/_code_executors/__init__.py +10 -0
- massgen/tool/_code_executors/_python_executor.py +74 -0
- massgen/tool/_code_executors/_shell_executor.py +61 -0
- massgen/tool/_exceptions.py +39 -0
- massgen/tool/_file_handlers/__init__.py +10 -0
- massgen/tool/_file_handlers/_file_operations.py +218 -0
- massgen/tool/_manager.py +634 -0
- massgen/tool/_registered_tool.py +88 -0
- massgen/tool/_result.py +66 -0
- massgen/tool/_self_evolution/_github_issue_analyzer.py +369 -0
- massgen/tool/docs/builtin_tools.md +681 -0
- massgen/tool/docs/exceptions.md +794 -0
- massgen/tool/docs/execution_results.md +691 -0
- massgen/tool/docs/manager.md +887 -0
- massgen/tool/docs/workflow_toolkits.md +529 -0
- massgen/tool/workflow_toolkits/__init__.py +57 -0
- massgen/tool/workflow_toolkits/base.py +55 -0
- massgen/tool/workflow_toolkits/new_answer.py +126 -0
- massgen/tool/workflow_toolkits/vote.py +167 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/METADATA +87 -129
- {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/RECORD +120 -44
- {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/WHEEL +0 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,643 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Test intelligent planning mode that analyzes questions for irreversibility.
|
|
4
|
+
|
|
5
|
+
This test verifies that the orchestrator can:
|
|
6
|
+
1. Analyze user questions to determine if they involve irreversible MCP operations
|
|
7
|
+
2. Automatically enable planning mode for irreversible operations (e.g., send Discord message)
|
|
8
|
+
3. Automatically disable planning mode for reversible operations (e.g., read Discord messages)
|
|
9
|
+
4. All analysis happens silently - users don't see the internal analysis
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
15
|
+
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
from massgen.agent_config import AgentConfig
|
|
19
|
+
from massgen.backend.base import StreamChunk
|
|
20
|
+
from massgen.backend.response import ResponseBackend
|
|
21
|
+
from massgen.chat_agent import ConfigurableAgent
|
|
22
|
+
from massgen.orchestrator import Orchestrator
|
|
23
|
+
|
|
24
|
+
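# Add the directory three levels above this file to sys.path. NOTE(review): this runs after the massgen imports above, so it cannot influence them - confirm whether it is still needed.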
|
|
25
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.fixture
def mock_backend():
    """Build a ``ResponseBackend``-spec'd mock with the planning-mode hooks stubbed.

    The stub reports planning mode as disabled, exposes an ``AsyncMock`` for
    ``stream_with_tools`` (tests usually replace it with an async generator),
    and has no filesystem manager.
    """
    stub = MagicMock(spec=ResponseBackend)
    stub.configure_mock(
        set_planning_mode=MagicMock(),
        is_planning_mode_enabled=MagicMock(return_value=False),
        stream_with_tools=AsyncMock(),
        filesystem_manager=None,
    )
    return stub
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.fixture
def orchestrator_with_agents(mock_backend):
    """Return ``(orchestrator, mock_backend)`` for a two-agent orchestrator.

    Both agents are built on the *same* mock backend, so call counts recorded
    on that backend accumulate across agents. Planning mode is switched on in
    the orchestrator's coordination config.
    """
    from massgen.agent_config import CoordinationConfig

    # Two agents, each with its own gpt-4 config but sharing one backend.
    agents = {
        name: ConfigurableAgent(
            config=AgentConfig.create_openai_config(model="gpt-4"),
            backend=mock_backend,
        )
        for name in ("agent1", "agent2")
    }

    # Orchestrator-level config with planning mode enabled.
    team_config = AgentConfig.create_openai_config()
    team_config.coordination_config = CoordinationConfig()
    team_config.coordination_config.enable_planning_mode = True

    return (
        Orchestrator(
            agents=agents,
            orchestrator_id="test_orchestrator",
            config=team_config,
        ),
        mock_backend,
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@pytest.mark.asyncio
async def test_irreversible_operation_enables_planning_mode(orchestrator_with_agents):
    """An analysis reply of ``IRREVERSIBLE: YES`` must be reported as irreversible.

    The backend stream is replaced with a canned reply naming the Discord
    send tool as blocked; the question asks to send a Discord message.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    async def canned_reply(*args, **kwargs):
        # Analysis output in the "IRREVERSIBLE / BLOCKED_TOOLS" format.
        yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send")

    mock_backend.stream_with_tools = canned_reply

    question = "Send a message to the #general channel saying 'Hello everyone!'"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    verdict = await orchestrator._analyze_question_irreversibility(question, context)

    assert verdict["has_irreversible"] is True, "Should detect sending Discord message as irreversible"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@pytest.mark.asyncio
async def test_reversible_operation_disables_planning_mode(orchestrator_with_agents):
    """An analysis reply of ``IRREVERSIBLE: NO`` must be reported as reversible.

    The backend stream is replaced with a canned reply that lists no blocked
    tools; the question only asks to read Discord messages.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    async def canned_reply(*args, **kwargs):
        # Analysis output in the "IRREVERSIBLE / BLOCKED_TOOLS" format.
        yield StreamChunk(type="content", content="IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")

    mock_backend.stream_with_tools = canned_reply

    question = "Show me the last 10 messages from the #general channel"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    verdict = await orchestrator._analyze_question_irreversibility(question, context)

    assert verdict["has_irreversible"] is False, "Should detect reading Discord messages as reversible"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@pytest.mark.asyncio
async def test_planning_mode_set_on_all_agents(orchestrator_with_agents):
    """``chat()`` must push planning mode and blocked tools to every agent.

    Both agents share one mock backend, so each setter is expected to be
    called twice in total (once per agent).
    """
    orchestrator, mock_backend = orchestrator_with_agents

    async def canned_analysis(*args, **kwargs):
        # Analysis reply flagging an irreversible filesystem delete.
        yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__filesystem__delete_file")

    async def canned_coordination(*args, **kwargs):
        # Stand-in for the real coordination so no agents actually run.
        yield StreamChunk(type="content", content="Coordinated response")
        yield StreamChunk(type="done")

    mock_backend.stream_with_tools = canned_analysis
    # The fixture does not stub this setter, so add a recorder for it here.
    mock_backend.set_planning_mode_blocked_tools = MagicMock()

    with patch.object(orchestrator, "_coordinate_agents_with_timeout", canned_coordination):
        messages = [{"role": "user", "content": "Delete all files in the temp directory"}]
        # Drain the stream; only the side effects on the backend matter.
        async for _ in orchestrator.chat(messages):
            pass

    planning_setter = mock_backend.set_planning_mode
    assert planning_setter.call_count == 2
    planning_setter.assert_called_with(True)

    assert mock_backend.set_planning_mode_blocked_tools.call_count == 2
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@pytest.mark.asyncio
async def test_error_defaults_to_safe_mode(orchestrator_with_agents):
    """Test that errors during analysis default to safe mode (planning enabled).

    The backend stream is replaced with an async generator that raises as
    soon as it is iterated, simulating a backend failure in the middle of the
    analysis. The analysis result must then report ``has_irreversible`` as
    ``True``.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    async def mock_analysis_error(*args, **kwargs):
        # The other stream stubs in this module are async generators, so this
        # one must be too. A plain coroutine would fail on its type when
        # iterated, before "Analysis failed" is ever raised, and would leave
        # a never-awaited coroutine behind.
        raise Exception("Analysis failed")
        yield  # unreachable; its presence makes this an async generator

    mock_backend.stream_with_tools = mock_analysis_error

    # Any question will do; the backend fails regardless of content.
    user_question = "Test question"
    conversation_context = {
        "current_message": user_question,
        "conversation_history": [],
        "full_messages": [{"role": "user", "content": user_question}],
    }

    result = await orchestrator._analyze_question_irreversibility(
        user_question,
        conversation_context,
    )

    # Safe default: treat the request as irreversible (True = planning enabled).
    assert result["has_irreversible"] is True, "Should default to planning mode on error"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@pytest.mark.asyncio
async def test_analysis_uses_random_agent():
    """Smoke-test the irreversibility analysis with three available agents.

    Every agent's backend answers with a bare ``NO``. The analysis is run
    once and must report ``has_irreversible`` as ``False``. Which agent gets
    picked is not checked here (selection is presumably random, so it is hard
    to assert deterministically).
    """

    async def reply_no(*args, **kwargs):
        # Bare "NO" reply, without the IRREVERSIBLE/BLOCKED_TOOLS framing.
        yield StreamChunk(type="content", content="NO")

    def build_agent():
        # Each agent gets its own spec'd backend mock.
        stub = MagicMock(spec=ResponseBackend)
        stub.set_planning_mode = MagicMock()
        stub.filesystem_manager = None
        stub.stream_with_tools = reply_no
        return ConfigurableAgent(config=AgentConfig.create_openai_config(), backend=stub)

    agents = {agent_id: build_agent() for agent_id in ("agent1", "agent2", "agent3")}

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Test question"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    # Single run: only checks that the call completes and parses the reply.
    verdict = await orchestrator._analyze_question_irreversibility(question, context)

    assert verdict["has_irreversible"] is False, "Should return False for NO response"
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@pytest.mark.asyncio
async def test_mixed_responses_parsed_correctly(orchestrator_with_agents):
    """Both YES and NO formatted replies are parsed into the expected result.

    The same question is analysed twice: first against a YES reply naming one
    blocked tool, then against a NO reply with an empty tool list.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    def scripted(text):
        # Build an async-generator stub that emits a single content chunk.
        async def _stream(*args, **kwargs):
            yield StreamChunk(type="content", content=text)

        return _stream

    question = "Test question"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    # YES reply: flagged irreversible, tool name extracted.
    mock_backend.stream_with_tools = scripted("IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send")
    verdict = await orchestrator._analyze_question_irreversibility(question, context)
    assert verdict["has_irreversible"] is True, "Should parse YES from formatted response"
    assert "mcp__discord__discord_send" in verdict["blocked_tools"], "Should extract blocked tools"

    # NO reply: not irreversible, nothing blocked.
    mock_backend.stream_with_tools = scripted("IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")
    verdict = await orchestrator._analyze_question_irreversibility(question, context)
    assert verdict["has_irreversible"] is False, "Should parse NO from formatted response"
    assert len(verdict["blocked_tools"]) == 0, "Should have empty blocked tools for reversible operations"
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@pytest.mark.asyncio
async def test_selective_blocking_multiple_tools(orchestrator_with_agents):
    """A comma-separated ``BLOCKED_TOOLS`` list yields every listed tool.

    The canned reply names three tools; all three must appear in the parsed
    ``blocked_tools`` and the count must be exactly three.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    reply_text = "IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send, mcp__twitter__post_tweet, mcp__filesystem__delete_file"

    async def canned_reply(*args, **kwargs):
        yield StreamChunk(type="content", content=reply_text)

    mock_backend.stream_with_tools = canned_reply

    question = "Send a Discord message, post a tweet, and delete a file"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    verdict = await orchestrator._analyze_question_irreversibility(question, context)

    assert verdict["has_irreversible"] is True, "Should detect irreversible operations"
    blocked = verdict["blocked_tools"]
    assert len(blocked) == 3, "Should identify 3 blocked tools"
    for tool_name in (
        "mcp__discord__discord_send",
        "mcp__twitter__post_tweet",
        "mcp__filesystem__delete_file",
    ):
        assert tool_name in blocked
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
@pytest.mark.asyncio
async def test_selective_blocking_with_whitespace(orchestrator_with_agents):
    """Stray whitespace around tool names must not affect the parsed list.

    The canned reply pads the tool names with spaces before the comma and at
    the end of the line; exactly two clean names are expected back.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    reply_text = "IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send , mcp__twitter__post_tweet "

    async def canned_reply(*args, **kwargs):
        yield StreamChunk(type="content", content=reply_text)

    mock_backend.stream_with_tools = canned_reply

    question = "Test question"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    verdict = await orchestrator._analyze_question_irreversibility(question, context)

    assert verdict["has_irreversible"] is True
    blocked = verdict["blocked_tools"]
    assert len(blocked) == 2, "Should parse tools correctly despite whitespace"
    for tool_name in ("mcp__discord__discord_send", "mcp__twitter__post_tweet"):
        assert tool_name in blocked
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
@pytest.mark.asyncio
async def test_isolated_workspace_detection():
    """Run the analysis with backends whose ``cwd`` paths contain "workspace".

    Only the shape of the result is checked: it must be non-``None`` and
    carry the ``has_irreversible`` and ``blocked_tools`` keys. What the
    orchestrator does with the workspace paths is not asserted here.
    """

    async def reply_no(*args, **kwargs):
        yield StreamChunk(type="content", content="IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")

    class MockFilesystemManager:
        """Minimal filesystem-manager stand-in exposing only ``cwd``."""

        def __init__(self, cwd):
            self.cwd = cwd

        def setup_orchestration_paths(self, **kwargs):
            """No-op so orchestrator/agent setup does not fail."""
            return None

        def update_backend_mcp_config(self, config):
            """No-op so orchestrator/agent setup does not fail."""
            return None

    class MockBackendWithWorkspace:
        """Hand-rolled backend stub with a filesystem manager attached."""

        def __init__(self, cwd):
            self.filesystem_manager = MockFilesystemManager(cwd)
            self.set_planning_mode = MagicMock()
            self.set_planning_mode_blocked_tools = MagicMock()
            self.config = {}  # read during setup; an empty mapping suffices
            self.stream_with_tools = reply_no

    workspace_dirs = {
        "agent1": "/tmp/massgen_workspace_agent1",
        "agent2": "/tmp/workspace_agent2",
    }
    backends = {name: MockBackendWithWorkspace(path) for name, path in workspace_dirs.items()}
    agents = {
        name: ConfigurableAgent(config=AgentConfig.create_openai_config(), backend=backend)
        for name, backend in backends.items()
    }

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Create a file and write some data"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    verdict = await orchestrator._analyze_question_irreversibility(question, context)

    # Structural checks only: the detection path ran and produced a result.
    assert verdict is not None
    assert "has_irreversible" in verdict
    assert "blocked_tools" in verdict
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
@pytest.mark.asyncio
async def test_no_isolated_workspace_detection():
    """Run the analysis with backends whose ``cwd`` paths are ordinary directories.

    The canned reply flags the filesystem write tool, and the parsed result
    must report it as irreversible and blocked.
    """

    async def reply_yes(*args, **kwargs):
        yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__filesystem__write_file")

    class MockFilesystemManager:
        """Minimal filesystem-manager stand-in exposing only ``cwd``."""

        def __init__(self, cwd):
            self.cwd = cwd

        def setup_orchestration_paths(self, **kwargs):
            """No-op so orchestrator/agent setup does not fail."""
            return None

        def update_backend_mcp_config(self, config):
            """No-op so orchestrator/agent setup does not fail."""
            return None

    class MockBackendNoWorkspace:
        """Hand-rolled backend stub with a filesystem manager attached."""

        def __init__(self, cwd):
            self.filesystem_manager = MockFilesystemManager(cwd)
            self.set_planning_mode = MagicMock()
            self.set_planning_mode_blocked_tools = MagicMock()
            self.config = {}  # read during setup; an empty mapping suffices
            self.stream_with_tools = reply_yes

    plain_dirs = {
        "agent1": "/home/user/project",
        "agent2": "/tmp/mydir",
    }
    backends = {name: MockBackendNoWorkspace(path) for name, path in plain_dirs.items()}
    agents = {
        name: ConfigurableAgent(config=AgentConfig.create_openai_config(), backend=backend)
        for name, backend in backends.items()
    }

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Write a file to /tmp/test.txt"
    context = dict(
        current_message=question,
        conversation_history=[],
        full_messages=[{"role": "user", "content": question}],
    )

    verdict = await orchestrator._analyze_question_irreversibility(question, context)

    assert verdict is not None
    assert verdict["has_irreversible"] is True
    assert "mcp__filesystem__write_file" in verdict["blocked_tools"]
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
@pytest.mark.asyncio
async def test_backend_selective_blocking_logic():
    """Exercise ``LLMBackend.is_mcp_tool_blocked`` across the planning-mode states.

    Covers three states: planning mode off, planning mode on with an empty
    blocked set, and planning mode on with a specific blocked set. It also
    checks that ``get_planning_mode_blocked_tools`` returns what was set.
    """
    from massgen.backend.base import LLMBackend

    class TestBackend(LLMBackend):
        """Bare subclass that skips the base initialiser."""

        def __init__(self):
            # Only the two attributes set here are initialised; the base
            # ``__init__`` is deliberately not called.
            self._planning_mode_enabled = False
            self._planning_mode_blocked_tools = set()

        async def stream_with_tools(self, messages, tools, **kwargs):
            pass

        def get_provider_name(self):
            return "test"

    backend = TestBackend()

    # 1) Planning mode off: nothing is blocked.
    backend.set_planning_mode(False)
    for tool in ("mcp__discord__discord_send", "any_tool"):
        assert backend.is_mcp_tool_blocked(tool) is False

    # 2) Planning mode on with an empty blocked set: everything is blocked.
    backend.set_planning_mode(True)
    backend.set_planning_mode_blocked_tools(set())
    for tool in ("mcp__discord__discord_send", "mcp__discord__discord_read", "any_tool"):
        assert backend.is_mcp_tool_blocked(tool) is True

    # 3) Planning mode on with named tools: only those are blocked.
    backend.set_planning_mode(True)
    backend.set_planning_mode_blocked_tools({"mcp__discord__discord_send", "mcp__twitter__post_tweet"})

    for tool in ("mcp__discord__discord_send", "mcp__twitter__post_tweet"):
        assert backend.is_mcp_tool_blocked(tool) is True
    for tool in ("mcp__discord__discord_read", "mcp__twitter__search_tweets"):
        assert backend.is_mcp_tool_blocked(tool) is False

    # 4) The getter returns the set configured in step 3.
    blocked = backend.get_planning_mode_blocked_tools()
    assert len(blocked) == 2
    for tool in ("mcp__discord__discord_send", "mcp__twitter__post_tweet"):
        assert tool in blocked
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
@pytest.mark.asyncio
async def test_chat_sets_blocked_tools_on_agents(orchestrator_with_agents):
    """``chat()`` must set planning mode and the parsed blocked tools on the agents.

    Both agents share one mock backend, so each setter is expected to be
    called twice. The tools passed in the most recent call must include both
    names from the canned analysis reply.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    async def canned_analysis(*args, **kwargs):
        yield StreamChunk(
            type="content",
            content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: mcp__discord__discord_send, mcp__twitter__post_tweet",
        )

    async def canned_coordination(*args, **kwargs):
        # Stand-in for the real coordination so no agents actually run.
        yield StreamChunk(type="content", content="Coordinated response")
        yield StreamChunk(type="done")

    # The fixture does not stub this setter, so add a recorder for it here.
    mock_backend.set_planning_mode_blocked_tools = MagicMock()
    mock_backend.stream_with_tools = canned_analysis

    with patch.object(orchestrator, "_coordinate_agents_with_timeout", canned_coordination):
        messages = [{"role": "user", "content": "Send a Discord message and post a tweet"}]
        # Drain the stream; only the side effects on the backend matter.
        async for _ in orchestrator.chat(messages):
            pass

    planning_setter = mock_backend.set_planning_mode
    assert planning_setter.call_count == 2
    planning_setter.assert_called_with(True)

    tools_setter = mock_backend.set_planning_mode_blocked_tools
    assert tools_setter.call_count == 2
    # First positional argument of the most recent call.
    passed_tools = tools_setter.call_args.args[0]
    assert "mcp__discord__discord_send" in passed_tools
    assert "mcp__twitter__post_tweet" in passed_tools
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
@pytest.mark.asyncio
async def test_empty_blocked_tools_list(orchestrator_with_agents):
    """Check the analysis result when BLOCKED_TOOLS is present but lists nothing.

    The faked analysis answers ``IRREVERSIBLE: YES`` followed by an empty
    ``BLOCKED_TOOLS:`` field; the parsed result must flag irreversibility
    and carry an empty blocked-tools collection.
    """
    orchestrator, mock_backend = orchestrator_with_agents

    async def fake_analysis(*args, **kwargs):
        """Emit a YES verdict with nothing after the BLOCKED_TOOLS label."""
        yield StreamChunk(type="content", content="IRREVERSIBLE: YES\nBLOCKED_TOOLS: ")

    mock_backend.stream_with_tools = fake_analysis

    question = "Do something risky"
    context = {
        "current_message": question,
        "conversation_history": [],
        "full_messages": [{"role": "user", "content": question}],
    }

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    assert analysis["has_irreversible"] is True
    # An empty set means "block ALL MCP tools" (backward-compatible behaviour).
    assert len(analysis["blocked_tools"]) == 0, "Should have empty blocked tools set"
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
@pytest.mark.asyncio
async def test_case_insensitive_workspace_detection():
    """Run the irreversibility analysis over agents with mixed-case workspace paths.

    Four agents are built whose working directories spell "workspace" with
    different capitalisation. The assertions below only verify that the
    analysis completes and returns a result with the expected keys; they do
    not inspect which workspaces were detected.
    """

    class MockFilesystemManager:
        """Minimal filesystem-manager stand-in that exposes only ``cwd``."""

        def __init__(self, cwd):
            self.cwd = cwd

        def setup_orchestration_paths(self, **kwargs):
            """No-op stub so initialization does not fail."""

        def update_backend_mcp_config(self, config):
            """No-op stub so initialization does not fail."""

    class MockBackendCaseTest:
        """Backend stand-in with mocked planning-mode setters and a canned stream."""

        def __init__(self, cwd):
            async def mock_stream(*args, **kwargs):
                """Yield a single NO verdict with an empty BLOCKED_TOOLS field."""
                yield StreamChunk(type="content", content="IRREVERSIBLE: NO\nBLOCKED_TOOLS: ")

            self.filesystem_manager = MockFilesystemManager(cwd)
            self.set_planning_mode = MagicMock()
            self.set_planning_mode_blocked_tools = MagicMock()
            self.config = {}  # the backend is expected to expose a config attribute
            self.stream_with_tools = mock_stream

    # Working directories that differ only in letter case.
    workspace_paths = [
        "/tmp/WORKSPACE_agent1",
        "/tmp/WorkSpace_agent2",
        "/tmp/workspace_AGENT3",
        "/tmp/WoRkSpAcE_agent4",
    ]

    # Agents are keyed "agent1" .. "agent4", one per workspace path.
    agents = {
        f"agent{position}": ConfigurableAgent(
            backend=MockBackendCaseTest(workspace_path),
            config=AgentConfig.create_openai_config(),
        )
        for position, workspace_path in enumerate(workspace_paths, start=1)
    }

    orchestrator = Orchestrator(
        agents=agents,
        orchestrator_id="test_orchestrator",
        config=AgentConfig.create_openai_config(),
    )

    question = "Create some files"
    context = {
        "current_message": question,
        "conversation_history": [],
        "full_messages": [{"role": "user", "content": question}],
    }

    analysis = await orchestrator._analyze_question_irreversibility(question, context)

    # Only the shape of the result is checked here.
    assert analysis is not None
    for required_key in ("has_irreversible", "blocked_tools"):
        assert required_key in analysis
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
if __name__ == "__main__":
    # Run this module's tests verbosely and propagate pytest's exit code, so
    # that running the file as a script reports failure to the shell instead
    # of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|