massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +33 -7
- massgen/api_params_handler/_api_params_handler_base.py +3 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
- massgen/backend/azure_openai.py +9 -1
- massgen/backend/base.py +56 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
- massgen/backend/capabilities.py +6 -6
- massgen/backend/chat_completions.py +18 -11
- massgen/backend/claude_code.py +9 -1
- massgen/backend/gemini.py +71 -6
- massgen/backend/gemini_utils.py +30 -0
- massgen/backend/grok.py +39 -6
- massgen/backend/response.py +18 -11
- massgen/chat_agent.py +9 -3
- massgen/cli.py +319 -43
- massgen/config_builder.py +163 -18
- massgen/configs/README.md +78 -20
- massgen/configs/basic/multi/three_agents_default.yaml +2 -2
- massgen/configs/debug/restart_test_controlled.yaml +60 -0
- massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
- massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
- massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
- massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
- massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
- massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
- massgen/configs/tools/memory/README.md +199 -0
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
- massgen/configs/tools/memory/test_context_window_management.py +286 -0
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
- massgen/docker/README.md +83 -0
- massgen/filesystem_manager/_code_execution_server.py +22 -7
- massgen/filesystem_manager/_docker_manager.py +21 -1
- massgen/filesystem_manager/_filesystem_manager.py +8 -0
- massgen/filesystem_manager/_workspace_tools_server.py +0 -997
- massgen/formatter/_gemini_formatter.py +73 -0
- massgen/frontend/coordination_ui.py +175 -257
- massgen/frontend/displays/base_display.py +29 -0
- massgen/frontend/displays/rich_terminal_display.py +155 -9
- massgen/frontend/displays/simple_display.py +21 -0
- massgen/frontend/displays/terminal_display.py +22 -2
- massgen/logger_config.py +50 -6
- massgen/message_templates.py +123 -3
- massgen/orchestrator.py +652 -44
- massgen/tests/test_code_execution.py +178 -0
- massgen/tests/test_intelligent_planning_mode.py +643 -0
- massgen/tests/test_orchestration_restart.py +204 -0
- massgen/token_manager/token_manager.py +13 -4
- massgen/tool/__init__.py +4 -0
- massgen/tool/_multimodal_tools/understand_audio.py +193 -0
- massgen/tool/_multimodal_tools/understand_file.py +550 -0
- massgen/tool/_multimodal_tools/understand_image.py +212 -0
- massgen/tool/_multimodal_tools/understand_video.py +313 -0
- massgen/tool/docs/multimodal_tools.md +779 -0
- massgen/tool/workflow_toolkits/__init__.py +26 -0
- massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
- massgen/utils.py +1 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
massgen/__init__.py
CHANGED
massgen/agent_config.py
CHANGED
|
@@ -35,12 +35,17 @@ class CoordinationConfig:
|
|
|
35
35
|
Only the winning agent executes actions during final presentation.
|
|
36
36
|
If False, agents execute actions during coordination (default behavior).
|
|
37
37
|
planning_mode_instruction: Custom instruction to add when planning mode is enabled.
|
|
38
|
+
max_orchestration_restarts: Maximum number of times orchestration can be restarted after
|
|
39
|
+
post-evaluation determines the answer is insufficient.
|
|
40
|
+
For example, max_orchestration_restarts=2 allows 3 total attempts
|
|
41
|
+
(initial + 2 restarts). Default is 0 (no restarts).
|
|
38
42
|
"""
|
|
39
43
|
|
|
40
44
|
enable_planning_mode: bool = False
|
|
41
45
|
planning_mode_instruction: str = (
|
|
42
46
|
"During coordination, describe what you would do without actually executing actions. Only provide concrete implementation details without calling external APIs or tools."
|
|
43
47
|
)
|
|
48
|
+
max_orchestration_restarts: int = 0
|
|
44
49
|
|
|
45
50
|
|
|
46
51
|
@dataclass
|
|
@@ -87,6 +92,9 @@ class AgentConfig:
|
|
|
87
92
|
# Debug/test mode - skip coordination rounds and go straight to final presentation
|
|
88
93
|
skip_coordination_rounds: bool = False
|
|
89
94
|
|
|
95
|
+
# Debug mode for restart feature - override final answer on attempt 1 only
|
|
96
|
+
debug_final_answer: Optional[str] = None
|
|
97
|
+
|
|
90
98
|
@property
|
|
91
99
|
def custom_system_instruction(self) -> Optional[str]:
|
|
92
100
|
"""
|
|
@@ -432,7 +440,8 @@ class AgentConfig:
|
|
|
432
440
|
import copy
|
|
433
441
|
|
|
434
442
|
new_config = copy.deepcopy(self)
|
|
435
|
-
|
|
443
|
+
# Set private attribute directly to avoid deprecation warning
|
|
444
|
+
new_config._custom_system_instruction = instruction
|
|
436
445
|
return new_config
|
|
437
446
|
|
|
438
447
|
def with_agent_id(self, agent_id: str) -> "AgentConfig":
|
|
@@ -538,7 +547,8 @@ class AgentConfig:
|
|
|
538
547
|
else:
|
|
539
548
|
raise ValueError(f"Domain expert configuration not available for backend: {backend}")
|
|
540
549
|
|
|
541
|
-
|
|
550
|
+
# Set private attribute directly to avoid deprecation warning
|
|
551
|
+
config._custom_system_instruction = instruction
|
|
542
552
|
return config
|
|
543
553
|
|
|
544
554
|
# =============================================================================
|
|
@@ -567,9 +577,10 @@ class AgentConfig:
|
|
|
567
577
|
conversation = templates.build_initial_conversation(task=task, agent_summaries=agent_summaries, valid_agent_ids=valid_agent_ids)
|
|
568
578
|
|
|
569
579
|
# Add custom system instruction if provided
|
|
570
|
-
|
|
580
|
+
# Access private attribute to avoid deprecation warning
|
|
581
|
+
if self._custom_system_instruction:
|
|
571
582
|
base_system = conversation["system_message"]
|
|
572
|
-
conversation["system_message"] = f"{self.
|
|
583
|
+
conversation["system_message"] = f"{self._custom_system_instruction}\n\n{base_system}"
|
|
573
584
|
|
|
574
585
|
# Add backend configuration
|
|
575
586
|
conversation.update(
|
|
@@ -703,7 +714,8 @@ class AgentConfig:
|
|
|
703
714
|
result = {
|
|
704
715
|
"backend_params": self.backend_params,
|
|
705
716
|
"agent_id": self.agent_id,
|
|
706
|
-
|
|
717
|
+
# Access private attribute to avoid deprecation warning
|
|
718
|
+
"custom_system_instruction": self._custom_system_instruction,
|
|
707
719
|
"voting_sensitivity": self.voting_sensitivity,
|
|
708
720
|
"max_new_answers_per_agent": self.max_new_answers_per_agent,
|
|
709
721
|
"answer_novelty_requirement": self.answer_novelty_requirement,
|
|
@@ -716,8 +728,12 @@ class AgentConfig:
|
|
|
716
728
|
result["coordination_config"] = {
|
|
717
729
|
"enable_planning_mode": self.coordination_config.enable_planning_mode,
|
|
718
730
|
"planning_mode_instruction": self.coordination_config.planning_mode_instruction,
|
|
731
|
+
"max_orchestration_restarts": self.coordination_config.max_orchestration_restarts,
|
|
719
732
|
}
|
|
720
733
|
|
|
734
|
+
# Handle debug fields
|
|
735
|
+
result["debug_final_answer"] = self.debug_final_answer
|
|
736
|
+
|
|
721
737
|
# Handle message_templates serialization
|
|
722
738
|
if self.message_templates is not None:
|
|
723
739
|
try:
|
|
@@ -757,6 +773,9 @@ class AgentConfig:
|
|
|
757
773
|
if coordination_data:
|
|
758
774
|
coordination_config = CoordinationConfig(**coordination_data)
|
|
759
775
|
|
|
776
|
+
# Handle debug fields
|
|
777
|
+
debug_final_answer = data.get("debug_final_answer")
|
|
778
|
+
|
|
760
779
|
# Handle message_templates
|
|
761
780
|
message_templates = None
|
|
762
781
|
template_data = data.get("message_templates")
|
|
@@ -765,17 +784,24 @@ class AgentConfig:
|
|
|
765
784
|
|
|
766
785
|
message_templates = MessageTemplates(**template_data)
|
|
767
786
|
|
|
768
|
-
|
|
787
|
+
config = cls(
|
|
769
788
|
backend_params=backend_params,
|
|
770
789
|
message_templates=message_templates,
|
|
771
790
|
agent_id=agent_id,
|
|
772
|
-
custom_system_instruction=custom_system_instruction,
|
|
773
791
|
voting_sensitivity=voting_sensitivity,
|
|
774
792
|
max_new_answers_per_agent=max_new_answers_per_agent,
|
|
775
793
|
answer_novelty_requirement=answer_novelty_requirement,
|
|
776
794
|
timeout_config=timeout_config,
|
|
777
795
|
coordination_config=coordination_config,
|
|
778
796
|
)
|
|
797
|
+
config.debug_final_answer = debug_final_answer
|
|
798
|
+
return config
|
|
799
|
+
|
|
800
|
+
# Set custom_system_instruction separately to avoid deprecation warning
|
|
801
|
+
if custom_system_instruction is not None:
|
|
802
|
+
config._custom_system_instruction = custom_system_instruction
|
|
803
|
+
|
|
804
|
+
return config
|
|
779
805
|
|
|
780
806
|
|
|
781
807
|
# =============================================================================
|
|
@@ -56,8 +56,10 @@ class APIParamsHandlerBase(ABC):
|
|
|
56
56
|
# Filesystem manager parameters (handled by base class)
|
|
57
57
|
"cwd",
|
|
58
58
|
"agent_temporary_workspace",
|
|
59
|
+
"agent_temporary_workspace_parent",
|
|
59
60
|
"context_paths",
|
|
60
61
|
"context_write_access_enabled",
|
|
62
|
+
"enforce_read_before_delete",
|
|
61
63
|
"enable_image_generation",
|
|
62
64
|
"enable_mcp_command_line",
|
|
63
65
|
"command_line_allowed_commands",
|
|
@@ -67,6 +69,7 @@ class APIParamsHandlerBase(ABC):
|
|
|
67
69
|
"command_line_docker_memory_limit",
|
|
68
70
|
"command_line_docker_cpu_limit",
|
|
69
71
|
"command_line_docker_network_mode",
|
|
72
|
+
"command_line_docker_enable_sudo",
|
|
70
73
|
# Backend identification (handled by orchestrator)
|
|
71
74
|
"enable_audio_generation", # Audio generation parameter
|
|
72
75
|
"type",
|
|
@@ -31,7 +31,13 @@ class ChatCompletionsAPIParamsHandler(APIParamsHandlerBase):
|
|
|
31
31
|
"""Get provider tools for Chat Completions format."""
|
|
32
32
|
provider_tools = []
|
|
33
33
|
|
|
34
|
-
if
|
|
34
|
+
# Check if this is Grok backend - Grok uses extra_body.search_parameters instead of function tools
|
|
35
|
+
backend_provider = getattr(self.backend, "get_provider_name", lambda: "")()
|
|
36
|
+
is_grok = backend_provider.lower() == "grok"
|
|
37
|
+
|
|
38
|
+
# Add web_search function tool for non-Grok backends
|
|
39
|
+
# Grok handles web search via extra_body.search_parameters (set in grok.py)
|
|
40
|
+
if all_params.get("enable_web_search", False) and not is_grok:
|
|
35
41
|
provider_tools.append(
|
|
36
42
|
{
|
|
37
43
|
"type": "function",
|
massgen/backend/azure_openai.py
CHANGED
|
@@ -94,7 +94,7 @@ class AzureOpenAIBackend(LLMBackend):
|
|
|
94
94
|
raise ValueError("Azure OpenAI requires a deployment name. Pass it as the 'model' parameter.")
|
|
95
95
|
|
|
96
96
|
# Check if workflow tools are present
|
|
97
|
-
workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote"]] if tools else []
|
|
97
|
+
workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote", "submit", "restart_orchestration"]] if tools else []
|
|
98
98
|
has_workflow_tools = len(workflow_tools) > 0
|
|
99
99
|
|
|
100
100
|
# Modify messages to include workflow tool instructions if needed
|
|
@@ -270,6 +270,14 @@ class AzureOpenAIBackend(LLMBackend):
|
|
|
270
270
|
system_parts.append(f' Usage: {{"tool_name": "vote", ' f'"arguments": {{"agent_id": "agent1", ' f'"reason": "explanation"}}}} // Choose agent_id from: {agent_list}')
|
|
271
271
|
else:
|
|
272
272
|
system_parts.append(' Usage: {"tool_name": "vote", ' '"arguments": {"agent_id": "agent1", ' '"reason": "explanation"}}')
|
|
273
|
+
elif name == "submit":
|
|
274
|
+
system_parts.append(
|
|
275
|
+
' Usage: {"tool_name": "submit", ' '"arguments": {"confirmed": true}}',
|
|
276
|
+
)
|
|
277
|
+
elif name == "restart_orchestration":
|
|
278
|
+
system_parts.append(
|
|
279
|
+
' Usage: {"tool_name": "restart_orchestration", ' '"arguments": {"reason": "The answer is incomplete because...", ' '"instructions": "In the next attempt, please..."}}',
|
|
280
|
+
)
|
|
273
281
|
|
|
274
282
|
system_parts.append("\n--- MassGen Workflow Instructions ---")
|
|
275
283
|
system_parts.append("IMPORTANT: You must respond with a structured JSON decision at the end of your response.")
|
massgen/backend/base.py
CHANGED
|
@@ -70,6 +70,11 @@ class LLMBackend(ABC):
|
|
|
70
70
|
# Planning mode flag - when True, MCP tools should be blocked during coordination
|
|
71
71
|
self._planning_mode_enabled: bool = False
|
|
72
72
|
|
|
73
|
+
# Selective tool blocking - list of specific MCP tools to block during planning mode
|
|
74
|
+
# When planning_mode is enabled, only these specific tools are blocked
|
|
75
|
+
# If empty, ALL MCP tools are blocked (backward compatible behavior)
|
|
76
|
+
self._planning_mode_blocked_tools: set = set()
|
|
77
|
+
|
|
73
78
|
self.token_calculator = TokenCostCalculator()
|
|
74
79
|
|
|
75
80
|
# Filesystem manager integration
|
|
@@ -107,6 +112,7 @@ class LLMBackend(ABC):
|
|
|
107
112
|
"command_line_docker_memory_limit": kwargs.get("command_line_docker_memory_limit"),
|
|
108
113
|
"command_line_docker_cpu_limit": kwargs.get("command_line_docker_cpu_limit"),
|
|
109
114
|
"command_line_docker_network_mode": network_mode,
|
|
115
|
+
"command_line_docker_enable_sudo": kwargs.get("command_line_docker_enable_sudo", False),
|
|
110
116
|
"enable_audio_generation": kwargs.get("enable_audio_generation", False),
|
|
111
117
|
}
|
|
112
118
|
|
|
@@ -183,8 +189,10 @@ class LLMBackend(ABC):
|
|
|
183
189
|
# Filesystem manager parameters (handled by base class)
|
|
184
190
|
"cwd",
|
|
185
191
|
"agent_temporary_workspace",
|
|
192
|
+
"agent_temporary_workspace_parent",
|
|
186
193
|
"context_paths",
|
|
187
194
|
"context_write_access_enabled",
|
|
195
|
+
"enforce_read_before_delete",
|
|
188
196
|
"enable_image_generation",
|
|
189
197
|
"enable_mcp_command_line",
|
|
190
198
|
"command_line_allowed_commands",
|
|
@@ -194,6 +202,7 @@ class LLMBackend(ABC):
|
|
|
194
202
|
"command_line_docker_memory_limit",
|
|
195
203
|
"command_line_docker_cpu_limit",
|
|
196
204
|
"command_line_docker_network_mode",
|
|
205
|
+
"command_line_docker_enable_sudo",
|
|
197
206
|
# Backend identification (handled by orchestrator)
|
|
198
207
|
"type",
|
|
199
208
|
"agent_id",
|
|
@@ -465,6 +474,53 @@ class LLMBackend(ABC):
|
|
|
465
474
|
"""
|
|
466
475
|
return self._planning_mode_enabled
|
|
467
476
|
|
|
477
|
+
def set_planning_mode_blocked_tools(self, tool_names: set) -> None:
|
|
478
|
+
"""
|
|
479
|
+
Set specific MCP tools to block during planning mode.
|
|
480
|
+
|
|
481
|
+
This enables selective tool blocking - only the specified tools will be blocked
|
|
482
|
+
when planning mode is enabled, allowing other MCP tools to be used.
|
|
483
|
+
|
|
484
|
+
Args:
|
|
485
|
+
tool_names: Set of MCP tool names to block (e.g., {'mcp__discord__discord_send'})
|
|
486
|
+
If empty set, ALL MCP tools are blocked (backward compatible)
|
|
487
|
+
"""
|
|
488
|
+
self._planning_mode_blocked_tools = set(tool_names)
|
|
489
|
+
|
|
490
|
+
def get_planning_mode_blocked_tools(self) -> set:
|
|
491
|
+
"""
|
|
492
|
+
Get the set of MCP tools currently blocked in planning mode.
|
|
493
|
+
|
|
494
|
+
Returns:
|
|
495
|
+
Set of blocked MCP tool names. Empty set means ALL MCP tools are blocked.
|
|
496
|
+
"""
|
|
497
|
+
return self._planning_mode_blocked_tools.copy()
|
|
498
|
+
|
|
499
|
+
def is_mcp_tool_blocked(self, tool_name: str) -> bool:
|
|
500
|
+
"""
|
|
501
|
+
Check if a specific MCP tool is blocked in planning mode.
|
|
502
|
+
|
|
503
|
+
Args:
|
|
504
|
+
tool_name: Name of the MCP tool to check (e.g., 'mcp__discord__discord_send')
|
|
505
|
+
|
|
506
|
+
Returns:
|
|
507
|
+
True if the tool should be blocked, False otherwise
|
|
508
|
+
|
|
509
|
+
Note:
|
|
510
|
+
- If planning mode is disabled, returns False (no blocking)
|
|
511
|
+
- If planning mode is enabled and blocked_tools is empty, returns True (block ALL)
|
|
512
|
+
- If planning mode is enabled and blocked_tools is set, returns True only if tool is in the set
|
|
513
|
+
"""
|
|
514
|
+
if not self._planning_mode_enabled:
|
|
515
|
+
return False
|
|
516
|
+
|
|
517
|
+
# Empty set means block ALL MCP tools (backward compatible behavior)
|
|
518
|
+
if not self._planning_mode_blocked_tools:
|
|
519
|
+
return True
|
|
520
|
+
|
|
521
|
+
# Otherwise, block only if tool is in the blocked set
|
|
522
|
+
return tool_name in self._planning_mode_blocked_tools
|
|
523
|
+
|
|
468
524
|
async def _cleanup_client(self, client: Any) -> None:
|
|
469
525
|
"""Clean up OpenAI client resources."""
|
|
470
526
|
try:
|
|
@@ -533,10 +533,10 @@ class CustomToolAndMCPBackend(LLMBackend):
|
|
|
533
533
|
max_retries: int = 3,
|
|
534
534
|
) -> Tuple[str, Any]:
|
|
535
535
|
"""Execute MCP function with exponential backoff retry logic."""
|
|
536
|
-
# Check if
|
|
537
|
-
if self.
|
|
538
|
-
logger.info(f"[MCP] Planning mode enabled - blocking MCP tool
|
|
539
|
-
error_str = "🚫 [MCP]
|
|
536
|
+
# Check if this specific MCP tool is blocked by planning mode
|
|
537
|
+
if self.is_mcp_tool_blocked(function_name):
|
|
538
|
+
logger.info(f"[MCP] Planning mode enabled - blocking MCP tool: {function_name}")
|
|
539
|
+
error_str = f"🚫 [MCP] Tool '{function_name}' blocked during coordination (planning mode active)"
|
|
540
540
|
return error_str, {"error": error_str, "blocked_by": "planning_mode", "function_name": function_name}
|
|
541
541
|
|
|
542
542
|
# Convert JSON string to dict for shared utility
|
massgen/backend/capabilities.py
CHANGED
|
@@ -137,13 +137,14 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
|
|
|
137
137
|
builtin_tools=["web_search", "code_execution"],
|
|
138
138
|
filesystem_support="mcp",
|
|
139
139
|
models=[
|
|
140
|
+
"claude-haiku-4-5-20251001",
|
|
140
141
|
"claude-sonnet-4-5-20250929",
|
|
142
|
+
"claude-opus-4-1-20250805",
|
|
141
143
|
"claude-sonnet-4-20250514",
|
|
142
|
-
"claude-opus-4-20250514",
|
|
143
144
|
"claude-3-5-sonnet-latest",
|
|
144
145
|
"claude-3-5-haiku-latest",
|
|
145
146
|
],
|
|
146
|
-
default_model="claude-sonnet-4-
|
|
147
|
+
default_model="claude-sonnet-4-5-20250929",
|
|
147
148
|
env_var="ANTHROPIC_API_KEY",
|
|
148
149
|
notes="Web search and code execution are built-in tools. Audio/video understanding support (v0.0.30+).",
|
|
149
150
|
),
|
|
@@ -175,8 +176,8 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
|
|
|
175
176
|
filesystem_support="native",
|
|
176
177
|
models=[
|
|
177
178
|
"claude-sonnet-4-5-20250929",
|
|
179
|
+
"claude-opus-4-1-20250805",
|
|
178
180
|
"claude-sonnet-4-20250514",
|
|
179
|
-
"claude-opus-4-20250514",
|
|
180
181
|
],
|
|
181
182
|
default_model="claude-sonnet-4-5-20250929",
|
|
182
183
|
env_var="ANTHROPIC_API_KEY",
|
|
@@ -218,12 +219,11 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
|
|
|
218
219
|
filesystem_support="mcp",
|
|
219
220
|
models=[
|
|
220
221
|
"grok-4",
|
|
222
|
+
"grok-4-fast",
|
|
221
223
|
"grok-3",
|
|
222
224
|
"grok-3-mini",
|
|
223
|
-
"grok-beta",
|
|
224
|
-
"grok-vision-beta",
|
|
225
225
|
],
|
|
226
|
-
default_model="grok-
|
|
226
|
+
default_model="grok-4",
|
|
227
227
|
env_var="XAI_API_KEY",
|
|
228
228
|
notes="Web search includes real-time data access.",
|
|
229
229
|
),
|
|
@@ -229,18 +229,25 @@ class ChatCompletionsBackend(CustomToolAndMCPBackend):
|
|
|
229
229
|
updated_messages = current_messages.copy()
|
|
230
230
|
processed_call_ids = set() # Track processed calls
|
|
231
231
|
|
|
232
|
-
# Check if planning mode is enabled - block MCP tool execution during planning
|
|
232
|
+
# Check if planning mode is enabled - selectively block MCP tool execution during planning
|
|
233
233
|
if self.is_planning_mode_enabled():
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
234
|
+
blocked_tools = self.get_planning_mode_blocked_tools()
|
|
235
|
+
|
|
236
|
+
if not blocked_tools:
|
|
237
|
+
# Empty set means block ALL MCP tools (backward compatible)
|
|
238
|
+
logger.info("[ChatCompletions] Planning mode enabled - blocking ALL MCP tool execution")
|
|
239
|
+
yield StreamChunk(
|
|
240
|
+
type="mcp_status",
|
|
241
|
+
status="planning_mode_blocked",
|
|
242
|
+
content="🚫 [MCP] Planning mode active - all MCP tools blocked during coordination",
|
|
243
|
+
source="planning_mode",
|
|
244
|
+
)
|
|
245
|
+
# Skip all MCP tool execution but still continue with workflow
|
|
246
|
+
yield StreamChunk(type="done")
|
|
247
|
+
return
|
|
248
|
+
else:
|
|
249
|
+
# Selective blocking - log but continue to check each tool individually
|
|
250
|
+
logger.info(f"[ChatCompletions] Planning mode enabled - selective blocking of {len(blocked_tools)} tools")
|
|
244
251
|
|
|
245
252
|
# Create single assistant message with all tool calls
|
|
246
253
|
if captured_function_calls:
|
massgen/backend/claude_code.py
CHANGED
|
@@ -795,7 +795,7 @@ class ClaudeCodeBackend(LLMBackend):
|
|
|
795
795
|
|
|
796
796
|
# Add workflow tools information if present
|
|
797
797
|
if tools:
|
|
798
|
-
workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote"]]
|
|
798
|
+
workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote", "submit", "restart_orchestration"]]
|
|
799
799
|
if workflow_tools:
|
|
800
800
|
system_parts.append("\n--- Coordination Actions ---")
|
|
801
801
|
for tool in workflow_tools:
|
|
@@ -823,6 +823,14 @@ class ClaudeCodeBackend(LLMBackend):
|
|
|
823
823
|
system_parts.append(f' Usage: {{"tool_name": "vote", ' f'"arguments": {{"agent_id": "agent1", ' f'"reason": "explanation"}}}} // Choose agent_id from: {agent_list}')
|
|
824
824
|
else:
|
|
825
825
|
system_parts.append(' Usage: {"tool_name": "vote", ' '"arguments": {"agent_id": "agent1", ' '"reason": "explanation"}}')
|
|
826
|
+
elif name == "submit":
|
|
827
|
+
system_parts.append(
|
|
828
|
+
' Usage: {"tool_name": "submit", ' '"arguments": {"confirmed": true}}',
|
|
829
|
+
)
|
|
830
|
+
elif name == "restart_orchestration":
|
|
831
|
+
system_parts.append(
|
|
832
|
+
' Usage: {"tool_name": "restart_orchestration", ' '"arguments": {"reason": "The answer is incomplete because...", ' '"instructions": "In the next attempt, please..."}}',
|
|
833
|
+
)
|
|
826
834
|
|
|
827
835
|
system_parts.append("\n--- MassGen Coordination Instructions ---")
|
|
828
836
|
system_parts.append("IMPORTANT: You must respond with a structured JSON decision at the end of your response.")
|
massgen/backend/gemini.py
CHANGED
|
@@ -20,6 +20,7 @@ TECHNICAL SOLUTION:
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
import json
|
|
23
|
+
import logging
|
|
23
24
|
import os
|
|
24
25
|
import time
|
|
25
26
|
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
@@ -39,6 +40,19 @@ from .gemini_mcp_manager import GeminiMCPManager
|
|
|
39
40
|
from .gemini_trackers import MCPCallTracker, MCPResponseExtractor, MCPResponseTracker
|
|
40
41
|
from .gemini_utils import CoordinationResponse
|
|
41
42
|
|
|
43
|
+
|
|
44
|
+
# Suppress Gemini SDK logger warning about non-text parts in response
|
|
45
|
+
# Using custom filter per https://github.com/googleapis/python-genai/issues/850
|
|
46
|
+
class NoFunctionCallWarning(logging.Filter):
|
|
47
|
+
def filter(self, record: logging.LogRecord) -> bool:
|
|
48
|
+
message = record.getMessage()
|
|
49
|
+
if "there are non-text parts in the response:" in message:
|
|
50
|
+
return False
|
|
51
|
+
return True
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
logging.getLogger("google_genai.types").addFilter(NoFunctionCallWarning())
|
|
55
|
+
|
|
42
56
|
try:
|
|
43
57
|
from pydantic import BaseModel, Field
|
|
44
58
|
except ImportError:
|
|
@@ -220,6 +234,7 @@ class GeminiBackend(CustomToolAndMCPBackend):
|
|
|
220
234
|
|
|
221
235
|
# Analyze tool types
|
|
222
236
|
is_coordination = self.formatter.has_coordination_tools(tools)
|
|
237
|
+
is_post_evaluation = self.formatter.has_post_evaluation_tools(tools)
|
|
223
238
|
|
|
224
239
|
valid_agent_ids = None
|
|
225
240
|
|
|
@@ -239,6 +254,9 @@ class GeminiBackend(CustomToolAndMCPBackend):
|
|
|
239
254
|
# For coordination requests, modify the prompt to use structured output
|
|
240
255
|
if is_coordination:
|
|
241
256
|
full_content = self.formatter.build_structured_output_prompt(full_content, valid_agent_ids)
|
|
257
|
+
elif is_post_evaluation:
|
|
258
|
+
# For post-evaluation, modify prompt to use structured output
|
|
259
|
+
full_content = self.formatter.build_post_evaluation_prompt(full_content)
|
|
242
260
|
|
|
243
261
|
# Use google-genai package
|
|
244
262
|
client = genai.Client(api_key=self.api_key)
|
|
@@ -277,6 +295,16 @@ class GeminiBackend(CustomToolAndMCPBackend):
|
|
|
277
295
|
else:
|
|
278
296
|
# Tools or sessions are present; fallback to text parsing
|
|
279
297
|
pass
|
|
298
|
+
elif is_post_evaluation:
|
|
299
|
+
# For post-evaluation, use JSON response format for structured decisions
|
|
300
|
+
from .gemini_utils import PostEvaluationResponse
|
|
301
|
+
|
|
302
|
+
if (not using_sdk_mcp) and (not using_custom_tools) and (not all_tools):
|
|
303
|
+
config["response_mime_type"] = "application/json"
|
|
304
|
+
config["response_schema"] = PostEvaluationResponse.model_json_schema()
|
|
305
|
+
else:
|
|
306
|
+
# Tools or sessions are present; fallback to text parsing
|
|
307
|
+
pass
|
|
280
308
|
# Log messages being sent after builtin_tools is defined
|
|
281
309
|
log_backend_agent_message(
|
|
282
310
|
agent_id or "default",
|
|
@@ -387,6 +415,42 @@ class GeminiBackend(CustomToolAndMCPBackend):
|
|
|
387
415
|
tools_to_apply.extend(mcp_sessions)
|
|
388
416
|
sessions_applied = True
|
|
389
417
|
|
|
418
|
+
if self.is_planning_mode_enabled():
|
|
419
|
+
blocked_tools = self.get_planning_mode_blocked_tools()
|
|
420
|
+
|
|
421
|
+
if not blocked_tools:
|
|
422
|
+
# Empty set means block ALL MCP tools (backward compatible)
|
|
423
|
+
logger.info("[Gemini] Planning mode enabled - blocking ALL MCP tools during coordination")
|
|
424
|
+
# Don't set tools at all - this prevents any MCP tool execution
|
|
425
|
+
log_backend_activity(
|
|
426
|
+
"gemini",
|
|
427
|
+
"All MCP tools blocked in planning mode",
|
|
428
|
+
{
|
|
429
|
+
"blocked_tools": len(available_mcp_tools),
|
|
430
|
+
"session_count": len(mcp_sessions),
|
|
431
|
+
},
|
|
432
|
+
agent_id=agent_id,
|
|
433
|
+
)
|
|
434
|
+
else:
|
|
435
|
+
# Selective blocking - allow non-blocked tools to be called
|
|
436
|
+
# The execution layer (_execute_mcp_function_with_retry) will enforce blocking
|
|
437
|
+
# but we still register all tools so non-blocked ones can be used
|
|
438
|
+
logger.info(f"[Gemini] Planning mode enabled - allowing non-blocked MCP tools, blocking {len(blocked_tools)} specific tools")
|
|
439
|
+
|
|
440
|
+
# Pass all sessions - the backend's is_mcp_tool_blocked() will handle selective blocking
|
|
441
|
+
session_config["tools"] = mcp_sessions
|
|
442
|
+
|
|
443
|
+
log_backend_activity(
|
|
444
|
+
"gemini",
|
|
445
|
+
"Selective MCP tools blocked in planning mode",
|
|
446
|
+
{
|
|
447
|
+
"total_tools": len(available_mcp_tools),
|
|
448
|
+
"blocked_tools": len(blocked_tools),
|
|
449
|
+
"allowed_tools": len(available_mcp_tools) - len(blocked_tools),
|
|
450
|
+
},
|
|
451
|
+
agent_id=agent_id,
|
|
452
|
+
)
|
|
453
|
+
|
|
390
454
|
# Add custom tools (if available)
|
|
391
455
|
if has_custom_tools:
|
|
392
456
|
# Wrap FunctionDeclarations in a Tool object for Gemini SDK
|
|
@@ -1567,11 +1631,11 @@ class GeminiBackend(CustomToolAndMCPBackend):
|
|
|
1567
1631
|
|
|
1568
1632
|
content = full_content_text
|
|
1569
1633
|
|
|
1570
|
-
# Process tool calls -
|
|
1634
|
+
# Process tool calls - coordination and post-evaluation tool calls (MCP manual mode removed)
|
|
1571
1635
|
tool_calls_detected: List[Dict[str, Any]] = []
|
|
1572
1636
|
|
|
1573
|
-
#
|
|
1574
|
-
if is_coordination and content.strip() and not tool_calls_detected:
|
|
1637
|
+
# Process coordination tools OR post-evaluation tools if present
|
|
1638
|
+
if (is_coordination or is_post_evaluation) and content.strip() and not tool_calls_detected:
|
|
1575
1639
|
# For structured output mode, the entire content is JSON
|
|
1576
1640
|
structured_response = None
|
|
1577
1641
|
# Try multiple parsing strategies
|
|
@@ -1590,14 +1654,15 @@ class GeminiBackend(CustomToolAndMCPBackend):
|
|
|
1590
1654
|
# Log conversion to tool calls (summary)
|
|
1591
1655
|
log_stream_chunk("backend.gemini", "tool_calls", tool_calls, agent_id)
|
|
1592
1656
|
|
|
1593
|
-
# Log each
|
|
1657
|
+
# Log each tool call for analytics/debugging
|
|
1658
|
+
tool_type = "post_evaluation" if is_post_evaluation else "coordination"
|
|
1594
1659
|
try:
|
|
1595
1660
|
for tool_call in tool_calls:
|
|
1596
1661
|
log_tool_call(
|
|
1597
1662
|
agent_id,
|
|
1598
|
-
tool_call.get("function", {}).get("name", "
|
|
1663
|
+
tool_call.get("function", {}).get("name", f"unknown_{tool_type}_tool"),
|
|
1599
1664
|
tool_call.get("function", {}).get("arguments", {}),
|
|
1600
|
-
result="
|
|
1665
|
+
result=f"{tool_type}_tool_called",
|
|
1601
1666
|
backend_name="gemini",
|
|
1602
1667
|
)
|
|
1603
1668
|
except Exception:
|
massgen/backend/gemini_utils.py
CHANGED
|
@@ -20,6 +20,13 @@ class ActionType(enum.Enum):
|
|
|
20
20
|
NEW_ANSWER = "new_answer"
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
class PostEvaluationActionType(enum.Enum):
|
|
24
|
+
"""Action types for post-evaluation structured output."""
|
|
25
|
+
|
|
26
|
+
SUBMIT = "submit"
|
|
27
|
+
RESTART = "restart"
|
|
28
|
+
|
|
29
|
+
|
|
23
30
|
class VoteAction(BaseModel):
|
|
24
31
|
"""Structured output for voting action."""
|
|
25
32
|
|
|
@@ -41,3 +48,26 @@ class CoordinationResponse(BaseModel):
|
|
|
41
48
|
action_type: ActionType = Field(description="Type of action to take")
|
|
42
49
|
vote_data: Optional[VoteAction] = Field(default=None, description="Vote data if action is vote")
|
|
43
50
|
answer_data: Optional[NewAnswerAction] = Field(default=None, description="Answer data if action is new_answer")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SubmitAction(BaseModel):
|
|
54
|
+
"""Structured output for submit action (post-evaluation)."""
|
|
55
|
+
|
|
56
|
+
action: PostEvaluationActionType = Field(default=PostEvaluationActionType.SUBMIT, description="Action type")
|
|
57
|
+
confirmed: bool = Field(default=True, description="Confirmation that answer is satisfactory")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class RestartAction(BaseModel):
|
|
61
|
+
"""Structured output for restart action (post-evaluation)."""
|
|
62
|
+
|
|
63
|
+
action: PostEvaluationActionType = Field(default=PostEvaluationActionType.RESTART, description="Action type")
|
|
64
|
+
reason: str = Field(description="Clear explanation of why the answer is insufficient")
|
|
65
|
+
instructions: str = Field(description="Detailed, actionable guidance for agents on the next attempt")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class PostEvaluationResponse(BaseModel):
|
|
69
|
+
"""Structured response for post-evaluation actions."""
|
|
70
|
+
|
|
71
|
+
action_type: PostEvaluationActionType = Field(description="Type of post-evaluation action to take")
|
|
72
|
+
submit_data: Optional[SubmitAction] = Field(default=None, description="Submit data if action is submit")
|
|
73
|
+
restart_data: Optional[RestartAction] = Field(default=None, description="Restart data if action is restart")
|