PyPI - massgen - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

massgen/__init__.py +1 -1
massgen/agent_config.py +33 -7
massgen/api_params_handler/_api_params_handler_base.py +3 -0
massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
massgen/backend/azure_openai.py +9 -1
massgen/backend/base.py +56 -0
massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
massgen/backend/capabilities.py +6 -6
massgen/backend/chat_completions.py +18 -11
massgen/backend/claude_code.py +9 -1
massgen/backend/gemini.py +71 -6
massgen/backend/gemini_utils.py +30 -0
massgen/backend/grok.py +39 -6
massgen/backend/response.py +18 -11
massgen/chat_agent.py +9 -3
massgen/cli.py +319 -43
massgen/config_builder.py +163 -18
massgen/configs/README.md +78 -20
massgen/configs/basic/multi/three_agents_default.yaml +2 -2
massgen/configs/debug/restart_test_controlled.yaml +60 -0
massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
massgen/configs/tools/memory/README.md +199 -0
massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
massgen/configs/tools/memory/test_context_window_management.py +286 -0
massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
massgen/docker/README.md +83 -0
massgen/filesystem_manager/_code_execution_server.py +22 -7
massgen/filesystem_manager/_docker_manager.py +21 -1
massgen/filesystem_manager/_filesystem_manager.py +8 -0
massgen/filesystem_manager/_workspace_tools_server.py +0 -997
massgen/formatter/_gemini_formatter.py +73 -0
massgen/frontend/coordination_ui.py +175 -257
massgen/frontend/displays/base_display.py +29 -0
massgen/frontend/displays/rich_terminal_display.py +155 -9
massgen/frontend/displays/simple_display.py +21 -0
massgen/frontend/displays/terminal_display.py +22 -2
massgen/logger_config.py +50 -6
massgen/message_templates.py +123 -3
massgen/orchestrator.py +652 -44
massgen/tests/test_code_execution.py +178 -0
massgen/tests/test_intelligent_planning_mode.py +643 -0
massgen/tests/test_orchestration_restart.py +204 -0
massgen/token_manager/token_manager.py +13 -4
massgen/tool/__init__.py +4 -0
massgen/tool/_multimodal_tools/understand_audio.py +193 -0
massgen/tool/_multimodal_tools/understand_file.py +550 -0
massgen/tool/_multimodal_tools/understand_image.py +212 -0
massgen/tool/_multimodal_tools/understand_video.py +313 -0
massgen/tool/docs/multimodal_tools.md +779 -0
massgen/tool/workflow_toolkits/__init__.py +26 -0
massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
massgen/utils.py +1 -0
{massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
{massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
{massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
{massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
{massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
{massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0

massgen/__init__.py CHANGED Viewed

@@ -68,7 +68,7 @@ from .chat_agent import (
 from .message_templates import MessageTemplates, get_templates
 from .orchestrator import Orchestrator, create_orchestrator
-__version__ = "0.1.1"
+__version__ = "0.1.3"
 __author__ = "MassGen Contributors"

massgen/agent_config.py CHANGED Viewed

@@ -35,12 +35,17 @@ class CoordinationConfig:
                              Only the winning agent executes actions during final presentation.
                              If False, agents execute actions during coordination (default behavior).
         planning_mode_instruction: Custom instruction to add when planning mode is enabled.
+        max_orchestration_restarts: Maximum number of times orchestration can be restarted after
+                                   post-evaluation determines the answer is insufficient.
+                                   For example, max_orchestration_restarts=2 allows 3 total attempts
+                                   (initial + 2 restarts). Default is 0 (no restarts).
     """
     enable_planning_mode: bool = False
     planning_mode_instruction: str = (
         "During coordination, describe what you would do without actually executing actions. Only provide concrete implementation details without calling external APIs or tools."
     )
+    max_orchestration_restarts: int = 0
 @dataclass
@@ -87,6 +92,9 @@ class AgentConfig:
     # Debug/test mode - skip coordination rounds and go straight to final presentation
     skip_coordination_rounds: bool = False
+    # Debug mode for restart feature - override final answer on attempt 1 only
+    debug_final_answer: Optional[str] = None
     @property
     def custom_system_instruction(self) -> Optional[str]:
         """
@@ -432,7 +440,8 @@ class AgentConfig:
         import copy
         new_config = copy.deepcopy(self)
-        new_config.custom_system_instruction = instruction
+        # Set private attribute directly to avoid deprecation warning
+        new_config._custom_system_instruction = instruction
         return new_config
     def with_agent_id(self, agent_id: str) -> "AgentConfig":
@@ -538,7 +547,8 @@ class AgentConfig:
         else:
             raise ValueError(f"Domain expert configuration not available for backend: {backend}")
-        config.custom_system_instruction = instruction
+        # Set private attribute directly to avoid deprecation warning
+        config._custom_system_instruction = instruction
         return config
     # =============================================================================
@@ -567,9 +577,10 @@ class AgentConfig:
         conversation = templates.build_initial_conversation(task=task, agent_summaries=agent_summaries, valid_agent_ids=valid_agent_ids)
         # Add custom system instruction if provided
-        if self.custom_system_instruction:
+        # Access private attribute to avoid deprecation warning
+        if self._custom_system_instruction:
             base_system = conversation["system_message"]
-            conversation["system_message"] = f"{self.custom_system_instruction}\n\n{base_system}"
+            conversation["system_message"] = f"{self._custom_system_instruction}\n\n{base_system}"
         # Add backend configuration
         conversation.update(
@@ -703,7 +714,8 @@ class AgentConfig:
         result = {
             "backend_params": self.backend_params,
             "agent_id": self.agent_id,
-            "custom_system_instruction": self.custom_system_instruction,
+            # Access private attribute to avoid deprecation warning
+            "custom_system_instruction": self._custom_system_instruction,
             "voting_sensitivity": self.voting_sensitivity,
             "max_new_answers_per_agent": self.max_new_answers_per_agent,
             "answer_novelty_requirement": self.answer_novelty_requirement,
@@ -716,8 +728,12 @@ class AgentConfig:
         result["coordination_config"] = {
             "enable_planning_mode": self.coordination_config.enable_planning_mode,
             "planning_mode_instruction": self.coordination_config.planning_mode_instruction,
+            "max_orchestration_restarts": self.coordination_config.max_orchestration_restarts,
         }
+        # Handle debug fields
+        result["debug_final_answer"] = self.debug_final_answer
         # Handle message_templates serialization
         if self.message_templates is not None:
             try:
@@ -757,6 +773,9 @@ class AgentConfig:
         if coordination_data:
             coordination_config = CoordinationConfig(**coordination_data)
+        # Handle debug fields
+        debug_final_answer = data.get("debug_final_answer")
         # Handle message_templates
         message_templates = None
         template_data = data.get("message_templates")
@@ -765,17 +784,24 @@ class AgentConfig:
             message_templates = MessageTemplates(**template_data)
-        return cls(
+        config = cls(
             backend_params=backend_params,
             message_templates=message_templates,
             agent_id=agent_id,
-            custom_system_instruction=custom_system_instruction,
             voting_sensitivity=voting_sensitivity,
             max_new_answers_per_agent=max_new_answers_per_agent,
             answer_novelty_requirement=answer_novelty_requirement,
             timeout_config=timeout_config,
             coordination_config=coordination_config,
         )
+        # Fields below are not passed to cls(...) above, so assign them on the instance.
+        config.debug_final_answer = debug_final_answer
+        # Set custom_system_instruction separately to avoid deprecation warning.
+        # This must happen before returning: the value is no longer a constructor
+        # argument, so an early return here would silently drop it.
+        if custom_system_instruction is not None:
+            config._custom_system_instruction = custom_system_instruction
+        return config
 # =============================================================================

massgen/api_params_handler/_api_params_handler_base.py CHANGED Viewed

@@ -56,8 +56,10 @@ class APIParamsHandlerBase(ABC):
             # Filesystem manager parameters (handled by base class)
             "cwd",
             "agent_temporary_workspace",
+            "agent_temporary_workspace_parent",
             "context_paths",
             "context_write_access_enabled",
+            "enforce_read_before_delete",
             "enable_image_generation",
             "enable_mcp_command_line",
             "command_line_allowed_commands",
@@ -67,6 +69,7 @@ class APIParamsHandlerBase(ABC):
             "command_line_docker_memory_limit",
             "command_line_docker_cpu_limit",
             "command_line_docker_network_mode",
+            "command_line_docker_enable_sudo",
             # Backend identification (handled by orchestrator)
             "enable_audio_generation",  # Audio generation parameter
             "type",

massgen/api_params_handler/_chat_completions_api_params_handler.py CHANGED Viewed

@@ -31,7 +31,13 @@ class ChatCompletionsAPIParamsHandler(APIParamsHandlerBase):
         """Get provider tools for Chat Completions format."""
         provider_tools = []
-        if all_params.get("enable_web_search", False):
+        # Check if this is Grok backend - Grok uses extra_body.search_parameters instead of function tools
+        backend_provider = getattr(self.backend, "get_provider_name", lambda: "")()
+        is_grok = backend_provider.lower() == "grok"
+        # Add web_search function tool for non-Grok backends
+        # Grok handles web search via extra_body.search_parameters (set in grok.py)
+        if all_params.get("enable_web_search", False) and not is_grok:
             provider_tools.append(
                 {
                     "type": "function",

massgen/backend/azure_openai.py CHANGED Viewed

@@ -94,7 +94,7 @@ class AzureOpenAIBackend(LLMBackend):
                 raise ValueError("Azure OpenAI requires a deployment name. Pass it as the 'model' parameter.")
             # Check if workflow tools are present
-            workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote"]] if tools else []
+            workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote", "submit", "restart_orchestration"]] if tools else []
             has_workflow_tools = len(workflow_tools) > 0
             # Modify messages to include workflow tool instructions if needed
@@ -270,6 +270,14 @@ class AzureOpenAIBackend(LLMBackend):
                         system_parts.append(f'    Usage: {{"tool_name": "vote", ' f'"arguments": {{"agent_id": "agent1", ' f'"reason": "explanation"}}}} // Choose agent_id from: {agent_list}')
                     else:
                         system_parts.append('    Usage: {"tool_name": "vote", ' '"arguments": {"agent_id": "agent1", ' '"reason": "explanation"}}')
+                elif name == "submit":
+                    system_parts.append(
+                        '    Usage: {"tool_name": "submit", ' '"arguments": {"confirmed": true}}',
+                    )
+                elif name == "restart_orchestration":
+                    system_parts.append(
+                        '    Usage: {"tool_name": "restart_orchestration", ' '"arguments": {"reason": "The answer is incomplete because...", ' '"instructions": "In the next attempt, please..."}}',
+                    )
             system_parts.append("\n--- MassGen Workflow Instructions ---")
             system_parts.append("IMPORTANT: You must respond with a structured JSON decision at the end of your response.")

massgen/backend/base.py CHANGED Viewed

@@ -70,6 +70,11 @@ class LLMBackend(ABC):
         # Planning mode flag - when True, MCP tools should be blocked during coordination
         self._planning_mode_enabled: bool = False
+        # Selective tool blocking - list of specific MCP tools to block during planning mode
+        # When planning_mode is enabled, only these specific tools are blocked
+        # If empty, ALL MCP tools are blocked (backward compatible behavior)
+        self._planning_mode_blocked_tools: set = set()
         self.token_calculator = TokenCostCalculator()
         # Filesystem manager integration
@@ -107,6 +112,7 @@ class LLMBackend(ABC):
                     "command_line_docker_memory_limit": kwargs.get("command_line_docker_memory_limit"),
                     "command_line_docker_cpu_limit": kwargs.get("command_line_docker_cpu_limit"),
                     "command_line_docker_network_mode": network_mode,
+                    "command_line_docker_enable_sudo": kwargs.get("command_line_docker_enable_sudo", False),
                     "enable_audio_generation": kwargs.get("enable_audio_generation", False),
                 }
@@ -183,8 +189,10 @@ class LLMBackend(ABC):
             # Filesystem manager parameters (handled by base class)
             "cwd",
             "agent_temporary_workspace",
+            "agent_temporary_workspace_parent",
             "context_paths",
             "context_write_access_enabled",
+            "enforce_read_before_delete",
             "enable_image_generation",
             "enable_mcp_command_line",
             "command_line_allowed_commands",
@@ -194,6 +202,7 @@ class LLMBackend(ABC):
             "command_line_docker_memory_limit",
             "command_line_docker_cpu_limit",
             "command_line_docker_network_mode",
+            "command_line_docker_enable_sudo",
             # Backend identification (handled by orchestrator)
             "type",
             "agent_id",
@@ -465,6 +474,53 @@ class LLMBackend(ABC):
         """
         return self._planning_mode_enabled
+    def set_planning_mode_blocked_tools(self, tool_names: set) -> None:
+        """
+        Set specific MCP tools to block during planning mode.
+        This enables selective tool blocking - only the specified tools will be blocked
+        when planning mode is enabled, allowing other MCP tools to be used.
+        Args:
+            tool_names: Set of MCP tool names to block (e.g., {'mcp__discord__discord_send'})
+                       If empty set, ALL MCP tools are blocked (backward compatible)
+        """
+        self._planning_mode_blocked_tools = set(tool_names)
+    def get_planning_mode_blocked_tools(self) -> set:
+        """
+        Get the set of MCP tools currently blocked in planning mode.
+        Returns:
+            Set of blocked MCP tool names. Empty set means ALL MCP tools are blocked.
+        """
+        return self._planning_mode_blocked_tools.copy()
+    def is_mcp_tool_blocked(self, tool_name: str) -> bool:
+        """
+        Check if a specific MCP tool is blocked in planning mode.
+        Args:
+            tool_name: Name of the MCP tool to check (e.g., 'mcp__discord__discord_send')
+        Returns:
+            True if the tool should be blocked, False otherwise
+        Note:
+            - If planning mode is disabled, returns False (no blocking)
+            - If planning mode is enabled and blocked_tools is empty, returns True (block ALL)
+            - If planning mode is enabled and blocked_tools is set, returns True only if tool is in the set
+        """
+        if not self._planning_mode_enabled:
+            return False
+        # Empty set means block ALL MCP tools (backward compatible behavior)
+        if not self._planning_mode_blocked_tools:
+            return True
+        # Otherwise, block only if tool is in the blocked set
+        return tool_name in self._planning_mode_blocked_tools
     async def _cleanup_client(self, client: Any) -> None:
         """Clean up OpenAI client resources."""
         try:

massgen/backend/base_with_custom_tool_and_mcp.py CHANGED Viewed

@@ -533,10 +533,10 @@ class CustomToolAndMCPBackend(LLMBackend):
         max_retries: int = 3,
     ) -> Tuple[str, Any]:
         """Execute MCP function with exponential backoff retry logic."""
-        # Check if planning mode is enabled - block MCP tool execution during planning
-        if self.is_planning_mode_enabled():
-            logger.info(f"[MCP] Planning mode enabled - blocking MCP tool execution: {function_name}")
-            error_str = "🚫 [MCP] Planning mode active - MCP tools blocked during coordination"
+        # Check if this specific MCP tool is blocked by planning mode
+        if self.is_mcp_tool_blocked(function_name):
+            logger.info(f"[MCP] Planning mode enabled - blocking MCP tool: {function_name}")
+            error_str = f"🚫 [MCP] Tool '{function_name}' blocked during coordination (planning mode active)"
             return error_str, {"error": error_str, "blocked_by": "planning_mode", "function_name": function_name}
         # Convert JSON string to dict for shared utility

massgen/backend/capabilities.py CHANGED Viewed

@@ -137,13 +137,14 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
         builtin_tools=["web_search", "code_execution"],
         filesystem_support="mcp",
         models=[
+            "claude-haiku-4-5-20251001",
             "claude-sonnet-4-5-20250929",
+            "claude-opus-4-1-20250805",
             "claude-sonnet-4-20250514",
-            "claude-opus-4-20250514",
             "claude-3-5-sonnet-latest",
             "claude-3-5-haiku-latest",
         ],
-        default_model="claude-sonnet-4-20250514",
+        default_model="claude-sonnet-4-5-20250929",
         env_var="ANTHROPIC_API_KEY",
         notes="Web search and code execution are built-in tools. Audio/video understanding support (v0.0.30+).",
     ),
@@ -175,8 +176,8 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
         filesystem_support="native",
         models=[
             "claude-sonnet-4-5-20250929",
+            "claude-opus-4-1-20250805",
             "claude-sonnet-4-20250514",
-            "claude-opus-4-20250514",
         ],
         default_model="claude-sonnet-4-5-20250929",
         env_var="ANTHROPIC_API_KEY",
@@ -218,12 +219,11 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
         filesystem_support="mcp",
         models=[
             "grok-4",
+            "grok-4-fast",
             "grok-3",
             "grok-3-mini",
-            "grok-beta",
-            "grok-vision-beta",
         ],
-        default_model="grok-beta",
+        default_model="grok-4",
         env_var="XAI_API_KEY",
         notes="Web search includes real-time data access.",
     ),

massgen/backend/chat_completions.py CHANGED Viewed

@@ -229,18 +229,25 @@ class ChatCompletionsBackend(CustomToolAndMCPBackend):
             updated_messages = current_messages.copy()
             processed_call_ids = set()  # Track processed calls
-            # Check if planning mode is enabled - block MCP tool execution during planning
+            # Check if planning mode is enabled - selectively block MCP tool execution during planning
             if self.is_planning_mode_enabled():
-                logger.info("[MCP] Planning mode enabled - blocking all MCP tool execution")
-                yield StreamChunk(
-                    type="mcp_status",
-                    status="planning_mode_blocked",
-                    content="🚫 [MCP] Planning mode active - MCP tools blocked during coordination",
-                    source="planning_mode",
-                )
-                # Skip all MCP tool execution but still continue with workflow
-                yield StreamChunk(type="done")
-                return
+                blocked_tools = self.get_planning_mode_blocked_tools()
+                if not blocked_tools:
+                    # Empty set means block ALL MCP tools (backward compatible)
+                    logger.info("[ChatCompletions] Planning mode enabled - blocking ALL MCP tool execution")
+                    yield StreamChunk(
+                        type="mcp_status",
+                        status="planning_mode_blocked",
+                        content="🚫 [MCP] Planning mode active - all MCP tools blocked during coordination",
+                        source="planning_mode",
+                    )
+                    # Skip all MCP tool execution but still continue with workflow
+                    yield StreamChunk(type="done")
+                    return
+                else:
+                    # Selective blocking - log but continue to check each tool individually
+                    logger.info(f"[ChatCompletions] Planning mode enabled - selective blocking of {len(blocked_tools)} tools")
             # Create single assistant message with all tool calls
             if captured_function_calls:

massgen/backend/claude_code.py CHANGED Viewed

@@ -795,7 +795,7 @@ class ClaudeCodeBackend(LLMBackend):
         # Add workflow tools information if present
         if tools:
-            workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote"]]
+            workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote", "submit", "restart_orchestration"]]
             if workflow_tools:
                 system_parts.append("\n--- Coordination Actions ---")
                 for tool in workflow_tools:
@@ -823,6 +823,14 @@ class ClaudeCodeBackend(LLMBackend):
                             system_parts.append(f'    Usage: {{"tool_name": "vote", ' f'"arguments": {{"agent_id": "agent1", ' f'"reason": "explanation"}}}} // Choose agent_id from: {agent_list}')
                         else:
                             system_parts.append('    Usage: {"tool_name": "vote", ' '"arguments": {"agent_id": "agent1", ' '"reason": "explanation"}}')
+                    elif name == "submit":
+                        system_parts.append(
+                            '    Usage: {"tool_name": "submit", ' '"arguments": {"confirmed": true}}',
+                        )
+                    elif name == "restart_orchestration":
+                        system_parts.append(
+                            '    Usage: {"tool_name": "restart_orchestration", ' '"arguments": {"reason": "The answer is incomplete because...", ' '"instructions": "In the next attempt, please..."}}',
+                        )
                 system_parts.append("\n--- MassGen Coordination Instructions ---")
                 system_parts.append("IMPORTANT: You must respond with a structured JSON decision at the end of your response.")

massgen/backend/gemini.py CHANGED Viewed

@@ -20,6 +20,7 @@ TECHNICAL SOLUTION:
 """
 import json
+import logging
 import os
 import time
 from typing import Any, AsyncGenerator, Dict, List, Optional
@@ -39,6 +40,19 @@ from .gemini_mcp_manager import GeminiMCPManager
 from .gemini_trackers import MCPCallTracker, MCPResponseExtractor, MCPResponseTracker
 from .gemini_utils import CoordinationResponse
+# Suppress Gemini SDK logger warning about non-text parts in response
+# Using custom filter per https://github.com/googleapis/python-genai/issues/850
+class NoFunctionCallWarning(logging.Filter):
+    def filter(self, record: logging.LogRecord) -> bool:
+        message = record.getMessage()
+        if "there are non-text parts in the response:" in message:
+            return False
+        return True
+logging.getLogger("google_genai.types").addFilter(NoFunctionCallWarning())
 try:
     from pydantic import BaseModel, Field
 except ImportError:
@@ -220,6 +234,7 @@ class GeminiBackend(CustomToolAndMCPBackend):
             # Analyze tool types
             is_coordination = self.formatter.has_coordination_tools(tools)
+            is_post_evaluation = self.formatter.has_post_evaluation_tools(tools)
             valid_agent_ids = None
@@ -239,6 +254,9 @@ class GeminiBackend(CustomToolAndMCPBackend):
             # For coordination requests, modify the prompt to use structured output
             if is_coordination:
                 full_content = self.formatter.build_structured_output_prompt(full_content, valid_agent_ids)
+            elif is_post_evaluation:
+                # For post-evaluation, modify prompt to use structured output
+                full_content = self.formatter.build_post_evaluation_prompt(full_content)
             # Use google-genai package
             client = genai.Client(api_key=self.api_key)
@@ -277,6 +295,16 @@ class GeminiBackend(CustomToolAndMCPBackend):
                 else:
                     # Tools or sessions are present; fallback to text parsing
                     pass
+            elif is_post_evaluation:
+                # For post-evaluation, use JSON response format for structured decisions
+                from .gemini_utils import PostEvaluationResponse
+                if (not using_sdk_mcp) and (not using_custom_tools) and (not all_tools):
+                    config["response_mime_type"] = "application/json"
+                    config["response_schema"] = PostEvaluationResponse.model_json_schema()
+                else:
+                    # Tools or sessions are present; fallback to text parsing
+                    pass
             # Log messages being sent after builtin_tools is defined
             log_backend_agent_message(
                 agent_id or "default",
@@ -387,6 +415,42 @@ class GeminiBackend(CustomToolAndMCPBackend):
                             tools_to_apply.extend(mcp_sessions)
                             sessions_applied = True
+                        if self.is_planning_mode_enabled():
+                            blocked_tools = self.get_planning_mode_blocked_tools()
+                            if not blocked_tools:
+                                # Empty set means block ALL MCP tools (backward compatible)
+                                logger.info("[Gemini] Planning mode enabled - blocking ALL MCP tools during coordination")
+                                # Don't set tools at all - this prevents any MCP tool execution
+                                log_backend_activity(
+                                    "gemini",
+                                    "All MCP tools blocked in planning mode",
+                                    {
+                                        "blocked_tools": len(available_mcp_tools),
+                                        "session_count": len(mcp_sessions),
+                                    },
+                                    agent_id=agent_id,
+                                )
+                            else:
+                                # Selective blocking - allow non-blocked tools to be called
+                                # The execution layer (_execute_mcp_function_with_retry) will enforce blocking
+                                # but we still register all tools so non-blocked ones can be used
+                                logger.info(f"[Gemini] Planning mode enabled - allowing non-blocked MCP tools, blocking {len(blocked_tools)} specific tools")
+                                # Pass all sessions - the backend's is_mcp_tool_blocked() will handle selective blocking
+                                session_config["tools"] = mcp_sessions
+                                log_backend_activity(
+                                    "gemini",
+                                    "Selective MCP tools blocked in planning mode",
+                                    {
+                                        "total_tools": len(available_mcp_tools),
+                                        "blocked_tools": len(blocked_tools),
+                                        "allowed_tools": len(available_mcp_tools) - len(blocked_tools),
+                                    },
+                                    agent_id=agent_id,
+                                )
                     # Add custom tools (if available)
                     if has_custom_tools:
                         # Wrap FunctionDeclarations in a Tool object for Gemini SDK
@@ -1567,11 +1631,11 @@ class GeminiBackend(CustomToolAndMCPBackend):
             content = full_content_text
-            # Process tool calls - only coordination tool calls (MCP manual mode removed)
+            # Process tool calls - coordination and post-evaluation tool calls (MCP manual mode removed)
             tool_calls_detected: List[Dict[str, Any]] = []
-            # Then, process coordination tools if present
-            if is_coordination and content.strip() and not tool_calls_detected:
+            # Process coordination tools OR post-evaluation tools if present
+            if (is_coordination or is_post_evaluation) and content.strip() and not tool_calls_detected:
                 # For structured output mode, the entire content is JSON
                 structured_response = None
                 # Try multiple parsing strategies
@@ -1590,14 +1654,15 @@ class GeminiBackend(CustomToolAndMCPBackend):
                         # Log conversion to tool calls (summary)
                         log_stream_chunk("backend.gemini", "tool_calls", tool_calls, agent_id)
-                        # Log each coordination tool call for analytics/debugging
+                        # Log each tool call for analytics/debugging
+                        tool_type = "post_evaluation" if is_post_evaluation else "coordination"
                         try:
                             for tool_call in tool_calls:
                                 log_tool_call(
                                     agent_id,
-                                    tool_call.get("function", {}).get("name", "unknown_coordination_tool"),
+                                    tool_call.get("function", {}).get("name", f"unknown_{tool_type}_tool"),
                                     tool_call.get("function", {}).get("arguments", {}),
-                                    result="coordination_tool_called",
+                                    result=f"{tool_type}_tool_called",
                                     backend_name="gemini",
                                 )
                         except Exception:

massgen/backend/gemini_utils.py CHANGED Viewed

@@ -20,6 +20,13 @@ class ActionType(enum.Enum):
     NEW_ANSWER = "new_answer"
+class PostEvaluationActionType(enum.Enum):
+    """Action types for post-evaluation structured output."""
+    SUBMIT = "submit"
+    RESTART = "restart"
 class VoteAction(BaseModel):
     """Structured output for voting action."""
@@ -41,3 +48,26 @@ class CoordinationResponse(BaseModel):
     action_type: ActionType = Field(description="Type of action to take")
     vote_data: Optional[VoteAction] = Field(default=None, description="Vote data if action is vote")
     answer_data: Optional[NewAnswerAction] = Field(default=None, description="Answer data if action is new_answer")
+class SubmitAction(BaseModel):
+    """Structured output for submit action (post-evaluation)."""
+    action: PostEvaluationActionType = Field(default=PostEvaluationActionType.SUBMIT, description="Action type")
+    confirmed: bool = Field(default=True, description="Confirmation that answer is satisfactory")
+class RestartAction(BaseModel):
+    """Structured output for restart action (post-evaluation)."""
+    action: PostEvaluationActionType = Field(default=PostEvaluationActionType.RESTART, description="Action type")
+    reason: str = Field(description="Clear explanation of why the answer is insufficient")
+    instructions: str = Field(description="Detailed, actionable guidance for agents on the next attempt")
+class PostEvaluationResponse(BaseModel):
+    """Structured response for post-evaluation actions."""
+    action_type: PostEvaluationActionType = Field(description="Type of post-evaluation action to take")
+    submit_data: Optional[SubmitAction] = Field(default=None, description="Submit data if action is submit")
+    restart_data: Optional[RestartAction] = Field(default=None, description="Restart data if action is restart")

massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl