massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
  5. massgen/backend/azure_openai.py +9 -1
  6. massgen/backend/base.py +56 -0
  7. massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
  8. massgen/backend/capabilities.py +6 -6
  9. massgen/backend/chat_completions.py +18 -11
  10. massgen/backend/claude_code.py +9 -1
  11. massgen/backend/gemini.py +71 -6
  12. massgen/backend/gemini_utils.py +30 -0
  13. massgen/backend/grok.py +39 -6
  14. massgen/backend/response.py +18 -11
  15. massgen/chat_agent.py +9 -3
  16. massgen/cli.py +319 -43
  17. massgen/config_builder.py +163 -18
  18. massgen/configs/README.md +78 -20
  19. massgen/configs/basic/multi/three_agents_default.yaml +2 -2
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
  27. massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  35. massgen/configs/tools/memory/README.md +199 -0
  36. massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
  37. massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
  38. massgen/configs/tools/memory/test_context_window_management.py +286 -0
  39. massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
  40. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
  41. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
  42. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
  43. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
  44. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
  45. massgen/docker/README.md +83 -0
  46. massgen/filesystem_manager/_code_execution_server.py +22 -7
  47. massgen/filesystem_manager/_docker_manager.py +21 -1
  48. massgen/filesystem_manager/_filesystem_manager.py +8 -0
  49. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  50. massgen/formatter/_gemini_formatter.py +73 -0
  51. massgen/frontend/coordination_ui.py +175 -257
  52. massgen/frontend/displays/base_display.py +29 -0
  53. massgen/frontend/displays/rich_terminal_display.py +155 -9
  54. massgen/frontend/displays/simple_display.py +21 -0
  55. massgen/frontend/displays/terminal_display.py +22 -2
  56. massgen/logger_config.py +50 -6
  57. massgen/message_templates.py +123 -3
  58. massgen/orchestrator.py +652 -44
  59. massgen/tests/test_code_execution.py +178 -0
  60. massgen/tests/test_intelligent_planning_mode.py +643 -0
  61. massgen/tests/test_orchestration_restart.py +204 -0
  62. massgen/token_manager/token_manager.py +13 -4
  63. massgen/tool/__init__.py +4 -0
  64. massgen/tool/_multimodal_tools/understand_audio.py +193 -0
  65. massgen/tool/_multimodal_tools/understand_file.py +550 -0
  66. massgen/tool/_multimodal_tools/understand_image.py +212 -0
  67. massgen/tool/_multimodal_tools/understand_video.py +313 -0
  68. massgen/tool/docs/multimodal_tools.md +779 -0
  69. massgen/tool/workflow_toolkits/__init__.py +26 -0
  70. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  71. massgen/utils.py +1 -0
  72. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
  73. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
  74. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
  75. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
  76. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
  77. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
massgen/__init__.py CHANGED
@@ -68,7 +68,7 @@ from .chat_agent import (
68
68
  from .message_templates import MessageTemplates, get_templates
69
69
  from .orchestrator import Orchestrator, create_orchestrator
70
70
 
71
- __version__ = "0.1.1"
71
+ __version__ = "0.1.3"
72
72
  __author__ = "MassGen Contributors"
73
73
 
74
74
 
massgen/agent_config.py CHANGED
@@ -35,12 +35,17 @@ class CoordinationConfig:
35
35
  Only the winning agent executes actions during final presentation.
36
36
  If False, agents execute actions during coordination (default behavior).
37
37
  planning_mode_instruction: Custom instruction to add when planning mode is enabled.
38
+ max_orchestration_restarts: Maximum number of times orchestration can be restarted after
39
+ post-evaluation determines the answer is insufficient.
40
+ For example, max_orchestration_restarts=2 allows 3 total attempts
41
+ (initial + 2 restarts). Default is 0 (no restarts).
38
42
  """
39
43
 
40
44
  enable_planning_mode: bool = False
41
45
  planning_mode_instruction: str = (
42
46
  "During coordination, describe what you would do without actually executing actions. Only provide concrete implementation details without calling external APIs or tools."
43
47
  )
48
+ max_orchestration_restarts: int = 0
44
49
 
45
50
 
46
51
  @dataclass
@@ -87,6 +92,9 @@ class AgentConfig:
87
92
  # Debug/test mode - skip coordination rounds and go straight to final presentation
88
93
  skip_coordination_rounds: bool = False
89
94
 
95
+ # Debug mode for restart feature - override final answer on attempt 1 only
96
+ debug_final_answer: Optional[str] = None
97
+
90
98
  @property
91
99
  def custom_system_instruction(self) -> Optional[str]:
92
100
  """
@@ -432,7 +440,8 @@ class AgentConfig:
432
440
  import copy
433
441
 
434
442
  new_config = copy.deepcopy(self)
435
- new_config.custom_system_instruction = instruction
443
+ # Set private attribute directly to avoid deprecation warning
444
+ new_config._custom_system_instruction = instruction
436
445
  return new_config
437
446
 
438
447
  def with_agent_id(self, agent_id: str) -> "AgentConfig":
@@ -538,7 +547,8 @@ class AgentConfig:
538
547
  else:
539
548
  raise ValueError(f"Domain expert configuration not available for backend: {backend}")
540
549
 
541
- config.custom_system_instruction = instruction
550
+ # Set private attribute directly to avoid deprecation warning
551
+ config._custom_system_instruction = instruction
542
552
  return config
543
553
 
544
554
  # =============================================================================
@@ -567,9 +577,10 @@ class AgentConfig:
567
577
  conversation = templates.build_initial_conversation(task=task, agent_summaries=agent_summaries, valid_agent_ids=valid_agent_ids)
568
578
 
569
579
  # Add custom system instruction if provided
570
- if self.custom_system_instruction:
580
+ # Access private attribute to avoid deprecation warning
581
+ if self._custom_system_instruction:
571
582
  base_system = conversation["system_message"]
572
- conversation["system_message"] = f"{self.custom_system_instruction}\n\n{base_system}"
583
+ conversation["system_message"] = f"{self._custom_system_instruction}\n\n{base_system}"
573
584
 
574
585
  # Add backend configuration
575
586
  conversation.update(
@@ -703,7 +714,8 @@ class AgentConfig:
703
714
  result = {
704
715
  "backend_params": self.backend_params,
705
716
  "agent_id": self.agent_id,
706
- "custom_system_instruction": self.custom_system_instruction,
717
+ # Access private attribute to avoid deprecation warning
718
+ "custom_system_instruction": self._custom_system_instruction,
707
719
  "voting_sensitivity": self.voting_sensitivity,
708
720
  "max_new_answers_per_agent": self.max_new_answers_per_agent,
709
721
  "answer_novelty_requirement": self.answer_novelty_requirement,
@@ -716,8 +728,12 @@ class AgentConfig:
716
728
  result["coordination_config"] = {
717
729
  "enable_planning_mode": self.coordination_config.enable_planning_mode,
718
730
  "planning_mode_instruction": self.coordination_config.planning_mode_instruction,
731
+ "max_orchestration_restarts": self.coordination_config.max_orchestration_restarts,
719
732
  }
720
733
 
734
+ # Handle debug fields
735
+ result["debug_final_answer"] = self.debug_final_answer
736
+
721
737
  # Handle message_templates serialization
722
738
  if self.message_templates is not None:
723
739
  try:
@@ -757,6 +773,9 @@ class AgentConfig:
757
773
  if coordination_data:
758
774
  coordination_config = CoordinationConfig(**coordination_data)
759
775
 
776
+ # Handle debug fields
777
+ debug_final_answer = data.get("debug_final_answer")
778
+
760
779
  # Handle message_templates
761
780
  message_templates = None
762
781
  template_data = data.get("message_templates")
@@ -765,17 +784,24 @@ class AgentConfig:
765
784
 
766
785
  message_templates = MessageTemplates(**template_data)
767
786
 
768
- return cls(
787
+ config = cls(
769
788
  backend_params=backend_params,
770
789
  message_templates=message_templates,
771
790
  agent_id=agent_id,
772
- custom_system_instruction=custom_system_instruction,
773
791
  voting_sensitivity=voting_sensitivity,
774
792
  max_new_answers_per_agent=max_new_answers_per_agent,
775
793
  answer_novelty_requirement=answer_novelty_requirement,
776
794
  timeout_config=timeout_config,
777
795
  coordination_config=coordination_config,
778
796
  )
797
+ config.debug_final_answer = debug_final_answer
798
+ return config
799
+
800
+ # Set custom_system_instruction separately to avoid deprecation warning
801
+ if custom_system_instruction is not None:
802
+ config._custom_system_instruction = custom_system_instruction
803
+
804
+ return config
779
805
 
780
806
 
781
807
  # =============================================================================
massgen/api_params_handler/_api_params_handler_base.py CHANGED
@@ -56,8 +56,10 @@ class APIParamsHandlerBase(ABC):
56
56
  # Filesystem manager parameters (handled by base class)
57
57
  "cwd",
58
58
  "agent_temporary_workspace",
59
+ "agent_temporary_workspace_parent",
59
60
  "context_paths",
60
61
  "context_write_access_enabled",
62
+ "enforce_read_before_delete",
61
63
  "enable_image_generation",
62
64
  "enable_mcp_command_line",
63
65
  "command_line_allowed_commands",
@@ -67,6 +69,7 @@ class APIParamsHandlerBase(ABC):
67
69
  "command_line_docker_memory_limit",
68
70
  "command_line_docker_cpu_limit",
69
71
  "command_line_docker_network_mode",
72
+ "command_line_docker_enable_sudo",
70
73
  # Backend identification (handled by orchestrator)
71
74
  "enable_audio_generation", # Audio generation parameter
72
75
  "type",
massgen/api_params_handler/_chat_completions_api_params_handler.py CHANGED
@@ -31,7 +31,13 @@ class ChatCompletionsAPIParamsHandler(APIParamsHandlerBase):
31
31
  """Get provider tools for Chat Completions format."""
32
32
  provider_tools = []
33
33
 
34
- if all_params.get("enable_web_search", False):
34
+ # Check if this is Grok backend - Grok uses extra_body.search_parameters instead of function tools
35
+ backend_provider = getattr(self.backend, "get_provider_name", lambda: "")()
36
+ is_grok = backend_provider.lower() == "grok"
37
+
38
+ # Add web_search function tool for non-Grok backends
39
+ # Grok handles web search via extra_body.search_parameters (set in grok.py)
40
+ if all_params.get("enable_web_search", False) and not is_grok:
35
41
  provider_tools.append(
36
42
  {
37
43
  "type": "function",
massgen/backend/azure_openai.py CHANGED
@@ -94,7 +94,7 @@ class AzureOpenAIBackend(LLMBackend):
94
94
  raise ValueError("Azure OpenAI requires a deployment name. Pass it as the 'model' parameter.")
95
95
 
96
96
  # Check if workflow tools are present
97
- workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote"]] if tools else []
97
+ workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote", "submit", "restart_orchestration"]] if tools else []
98
98
  has_workflow_tools = len(workflow_tools) > 0
99
99
 
100
100
  # Modify messages to include workflow tool instructions if needed
@@ -270,6 +270,14 @@ class AzureOpenAIBackend(LLMBackend):
270
270
  system_parts.append(f' Usage: {{"tool_name": "vote", ' f'"arguments": {{"agent_id": "agent1", ' f'"reason": "explanation"}}}} // Choose agent_id from: {agent_list}')
271
271
  else:
272
272
  system_parts.append(' Usage: {"tool_name": "vote", ' '"arguments": {"agent_id": "agent1", ' '"reason": "explanation"}}')
273
+ elif name == "submit":
274
+ system_parts.append(
275
+ ' Usage: {"tool_name": "submit", ' '"arguments": {"confirmed": true}}',
276
+ )
277
+ elif name == "restart_orchestration":
278
+ system_parts.append(
279
+ ' Usage: {"tool_name": "restart_orchestration", ' '"arguments": {"reason": "The answer is incomplete because...", ' '"instructions": "In the next attempt, please..."}}',
280
+ )
273
281
 
274
282
  system_parts.append("\n--- MassGen Workflow Instructions ---")
275
283
  system_parts.append("IMPORTANT: You must respond with a structured JSON decision at the end of your response.")
massgen/backend/base.py CHANGED
@@ -70,6 +70,11 @@ class LLMBackend(ABC):
70
70
  # Planning mode flag - when True, MCP tools should be blocked during coordination
71
71
  self._planning_mode_enabled: bool = False
72
72
 
73
+ # Selective tool blocking - list of specific MCP tools to block during planning mode
74
+ # When planning_mode is enabled, only these specific tools are blocked
75
+ # If empty, ALL MCP tools are blocked (backward compatible behavior)
76
+ self._planning_mode_blocked_tools: set = set()
77
+
73
78
  self.token_calculator = TokenCostCalculator()
74
79
 
75
80
  # Filesystem manager integration
@@ -107,6 +112,7 @@ class LLMBackend(ABC):
107
112
  "command_line_docker_memory_limit": kwargs.get("command_line_docker_memory_limit"),
108
113
  "command_line_docker_cpu_limit": kwargs.get("command_line_docker_cpu_limit"),
109
114
  "command_line_docker_network_mode": network_mode,
115
+ "command_line_docker_enable_sudo": kwargs.get("command_line_docker_enable_sudo", False),
110
116
  "enable_audio_generation": kwargs.get("enable_audio_generation", False),
111
117
  }
112
118
 
@@ -183,8 +189,10 @@ class LLMBackend(ABC):
183
189
  # Filesystem manager parameters (handled by base class)
184
190
  "cwd",
185
191
  "agent_temporary_workspace",
192
+ "agent_temporary_workspace_parent",
186
193
  "context_paths",
187
194
  "context_write_access_enabled",
195
+ "enforce_read_before_delete",
188
196
  "enable_image_generation",
189
197
  "enable_mcp_command_line",
190
198
  "command_line_allowed_commands",
@@ -194,6 +202,7 @@ class LLMBackend(ABC):
194
202
  "command_line_docker_memory_limit",
195
203
  "command_line_docker_cpu_limit",
196
204
  "command_line_docker_network_mode",
205
+ "command_line_docker_enable_sudo",
197
206
  # Backend identification (handled by orchestrator)
198
207
  "type",
199
208
  "agent_id",
@@ -465,6 +474,53 @@ class LLMBackend(ABC):
465
474
  """
466
475
  return self._planning_mode_enabled
467
476
 
477
+ def set_planning_mode_blocked_tools(self, tool_names: set) -> None:
478
+ """
479
+ Set specific MCP tools to block during planning mode.
480
+
481
+ This enables selective tool blocking - only the specified tools will be blocked
482
+ when planning mode is enabled, allowing other MCP tools to be used.
483
+
484
+ Args:
485
+ tool_names: Set of MCP tool names to block (e.g., {'mcp__discord__discord_send'})
486
+ If empty set, ALL MCP tools are blocked (backward compatible)
487
+ """
488
+ self._planning_mode_blocked_tools = set(tool_names)
489
+
490
+ def get_planning_mode_blocked_tools(self) -> set:
491
+ """
492
+ Get the set of MCP tools currently blocked in planning mode.
493
+
494
+ Returns:
495
+ Set of blocked MCP tool names. Empty set means ALL MCP tools are blocked.
496
+ """
497
+ return self._planning_mode_blocked_tools.copy()
498
+
499
+ def is_mcp_tool_blocked(self, tool_name: str) -> bool:
500
+ """
501
+ Check if a specific MCP tool is blocked in planning mode.
502
+
503
+ Args:
504
+ tool_name: Name of the MCP tool to check (e.g., 'mcp__discord__discord_send')
505
+
506
+ Returns:
507
+ True if the tool should be blocked, False otherwise
508
+
509
+ Note:
510
+ - If planning mode is disabled, returns False (no blocking)
511
+ - If planning mode is enabled and blocked_tools is empty, returns True (block ALL)
512
+ - If planning mode is enabled and blocked_tools is set, returns True only if tool is in the set
513
+ """
514
+ if not self._planning_mode_enabled:
515
+ return False
516
+
517
+ # Empty set means block ALL MCP tools (backward compatible behavior)
518
+ if not self._planning_mode_blocked_tools:
519
+ return True
520
+
521
+ # Otherwise, block only if tool is in the blocked set
522
+ return tool_name in self._planning_mode_blocked_tools
523
+
468
524
  async def _cleanup_client(self, client: Any) -> None:
469
525
  """Clean up OpenAI client resources."""
470
526
  try:
massgen/backend/base_with_custom_tool_and_mcp.py CHANGED
@@ -533,10 +533,10 @@ class CustomToolAndMCPBackend(LLMBackend):
533
533
  max_retries: int = 3,
534
534
  ) -> Tuple[str, Any]:
535
535
  """Execute MCP function with exponential backoff retry logic."""
536
- # Check if planning mode is enabled - block MCP tool execution during planning
537
- if self.is_planning_mode_enabled():
538
- logger.info(f"[MCP] Planning mode enabled - blocking MCP tool execution: {function_name}")
539
- error_str = "🚫 [MCP] Planning mode active - MCP tools blocked during coordination"
536
+ # Check if this specific MCP tool is blocked by planning mode
537
+ if self.is_mcp_tool_blocked(function_name):
538
+ logger.info(f"[MCP] Planning mode enabled - blocking MCP tool: {function_name}")
539
+ error_str = f"🚫 [MCP] Tool '{function_name}' blocked during coordination (planning mode active)"
540
540
  return error_str, {"error": error_str, "blocked_by": "planning_mode", "function_name": function_name}
541
541
 
542
542
  # Convert JSON string to dict for shared utility
massgen/backend/capabilities.py CHANGED
@@ -137,13 +137,14 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
137
137
  builtin_tools=["web_search", "code_execution"],
138
138
  filesystem_support="mcp",
139
139
  models=[
140
+ "claude-haiku-4-5-20251001",
140
141
  "claude-sonnet-4-5-20250929",
142
+ "claude-opus-4-1-20250805",
141
143
  "claude-sonnet-4-20250514",
142
- "claude-opus-4-20250514",
143
144
  "claude-3-5-sonnet-latest",
144
145
  "claude-3-5-haiku-latest",
145
146
  ],
146
- default_model="claude-sonnet-4-20250514",
147
+ default_model="claude-sonnet-4-5-20250929",
147
148
  env_var="ANTHROPIC_API_KEY",
148
149
  notes="Web search and code execution are built-in tools. Audio/video understanding support (v0.0.30+).",
149
150
  ),
@@ -175,8 +176,8 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
175
176
  filesystem_support="native",
176
177
  models=[
177
178
  "claude-sonnet-4-5-20250929",
179
+ "claude-opus-4-1-20250805",
178
180
  "claude-sonnet-4-20250514",
179
- "claude-opus-4-20250514",
180
181
  ],
181
182
  default_model="claude-sonnet-4-5-20250929",
182
183
  env_var="ANTHROPIC_API_KEY",
@@ -218,12 +219,11 @@ BACKEND_CAPABILITIES: Dict[str, BackendCapabilities] = {
218
219
  filesystem_support="mcp",
219
220
  models=[
220
221
  "grok-4",
222
+ "grok-4-fast",
221
223
  "grok-3",
222
224
  "grok-3-mini",
223
- "grok-beta",
224
- "grok-vision-beta",
225
225
  ],
226
- default_model="grok-beta",
226
+ default_model="grok-4",
227
227
  env_var="XAI_API_KEY",
228
228
  notes="Web search includes real-time data access.",
229
229
  ),
massgen/backend/chat_completions.py CHANGED
@@ -229,18 +229,25 @@ class ChatCompletionsBackend(CustomToolAndMCPBackend):
229
229
  updated_messages = current_messages.copy()
230
230
  processed_call_ids = set() # Track processed calls
231
231
 
232
- # Check if planning mode is enabled - block MCP tool execution during planning
232
+ # Check if planning mode is enabled - selectively block MCP tool execution during planning
233
233
  if self.is_planning_mode_enabled():
234
- logger.info("[MCP] Planning mode enabled - blocking all MCP tool execution")
235
- yield StreamChunk(
236
- type="mcp_status",
237
- status="planning_mode_blocked",
238
- content="🚫 [MCP] Planning mode active - MCP tools blocked during coordination",
239
- source="planning_mode",
240
- )
241
- # Skip all MCP tool execution but still continue with workflow
242
- yield StreamChunk(type="done")
243
- return
234
+ blocked_tools = self.get_planning_mode_blocked_tools()
235
+
236
+ if not blocked_tools:
237
+ # Empty set means block ALL MCP tools (backward compatible)
238
+ logger.info("[ChatCompletions] Planning mode enabled - blocking ALL MCP tool execution")
239
+ yield StreamChunk(
240
+ type="mcp_status",
241
+ status="planning_mode_blocked",
242
+ content="🚫 [MCP] Planning mode active - all MCP tools blocked during coordination",
243
+ source="planning_mode",
244
+ )
245
+ # Skip all MCP tool execution but still continue with workflow
246
+ yield StreamChunk(type="done")
247
+ return
248
+ else:
249
+ # Selective blocking - log but continue to check each tool individually
250
+ logger.info(f"[ChatCompletions] Planning mode enabled - selective blocking of {len(blocked_tools)} tools")
244
251
 
245
252
  # Create single assistant message with all tool calls
246
253
  if captured_function_calls:
massgen/backend/claude_code.py CHANGED
@@ -795,7 +795,7 @@ class ClaudeCodeBackend(LLMBackend):
795
795
 
796
796
  # Add workflow tools information if present
797
797
  if tools:
798
- workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote"]]
798
+ workflow_tools = [t for t in tools if t.get("function", {}).get("name") in ["new_answer", "vote", "submit", "restart_orchestration"]]
799
799
  if workflow_tools:
800
800
  system_parts.append("\n--- Coordination Actions ---")
801
801
  for tool in workflow_tools:
@@ -823,6 +823,14 @@ class ClaudeCodeBackend(LLMBackend):
823
823
  system_parts.append(f' Usage: {{"tool_name": "vote", ' f'"arguments": {{"agent_id": "agent1", ' f'"reason": "explanation"}}}} // Choose agent_id from: {agent_list}')
824
824
  else:
825
825
  system_parts.append(' Usage: {"tool_name": "vote", ' '"arguments": {"agent_id": "agent1", ' '"reason": "explanation"}}')
826
+ elif name == "submit":
827
+ system_parts.append(
828
+ ' Usage: {"tool_name": "submit", ' '"arguments": {"confirmed": true}}',
829
+ )
830
+ elif name == "restart_orchestration":
831
+ system_parts.append(
832
+ ' Usage: {"tool_name": "restart_orchestration", ' '"arguments": {"reason": "The answer is incomplete because...", ' '"instructions": "In the next attempt, please..."}}',
833
+ )
826
834
 
827
835
  system_parts.append("\n--- MassGen Coordination Instructions ---")
828
836
  system_parts.append("IMPORTANT: You must respond with a structured JSON decision at the end of your response.")
massgen/backend/gemini.py CHANGED
@@ -20,6 +20,7 @@ TECHNICAL SOLUTION:
20
20
  """
21
21
 
22
22
  import json
23
+ import logging
23
24
  import os
24
25
  import time
25
26
  from typing import Any, AsyncGenerator, Dict, List, Optional
@@ -39,6 +40,19 @@ from .gemini_mcp_manager import GeminiMCPManager
39
40
  from .gemini_trackers import MCPCallTracker, MCPResponseExtractor, MCPResponseTracker
40
41
  from .gemini_utils import CoordinationResponse
41
42
 
43
+
44
+ # Suppress Gemini SDK logger warning about non-text parts in response
45
+ # Using custom filter per https://github.com/googleapis/python-genai/issues/850
46
+ class NoFunctionCallWarning(logging.Filter):
47
+ def filter(self, record: logging.LogRecord) -> bool:
48
+ message = record.getMessage()
49
+ if "there are non-text parts in the response:" in message:
50
+ return False
51
+ return True
52
+
53
+
54
+ logging.getLogger("google_genai.types").addFilter(NoFunctionCallWarning())
55
+
42
56
  try:
43
57
  from pydantic import BaseModel, Field
44
58
  except ImportError:
@@ -220,6 +234,7 @@ class GeminiBackend(CustomToolAndMCPBackend):
220
234
 
221
235
  # Analyze tool types
222
236
  is_coordination = self.formatter.has_coordination_tools(tools)
237
+ is_post_evaluation = self.formatter.has_post_evaluation_tools(tools)
223
238
 
224
239
  valid_agent_ids = None
225
240
 
@@ -239,6 +254,9 @@ class GeminiBackend(CustomToolAndMCPBackend):
239
254
  # For coordination requests, modify the prompt to use structured output
240
255
  if is_coordination:
241
256
  full_content = self.formatter.build_structured_output_prompt(full_content, valid_agent_ids)
257
+ elif is_post_evaluation:
258
+ # For post-evaluation, modify prompt to use structured output
259
+ full_content = self.formatter.build_post_evaluation_prompt(full_content)
242
260
 
243
261
  # Use google-genai package
244
262
  client = genai.Client(api_key=self.api_key)
@@ -277,6 +295,16 @@ class GeminiBackend(CustomToolAndMCPBackend):
277
295
  else:
278
296
  # Tools or sessions are present; fallback to text parsing
279
297
  pass
298
+ elif is_post_evaluation:
299
+ # For post-evaluation, use JSON response format for structured decisions
300
+ from .gemini_utils import PostEvaluationResponse
301
+
302
+ if (not using_sdk_mcp) and (not using_custom_tools) and (not all_tools):
303
+ config["response_mime_type"] = "application/json"
304
+ config["response_schema"] = PostEvaluationResponse.model_json_schema()
305
+ else:
306
+ # Tools or sessions are present; fallback to text parsing
307
+ pass
280
308
  # Log messages being sent after builtin_tools is defined
281
309
  log_backend_agent_message(
282
310
  agent_id or "default",
@@ -387,6 +415,42 @@ class GeminiBackend(CustomToolAndMCPBackend):
387
415
  tools_to_apply.extend(mcp_sessions)
388
416
  sessions_applied = True
389
417
 
418
+ if self.is_planning_mode_enabled():
419
+ blocked_tools = self.get_planning_mode_blocked_tools()
420
+
421
+ if not blocked_tools:
422
+ # Empty set means block ALL MCP tools (backward compatible)
423
+ logger.info("[Gemini] Planning mode enabled - blocking ALL MCP tools during coordination")
424
+ # Don't set tools at all - this prevents any MCP tool execution
425
+ log_backend_activity(
426
+ "gemini",
427
+ "All MCP tools blocked in planning mode",
428
+ {
429
+ "blocked_tools": len(available_mcp_tools),
430
+ "session_count": len(mcp_sessions),
431
+ },
432
+ agent_id=agent_id,
433
+ )
434
+ else:
435
+ # Selective blocking - allow non-blocked tools to be called
436
+ # The execution layer (_execute_mcp_function_with_retry) will enforce blocking
437
+ # but we still register all tools so non-blocked ones can be used
438
+ logger.info(f"[Gemini] Planning mode enabled - allowing non-blocked MCP tools, blocking {len(blocked_tools)} specific tools")
439
+
440
+ # Pass all sessions - the backend's is_mcp_tool_blocked() will handle selective blocking
441
+ session_config["tools"] = mcp_sessions
442
+
443
+ log_backend_activity(
444
+ "gemini",
445
+ "Selective MCP tools blocked in planning mode",
446
+ {
447
+ "total_tools": len(available_mcp_tools),
448
+ "blocked_tools": len(blocked_tools),
449
+ "allowed_tools": len(available_mcp_tools) - len(blocked_tools),
450
+ },
451
+ agent_id=agent_id,
452
+ )
453
+
390
454
  # Add custom tools (if available)
391
455
  if has_custom_tools:
392
456
  # Wrap FunctionDeclarations in a Tool object for Gemini SDK
@@ -1567,11 +1631,11 @@ class GeminiBackend(CustomToolAndMCPBackend):
1567
1631
 
1568
1632
  content = full_content_text
1569
1633
 
1570
- # Process tool calls - only coordination tool calls (MCP manual mode removed)
1634
+ # Process tool calls - coordination and post-evaluation tool calls (MCP manual mode removed)
1571
1635
  tool_calls_detected: List[Dict[str, Any]] = []
1572
1636
 
1573
- # Then, process coordination tools if present
1574
- if is_coordination and content.strip() and not tool_calls_detected:
1637
+ # Process coordination tools OR post-evaluation tools if present
1638
+ if (is_coordination or is_post_evaluation) and content.strip() and not tool_calls_detected:
1575
1639
  # For structured output mode, the entire content is JSON
1576
1640
  structured_response = None
1577
1641
  # Try multiple parsing strategies
@@ -1590,14 +1654,15 @@ class GeminiBackend(CustomToolAndMCPBackend):
1590
1654
  # Log conversion to tool calls (summary)
1591
1655
  log_stream_chunk("backend.gemini", "tool_calls", tool_calls, agent_id)
1592
1656
 
1593
- # Log each coordination tool call for analytics/debugging
1657
+ # Log each tool call for analytics/debugging
1658
+ tool_type = "post_evaluation" if is_post_evaluation else "coordination"
1594
1659
  try:
1595
1660
  for tool_call in tool_calls:
1596
1661
  log_tool_call(
1597
1662
  agent_id,
1598
- tool_call.get("function", {}).get("name", "unknown_coordination_tool"),
1663
+ tool_call.get("function", {}).get("name", f"unknown_{tool_type}_tool"),
1599
1664
  tool_call.get("function", {}).get("arguments", {}),
1600
- result="coordination_tool_called",
1665
+ result=f"{tool_type}_tool_called",
1601
1666
  backend_name="gemini",
1602
1667
  )
1603
1668
  except Exception:
massgen/backend/gemini_utils.py CHANGED
@@ -20,6 +20,13 @@ class ActionType(enum.Enum):
20
20
  NEW_ANSWER = "new_answer"
21
21
 
22
22
 
23
+ class PostEvaluationActionType(enum.Enum):
24
+ """Action types for post-evaluation structured output."""
25
+
26
+ SUBMIT = "submit"
27
+ RESTART = "restart"
28
+
29
+
23
30
  class VoteAction(BaseModel):
24
31
  """Structured output for voting action."""
25
32
 
@@ -41,3 +48,26 @@ class CoordinationResponse(BaseModel):
41
48
  action_type: ActionType = Field(description="Type of action to take")
42
49
  vote_data: Optional[VoteAction] = Field(default=None, description="Vote data if action is vote")
43
50
  answer_data: Optional[NewAnswerAction] = Field(default=None, description="Answer data if action is new_answer")
51
+
52
+
53
+ class SubmitAction(BaseModel):
54
+ """Structured output for submit action (post-evaluation)."""
55
+
56
+ action: PostEvaluationActionType = Field(default=PostEvaluationActionType.SUBMIT, description="Action type")
57
+ confirmed: bool = Field(default=True, description="Confirmation that answer is satisfactory")
58
+
59
+
60
+ class RestartAction(BaseModel):
61
+ """Structured output for restart action (post-evaluation)."""
62
+
63
+ action: PostEvaluationActionType = Field(default=PostEvaluationActionType.RESTART, description="Action type")
64
+ reason: str = Field(description="Clear explanation of why the answer is insufficient")
65
+ instructions: str = Field(description="Detailed, actionable guidance for agents on the next attempt")
66
+
67
+
68
+ class PostEvaluationResponse(BaseModel):
69
+ """Structured response for post-evaluation actions."""
70
+
71
+ action_type: PostEvaluationActionType = Field(description="Type of post-evaluation action to take")
72
+ submit_data: Optional[SubmitAction] = Field(default=None, description="Submit data if action is submit")
73
+ restart_data: Optional[RestartAction] = Field(default=None, description="Restart data if action is restart")