massgen 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +7 -1
  5. massgen/backend/azure_openai.py +9 -1
  6. massgen/backend/base.py +56 -0
  7. massgen/backend/base_with_custom_tool_and_mcp.py +4 -4
  8. massgen/backend/capabilities.py +6 -6
  9. massgen/backend/chat_completions.py +18 -11
  10. massgen/backend/claude_code.py +9 -1
  11. massgen/backend/gemini.py +71 -6
  12. massgen/backend/gemini_utils.py +30 -0
  13. massgen/backend/grok.py +39 -6
  14. massgen/backend/response.py +18 -11
  15. massgen/chat_agent.py +9 -3
  16. massgen/cli.py +319 -43
  17. massgen/config_builder.py +163 -18
  18. massgen/configs/README.md +78 -20
  19. massgen/configs/basic/multi/three_agents_default.yaml +2 -2
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
  27. massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  35. massgen/configs/tools/memory/README.md +199 -0
  36. massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
  37. massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
  38. massgen/configs/tools/memory/test_context_window_management.py +286 -0
  39. massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
  40. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
  41. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
  42. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
  43. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
  44. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
  45. massgen/docker/README.md +83 -0
  46. massgen/filesystem_manager/_code_execution_server.py +22 -7
  47. massgen/filesystem_manager/_docker_manager.py +21 -1
  48. massgen/filesystem_manager/_filesystem_manager.py +8 -0
  49. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  50. massgen/formatter/_gemini_formatter.py +73 -0
  51. massgen/frontend/coordination_ui.py +175 -257
  52. massgen/frontend/displays/base_display.py +29 -0
  53. massgen/frontend/displays/rich_terminal_display.py +155 -9
  54. massgen/frontend/displays/simple_display.py +21 -0
  55. massgen/frontend/displays/terminal_display.py +22 -2
  56. massgen/logger_config.py +50 -6
  57. massgen/message_templates.py +123 -3
  58. massgen/orchestrator.py +652 -44
  59. massgen/tests/test_code_execution.py +178 -0
  60. massgen/tests/test_intelligent_planning_mode.py +643 -0
  61. massgen/tests/test_orchestration_restart.py +204 -0
  62. massgen/token_manager/token_manager.py +13 -4
  63. massgen/tool/__init__.py +4 -0
  64. massgen/tool/_multimodal_tools/understand_audio.py +193 -0
  65. massgen/tool/_multimodal_tools/understand_file.py +550 -0
  66. massgen/tool/_multimodal_tools/understand_image.py +212 -0
  67. massgen/tool/_multimodal_tools/understand_video.py +313 -0
  68. massgen/tool/docs/multimodal_tools.md +779 -0
  69. massgen/tool/workflow_toolkits/__init__.py +26 -0
  70. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  71. massgen/utils.py +1 -0
  72. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/METADATA +57 -52
  73. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/RECORD +77 -49
  74. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
  75. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
  76. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
  77. {massgen-0.1.1.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
massgen/orchestrator.py CHANGED
@@ -44,7 +44,7 @@ from .logger_config import (
44
44
  )
45
45
  from .message_templates import MessageTemplates
46
46
  from .stream_chunk import ChunkType
47
- from .tool import get_workflow_tools
47
+ from .tool import get_post_evaluation_tools, get_workflow_tools
48
48
  from .utils import ActionType, AgentStatus, CoordinationStage
49
49
 
50
50
 
@@ -164,6 +164,14 @@ class Orchestrator(ChatAgent):
164
164
  self.is_orchestrator_timeout: bool = False
165
165
  self.timeout_reason: Optional[str] = None
166
166
 
167
+ # Restart feature state tracking
168
+ self.current_attempt: int = 0
169
+ max_restarts = self.config.coordination_config.max_orchestration_restarts
170
+ self.max_attempts: int = 1 + max_restarts
171
+ self.restart_pending: bool = False
172
+ self.restart_reason: Optional[str] = None
173
+ self.restart_instructions: Optional[str] = None
174
+
167
175
  # Coordination state tracking for cleanup
168
176
  self._active_streams: Dict = {}
169
177
  self._active_tasks: Dict = {}
@@ -264,10 +272,41 @@ class Orchestrator(ChatAgent):
264
272
  self.coordination_tracker.initialize_session(list(self.agents.keys()), self.current_task)
265
273
  self.workflow_phase = "coordinating"
266
274
 
275
+ # Reset restart_pending flag at start of coordination (will be set again if restart needed)
276
+ self.restart_pending = False
277
+
267
278
  # Clear agent workspaces for new turn (if this is a multi-turn conversation with history)
268
279
  if conversation_context and conversation_context.get("conversation_history"):
269
280
  self._clear_agent_workspaces()
270
281
 
282
+ # Check if planning mode is enabled in config
283
+ planning_mode_config_exists = (
284
+ self.config.coordination_config and self.config.coordination_config.enable_planning_mode if self.config and hasattr(self.config, "coordination_config") else False
285
+ )
286
+
287
+ if planning_mode_config_exists:
288
+ # Analyze question for irreversibility and set planning mode accordingly
289
+ # This happens silently - users don't see this analysis
290
+ analysis_result = await self._analyze_question_irreversibility(user_message, conversation_context)
291
+ has_irreversible = analysis_result["has_irreversible"]
292
+ blocked_tools = analysis_result["blocked_tools"]
293
+
294
+ # Set planning mode and blocked tools for all agents based on analysis
295
+ for agent_id, agent in self.agents.items():
296
+ if hasattr(agent.backend, "set_planning_mode"):
297
+ agent.backend.set_planning_mode(has_irreversible)
298
+ if hasattr(agent.backend, "set_planning_mode_blocked_tools"):
299
+ agent.backend.set_planning_mode_blocked_tools(blocked_tools)
300
+ log_orchestrator_activity(
301
+ self.orchestrator_id,
302
+ f"Set planning mode for {agent_id}",
303
+ {
304
+ "planning_mode_enabled": has_irreversible,
305
+ "blocked_tools_count": len(blocked_tools),
306
+ "reason": "irreversibility analysis",
307
+ },
308
+ )
309
+
271
310
  async for chunk in self._coordinate_agents_with_timeout(conversation_context):
272
311
  yield chunk
273
312
 
@@ -336,8 +375,299 @@ class Orchestrator(ChatAgent):
336
375
  if log_session_dir:
337
376
  self.coordination_tracker.save_coordination_logs(log_session_dir)
338
377
 
378
def _format_planning_mode_ui(
    self,
    has_irreversible: bool,
    blocked_tools: set,
    has_isolated_workspaces: bool,
    user_question: str,
) -> str:
    """
    Format a bordered text box describing the planning mode status.

    Every row is padded programmatically to the same display width, so the
    right-hand border stays aligned regardless of the content (multi-digit
    "more tool(s)" counts, emoji, long tool names).

    Args:
        has_irreversible: Whether irreversible operations were detected.
            When False a short "planning mode disabled" box is returned.
        blocked_tools: Set of specific blocked tool names. An empty set is
            rendered as "ALL MCP tools".
        has_isolated_workspaces: Whether agents have isolated workspaces;
            adds a workspace row when True.
        user_question: The user's question; its first 50 characters are
            shown as a one-line summary.

    Returns:
        The box as a single string, starting with a newline and ending
        with a newline after the bottom border.
    """
    # Local import, matching the function-scope import style used nearby.
    import unicodedata

    inner_width = 60  # columns between the two vertical borders
    max_listed_tools = 5
    max_tool_name = 50
    max_summary = 50

    def display_width(text: str) -> int:
        # Wide/fullwidth characters (the emoji used here) occupy two terminal columns.
        return sum(2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1 for ch in text)

    def row(text: str = "", indent: int = 2) -> str:
        body = " " * indent + text
        padding = max(inner_width - display_width(body), 0)
        return "│" + body + " " * padding + "│\n"

    title = "─ Coordination Mode "
    top = "\n╭" + title + "─" * (inner_width - len(title)) + "╮\n"
    bottom = "╰" + "─" * inner_width + "╯\n"

    if not has_irreversible:
        # Planning mode disabled - brief message
        rows = [
            row("✅ Planning Mode: DISABLED"),
            row(),
            row("All tools available during coordination."),
            row("No irreversible operations detected."),
        ]
        return top + "".join(rows) + bottom

    # Planning mode enabled
    rows = [row("🧠 Planning Mode: ENABLED"), row()]

    if has_isolated_workspaces:
        rows.append(row("🔒 Workspace: Isolated (filesystem ops allowed)"))
        rows.append(row())

    # Description
    rows.append(row("Agents will plan and coordinate without executing"))
    rows.append(row("irreversible actions. The winning agent will implement"))
    rows.append(row("the plan during final presentation."))
    rows.append(row())

    # Blocked tools section
    if blocked_tools:
        rows.append(row("🚫 Blocked Tools:"))
        ordered = sorted(blocked_tools)
        for position, tool in enumerate(ordered[:max_listed_tools], 1):
            # Shorten tool name if too long
            shown = tool if len(tool) <= max_tool_name else tool[: max_tool_name - 3] + "..."
            rows.append(row(f"{position}. {shown}", indent=4))
        hidden = len(ordered) - max_listed_tools
        if hidden > 0:
            rows.append(row(f"... and {hidden} more tool(s)", indent=4))
        rows.append(row())
    else:
        rows.append(row("🚫 Blocking: ALL MCP tools"))
        rows.append(row())

    # Brief analysis summary taken from the question
    rows.append(row("📊 Analysis:"))
    summary = user_question[:max_summary] + "..." if len(user_question) > max_summary else user_question
    # The summary always fits on one row, so only whitespace (newlines, runs
    # of spaces) needs normalising; no wrapping is required.
    summary = " ".join(summary.split())
    if summary:
        rows.append(row(summary, indent=3))

    return top + "".join(rows) + bottom
458
+
459
async def _analyze_question_irreversibility(self, user_question: str, conversation_context: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyze if the user's question involves MCP tools with irreversible outcomes.

    A randomly chosen agent (one whose backend is not None) is asked whether
    executing the request would involve MCP tool operations with irreversible
    outcomes (e.g., sending Discord messages, posting tweets, deleting files)
    as opposed to reversible read operations. Its reply is parsed by
    ``_parse_irreversibility_response``.

    Any failure (no agent available, backend error, unparseable reply)
    resolves to the safe result: planning mode enabled with ALL MCP tools
    blocked.

    Args:
        user_question: The user's question/request.
        conversation_context: Conversation context including history, or
            None. Accepted for interface compatibility; it is not read by
            this method.

    Returns:
        Dict with:
        - has_irreversible (bool): True if irreversible operations detected
        - blocked_tools (set): Set of MCP tool names to block
          (e.g., {'mcp__discord__discord_send'}). Empty set means block
          ALL MCP tools.
    """
    import random

    print("=" * 80, flush=True)
    print("🔍 [INTELLIGENT PLANNING MODE] Analyzing question for irreversibility...", flush=True)
    print(f"📝 Question: {user_question[:100]}{'...' if len(user_question) > 100 else ''}", flush=True)
    print("=" * 80, flush=True)

    # Select a random agent for analysis
    available_agents = [aid for aid, agent in self.agents.items() if agent.backend is not None]
    if not available_agents:
        # No agents available, default to safe mode (planning enabled, block ALL)
        log_orchestrator_activity(
            self.orchestrator_id,
            "No agents available for irreversibility analysis, defaulting to planning mode",
            {},
        )
        return {"has_irreversible": True, "blocked_tools": set()}

    analyzer_agent_id = random.choice(available_agents)
    analyzer_agent = self.agents[analyzer_agent_id]

    print(f"🤖 Selected analyzer agent: {analyzer_agent_id}", flush=True)

    # An agent counts as isolated when the last component of its working
    # directory contains "workspace"; one such agent is enough.
    has_isolated_workspaces = False
    for agent in self.agents.values():
        backend = agent.backend
        if not (backend and backend.filesystem_manager):
            continue
        cwd = backend.filesystem_manager.cwd
        if cwd and "workspace" in os.path.basename(cwd).lower():
            has_isolated_workspaces = True
            break

    if has_isolated_workspaces:
        print("🔒 Detected isolated agent workspaces - filesystem ops will be allowed", flush=True)

    log_orchestrator_activity(
        self.orchestrator_id,
        "Analyzing question irreversibility",
        {
            "analyzer_agent": analyzer_agent_id,
            "question_preview": user_question[:100] + "..." if len(user_question) > 100 else user_question,
            "has_isolated_workspaces": has_isolated_workspaces,
        },
    )

    # Build analysis prompt - asks for specific tool names
    workspace_context = ""
    if has_isolated_workspaces:
        workspace_context = """
IMPORTANT - ISOLATED WORKSPACES:
The agents are working in isolated temporary workspaces (directories containing "workspace" in their name).
Filesystem operations (read_file, write_file, delete_file, list_files, etc.) within these isolated workspaces are SAFE and REVERSIBLE.
They should NOT be blocked because:
- These are temporary directories specific to this coordination session
- Files created/modified are isolated from external systems
- Changes are contained within the agent's sandbox
- The workspace can be cleared after coordination

Only block filesystem operations if they explicitly target paths OUTSIDE the isolated workspace.
"""

    analysis_prompt = f"""You are analyzing whether a user's request involves operations with irreversible outcomes.

USER REQUEST:
{user_question}
{workspace_context}
CONTEXT:
Your task is to determine if executing this request would involve MCP (Model Context Protocol) tools that have irreversible outcomes, and if so, identify which specific tools should be blocked.

MCP tools follow the naming convention: mcp__<server>__<tool_name>
Examples:
- mcp__discord__discord_send (irreversible - sends messages)
- mcp__discord__discord_read_channel (reversible - reads messages)
- mcp__twitter__post_tweet (irreversible - posts publicly)
- mcp__twitter__search_tweets (reversible - searches)
- mcp__filesystem__write_file (SAFE in isolated workspace - writes to temporary files)
- mcp__filesystem__read_file (reversible - reads files)

IRREVERSIBLE OPERATIONS:
- Sending messages (discord_send, slack_send, etc.)
- Posting content publicly (post_tweet, create_post, etc.)
- Deleting files or data OUTSIDE isolated workspace (delete_file on external paths, remove_data, etc.)
- Modifying external systems (write_file to external paths, update_record, etc.)
- Creating permanent records (create_issue, add_comment, etc.)
- Executing commands that change state (run_command, execute_script, etc.)

REVERSIBLE OPERATIONS (DO NOT BLOCK):
- Reading messages or data (read_channel, get_messages, etc.)
- Searching or querying information (search_tweets, query_data, etc.)
- Listing files or resources (list_files, list_channels, etc.)
- Fetching data from APIs (get_user, fetch_data, etc.)
- Viewing information (view_channel, get_info, etc.)
- Filesystem operations IN ISOLATED WORKSPACE (write_file, read_file, delete_file, list_files when in workspace*)

Respond in this EXACT format:
IRREVERSIBLE: YES/NO
BLOCKED_TOOLS: tool1, tool2, tool3

If IRREVERSIBLE is NO, leave BLOCKED_TOOLS empty.
If IRREVERSIBLE is YES, list the specific MCP tool names that should be blocked (e.g., mcp__discord__discord_send).

Your answer:"""

    # Create messages for the analyzer
    analysis_messages = [
        {"role": "user", "content": analysis_prompt},
    ]

    try:
        # Stream response from analyzer agent (but don't show to user)
        pieces = []
        async for chunk in analyzer_agent.backend.stream_with_tools(
            messages=analysis_messages,
            tools=[],  # No tools needed for simple analysis
            agent_id=analyzer_agent_id,
        ):
            if chunk.type == "content" and chunk.content:
                pieces.append(chunk.content)

        response_clean = "".join(pieces).strip()
        verdict = self._parse_irreversibility_response(response_clean)
        has_irreversible = verdict["has_irreversible"]
        blocked_tools = verdict["blocked_tools"]

        log_orchestrator_activity(
            self.orchestrator_id,
            "Irreversibility analysis complete",
            {
                "analyzer_agent": analyzer_agent_id,
                "response": response_clean[:100],
                "has_irreversible": has_irreversible,
                "blocked_tools_count": len(blocked_tools),
            },
        )

        # Display UI box for planning mode status
        ui_box = self._format_planning_mode_ui(
            has_irreversible=has_irreversible,
            blocked_tools=blocked_tools,
            has_isolated_workspaces=has_isolated_workspaces,
            user_question=user_question,
        )
        print(ui_box, flush=True)

        return {"has_irreversible": has_irreversible, "blocked_tools": blocked_tools}

    except Exception as e:
        # Deliberate best-effort boundary: on any error, default to safe mode
        # (planning enabled, block ALL) rather than failing coordination.
        log_orchestrator_activity(
            self.orchestrator_id,
            "Irreversibility analysis failed, defaulting to planning mode",
            {"error": str(e)},
        )
        return {"has_irreversible": True, "blocked_tools": set()}

def _parse_irreversibility_response(self, response_text: str) -> Dict[str, Any]:
    """
    Parse the analyzer's reply into a planning-mode verdict.

    Recognises ``IRREVERSIBLE: <YES|NO>`` and ``BLOCKED_TOOLS: a, b`` lines,
    ignoring case and surrounding markdown decoration. When a label occurs
    more than once the last occurrence wins. Whenever the verdict is
    unclear the result is the safe one (``has_irreversible=True``).

    Args:
        response_text: Raw text produced by the analyzer agent.

    Returns:
        Dict with ``has_irreversible`` (bool) and ``blocked_tools`` (set of
        tool-name strings; empty means block ALL MCP tools).
    """
    import re

    label_pattern = re.compile(r"^\W*(IRREVERSIBLE|BLOCKED_TOOLS)[^\w:]*:\W*(.*)$", re.IGNORECASE)
    # Values a model may write instead of leaving the list empty. Treating
    # them as tool names would block only a non-existent tool.
    placeholders = {"none", "n/a", "na", "empty", "nothing"}

    has_irreversible = None  # stays None until an IRREVERSIBLE line is seen
    blocked_tools = set()

    for raw_line in response_text.splitlines():
        match = label_pattern.match(raw_line.strip())
        if not match:
            continue
        label = match.group(1).upper()
        value = match.group(2).strip()
        if label == "IRREVERSIBLE":
            answer = value.upper()
            if answer.startswith("YES"):
                has_irreversible = True
            elif answer.startswith("NO"):
                has_irreversible = False
            else:
                # Label present but value unclear: stay in safe mode.
                has_irreversible = True
        elif value:
            names = (item.strip(" \t`*'\"[]()") for item in value.split(","))
            blocked_tools = {name for name in names if name and name.lower() not in placeholders}

    if has_irreversible is None:
        # Fallback: no structured verdict, look for whole-word YES/NO.
        print("⚠️ [WARNING] No 'IRREVERSIBLE:' line found, using fallback parsing", flush=True)
        words = {word.upper() for word in re.findall(r"[A-Za-z]+", response_text)}
        # Only an unambiguous NO disables planning mode; a reply with both,
        # or neither, is unclear and defaults to safe mode.
        has_irreversible = not ("NO" in words and "YES" not in words)

    return {"has_irreversible": has_irreversible, "blocked_tools": blocked_tools}
663
+
339
664
  async def _coordinate_agents_with_timeout(self, conversation_context: Optional[Dict[str, Any]] = None) -> AsyncGenerator[StreamChunk, None]:
340
- """Execute coordination with orchestrator-level timeout protection."""
665
+ """Execute coordination with orchestrator-level timeout protection.
666
+
667
+ When restart is needed, this method completes and returns control to CLI,
668
+ which will call coordinate() again (similar to multiturn pattern).
669
+ """
670
+ # Reset timing and state for this attempt
341
671
  self.coordination_start_time = time.time()
342
672
  self.total_tokens = 0
343
673
  self.is_orchestrator_timeout = False
@@ -345,13 +675,19 @@ class Orchestrator(ChatAgent):
345
675
 
346
676
  log_orchestrator_activity(
347
677
  self.orchestrator_id,
348
- "Starting coordination with timeout",
678
+ f"Starting coordination attempt {self.current_attempt + 1}/{self.max_attempts}",
349
679
  {
350
680
  "timeout_seconds": self.config.timeout_config.orchestrator_timeout_seconds,
351
681
  "agents": list(self.agents.keys()),
682
+ "has_restart_context": bool(self.restart_reason),
352
683
  },
353
684
  )
354
685
 
686
+ # Set log attempt for directory organization
687
+ from massgen.logger_config import set_log_attempt
688
+
689
+ set_log_attempt(self.current_attempt + 1)
690
+
355
691
  # Track active coordination state for cleanup
356
692
  self._active_streams = {}
357
693
  self._active_tasks = {}
@@ -385,6 +721,8 @@ class Orchestrator(ChatAgent):
385
721
  async for chunk in self._handle_orchestrator_timeout():
386
722
  yield chunk
387
723
 
724
+ # Exit here - if restart is needed, CLI will call coordinate() again
725
+
388
726
  async def _coordinate_agents(self, conversation_context: Optional[Dict[str, Any]] = None) -> AsyncGenerator[StreamChunk, None]:
389
727
  """Execute unified MassGen coordination workflow with real-time streaming."""
390
728
  log_coordination_step(
@@ -1352,10 +1690,16 @@ class Orchestrator(ChatAgent):
1352
1690
 
1353
1691
  # Extract command execution parameters
1354
1692
  enable_command_execution = False
1693
+ docker_mode = False
1694
+ enable_sudo = False
1355
1695
  if hasattr(agent, "config") and agent.config:
1356
1696
  enable_command_execution = agent.config.backend_params.get("enable_mcp_command_line", False)
1697
+ docker_mode = agent.config.backend_params.get("command_line_execution_mode", "local") == "docker"
1698
+ enable_sudo = agent.config.backend_params.get("command_line_docker_enable_sudo", False)
1357
1699
  elif hasattr(agent, "backend") and hasattr(agent.backend, "backend_params"):
1358
1700
  enable_command_execution = agent.backend.backend_params.get("enable_mcp_command_line", False)
1701
+ docker_mode = agent.backend.backend_params.get("command_line_execution_mode", "local") == "docker"
1702
+ enable_sudo = agent.backend.backend_params.get("command_line_docker_enable_sudo", False)
1359
1703
 
1360
1704
  filesystem_system_message = self.message_templates.filesystem_system_message(
1361
1705
  main_workspace=main_workspace,
@@ -1366,6 +1710,8 @@ class Orchestrator(ChatAgent):
1366
1710
  enable_image_generation=enable_image_generation,
1367
1711
  agent_answers=answers,
1368
1712
  enable_command_execution=enable_command_execution,
1713
+ docker_mode=docker_mode,
1714
+ enable_sudo=enable_sudo,
1369
1715
  )
1370
1716
  agent_system_message = f"{agent_system_message}\n\n{filesystem_system_message}" if agent_system_message else filesystem_system_message
1371
1717
 
@@ -1379,17 +1725,17 @@ class Orchestrator(ChatAgent):
1379
1725
  logger.info(f"[Orchestrator] Agent {agent_id} sees no existing answers")
1380
1726
 
1381
1727
  # Check if planning mode is enabled for coordination phase
1728
+ # Use the ACTUAL backend planning mode status (set by intelligent analysis)
1729
+ # instead of the static config setting
1382
1730
  is_coordination_phase = self.workflow_phase == "coordinating"
1383
- planning_mode_enabled = (
1384
- self.config.coordination_config and self.config.coordination_config.enable_planning_mode and is_coordination_phase
1385
- if self.config and hasattr(self.config, "coordination_config")
1386
- else False
1387
- )
1731
+ planning_mode_enabled = agent.backend.is_planning_mode_enabled() if is_coordination_phase else False
1388
1732
 
1389
1733
  # Add planning mode instructions to system message if enabled
1390
- if planning_mode_enabled and self.config.coordination_config.planning_mode_instruction:
1734
+ # Only add instructions if we have a coordination config with planning instruction
1735
+ if planning_mode_enabled and self.config and hasattr(self.config, "coordination_config") and self.config.coordination_config and self.config.coordination_config.planning_mode_instruction:
1391
1736
  planning_instructions = f"\n\n{self.config.coordination_config.planning_mode_instruction}"
1392
1737
  agent_system_message = f"{agent_system_message}{planning_instructions}" if agent_system_message else planning_instructions.strip()
1738
+ print(f"📝 [{agent_id}] Adding planning mode instructions to system message", flush=True)
1393
1739
 
1394
1740
  # Build conversation with context support
1395
1741
  if conversation_context and conversation_context.get("conversation_history"):
@@ -1410,6 +1756,15 @@ class Orchestrator(ChatAgent):
1410
1756
  base_system_message=agent_system_message,
1411
1757
  )
1412
1758
 
1759
+ # Inject restart context if this is a restart attempt (like multi-turn context)
1760
+ if self.restart_reason and self.restart_instructions:
1761
+ restart_context = self.message_templates.format_restart_context(
1762
+ self.restart_reason,
1763
+ self.restart_instructions,
1764
+ )
1765
+ # Prepend restart context to user message
1766
+ conversation["user_message"] = restart_context + "\n\n" + conversation["user_message"]
1767
+
1413
1768
  # Track all the context used for this agent execution
1414
1769
  self.coordination_tracker.track_agent_context(
1415
1770
  agent_id,
@@ -1891,48 +2246,81 @@ class Orchestrator(ChatAgent):
1891
2246
  return ("error", str(e))
1892
2247
 
1893
2248
  async def _present_final_answer(self) -> AsyncGenerator[StreamChunk, None]:
1894
- """Present the final coordinated answer."""
1895
- log_stream_chunk("orchestrator", "content", "## 🎯 Final Coordinated Answer\n")
1896
- yield StreamChunk(type="content", content="## 🎯 Final Coordinated Answer\n")
2249
+ """Present the final coordinated answer with optional post-evaluation and restart loop."""
1897
2250
 
1898
2251
  # Select the best agent based on current state
1899
2252
  if not self._selected_agent:
1900
2253
  self._selected_agent = self._determine_final_agent_from_states()
1901
- if self._selected_agent:
1902
- log_stream_chunk(
1903
- "orchestrator",
1904
- "content",
1905
- f"🏆 Selected Agent: {self._selected_agent}\n",
1906
- )
1907
- yield StreamChunk(
1908
- type="content",
1909
- content=f"🏆 Selected Agent: {self._selected_agent}\n",
1910
- )
1911
-
1912
- if self._selected_agent and self._selected_agent in self.agent_states and self.agent_states[self._selected_agent].answer:
1913
- final_answer = self.agent_states[self._selected_agent].answer # NOTE: This is the raw answer from the winning agent, not the actual final answer.
1914
-
1915
- # Add to conversation history
1916
- self.add_to_history("assistant", final_answer)
1917
2254
 
1918
- log_stream_chunk("orchestrator", "content", f"🏆 Selected Agent: {self._selected_agent}\n")
1919
- yield StreamChunk(type="content", content=f"🏆 Selected Agent: {self._selected_agent}\n")
1920
- log_stream_chunk("orchestrator", "content", final_answer)
1921
- yield StreamChunk(type="content", content=final_answer)
1922
- log_stream_chunk(
1923
- "orchestrator",
1924
- "content",
1925
- f"\n\n---\n*Coordinated by {len(self.agents)} agents via MassGen framework*",
1926
- )
1927
- yield StreamChunk(
1928
- type="content",
1929
- content=f"\n\n---\n*Coordinated by {len(self.agents)} agents via MassGen framework*",
1930
- )
1931
- else:
2255
+ if not self._selected_agent:
1932
2256
  error_msg = "❌ Unable to provide coordinated answer - no successful agents"
1933
2257
  self.add_to_history("assistant", error_msg)
1934
2258
  log_stream_chunk("orchestrator", "error", error_msg)
1935
2259
  yield StreamChunk(type="content", content=error_msg)
2260
+ self.workflow_phase = "presenting"
2261
+ log_stream_chunk("orchestrator", "done", None)
2262
+ yield StreamChunk(type="done")
2263
+ return
2264
+
2265
+ # Get vote results for presentation
2266
+ vote_results = self._get_vote_results()
2267
+
2268
+ log_stream_chunk("orchestrator", "content", "## 🎯 Final Coordinated Answer\n")
2269
+ yield StreamChunk(type="content", content="## 🎯 Final Coordinated Answer\n")
2270
+
2271
+ # Stream final presentation from winning agent
2272
+ log_stream_chunk("orchestrator", "content", f"🏆 Selected Agent: {self._selected_agent}\n")
2273
+ yield StreamChunk(type="content", content=f"🏆 Selected Agent: {self._selected_agent}\n")
2274
+
2275
+ # Stream the final presentation (with full tool support)
2276
+ presentation_content = ""
2277
+ async for chunk in self.get_final_presentation(self._selected_agent, vote_results):
2278
+ if chunk.type == "content" and chunk.content:
2279
+ presentation_content += chunk.content
2280
+ yield chunk
2281
+
2282
+ # Check if post-evaluation should run
2283
+ # Skip post-evaluation on final attempt (user clarification #4)
2284
+ is_final_attempt = self.current_attempt >= (self.max_attempts - 1)
2285
+ should_evaluate = self.max_attempts > 1 and not is_final_attempt
2286
+
2287
+ if should_evaluate:
2288
+ # Run post-evaluation
2289
+ final_answer_to_evaluate = self._final_presentation_content or presentation_content
2290
+ async for chunk in self.post_evaluate_answer(self._selected_agent, final_answer_to_evaluate):
2291
+ yield chunk
2292
+
2293
+ # Check if restart was requested
2294
+ if self.restart_pending and self.current_attempt < (self.max_attempts - 1):
2295
+ # Show restart banner
2296
+ restart_banner = f"""
2297
+
2298
+ 🔄 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
2299
+ ORCHESTRATION RESTART (Attempt {self.current_attempt + 2}/{self.max_attempts})
2300
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
2301
+
2302
+ REASON:
2303
+ {self.restart_reason}
2304
+
2305
+ INSTRUCTIONS FOR NEXT ATTEMPT:
2306
+ {self.restart_instructions}
2307
+
2308
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
2309
+
2310
+ """
2311
+ log_stream_chunk("orchestrator", "status", restart_banner)
2312
+ yield StreamChunk(type="restart_banner", content=restart_banner, source="orchestrator")
2313
+
2314
+ # Reset state for restart (prepare for next coordinate() call)
2315
+ self.handle_restart()
2316
+
2317
+ # Don't add to history or set workflow phase - restart is pending
2318
+ # Exit here - CLI will detect restart_pending and call coordinate() again
2319
+ return
2320
+
2321
+ # No restart - add final answer to conversation history
2322
+ if self._final_presentation_content:
2323
+ self.add_to_history("assistant", self._final_presentation_content)
1936
2324
 
1937
2325
  # Update workflow phase
1938
2326
  self.workflow_phase = "presenting"
@@ -2108,10 +2496,16 @@ class Orchestrator(ChatAgent):
2108
2496
 
2109
2497
  # Extract command execution parameters
2110
2498
  enable_command_execution = False
2499
+ docker_mode = False
2500
+ enable_sudo = False
2111
2501
  if hasattr(agent, "config") and agent.config:
2112
2502
  enable_command_execution = agent.config.backend_params.get("enable_mcp_command_line", False)
2503
+ docker_mode = agent.config.backend_params.get("command_line_execution_mode", "local") == "docker"
2504
+ enable_sudo = agent.config.backend_params.get("command_line_docker_enable_sudo", False)
2113
2505
  elif hasattr(agent, "backend") and hasattr(agent.backend, "backend_params"):
2114
2506
  enable_command_execution = agent.backend.backend_params.get("enable_mcp_command_line", False)
2507
+ docker_mode = agent.backend.backend_params.get("command_line_execution_mode", "local") == "docker"
2508
+ enable_sudo = agent.backend.backend_params.get("command_line_docker_enable_sudo", False)
2115
2509
  # Check if audio generation is enabled for this agent
2116
2510
  enable_audio_generation = False
2117
2511
  if hasattr(agent, "config") and agent.config:
@@ -2169,6 +2563,8 @@ class Orchestrator(ChatAgent):
2169
2563
  enable_image_generation=enable_image_generation,
2170
2564
  agent_answers=all_answers,
2171
2565
  enable_command_execution=enable_command_execution,
2566
+ docker_mode=docker_mode,
2567
+ enable_sudo=enable_sudo,
2172
2568
  )
2173
2569
  + "\n\n## Instructions\n"
2174
2570
  + base_system_message
@@ -2360,6 +2756,204 @@ class Orchestrator(ChatAgent):
2360
2756
  # Save logs
2361
2757
  self.save_coordination_logs()
2362
2758
 
2759
+ # Don't yield done here - let _present_final_answer handle final done after post-evaluation
2760
+
2761
async def post_evaluate_answer(self, selected_agent_id: str, final_answer: str) -> AsyncGenerator[StreamChunk, None]:
    """Post-evaluation phase where the winning agent evaluates its own answer.

    The agent reviews the final answer and decides whether to submit it or to
    request a restart with specific improvement instructions.

    Side effects:
        When the agent calls ``restart_orchestration``, this method sets
        ``self.restart_reason``, ``self.restart_instructions`` and
        ``self.restart_pending = True``. No restart state is touched on
        ``submit``, on timeout, or when no tool is called (auto-submit).

    Args:
        selected_agent_id: The agent that won the vote and presented the answer.
        final_answer: The final answer that was presented.

    Yields:
        StreamChunk: Stream chunks from the evaluation process (content,
        reasoning, status, done, error and any pass-through chunk types).

    Note:
        ``asyncio.timeout`` requires Python 3.11+.
    """
    if selected_agent_id not in self.agents:
        log_stream_chunk("orchestrator", "error", f"Selected agent {selected_agent_id} not found for post-evaluation")
        yield StreamChunk(type="error", error=f"Selected agent {selected_agent_id} not found")
        return

    agent = self.agents[selected_agent_id]

    # Use debug override on first attempt if configured
    eval_answer = final_answer
    if self.config.debug_final_answer and self.current_attempt == 0:
        eval_answer = self.config.debug_final_answer
        log_stream_chunk("orchestrator", "debug", f"Using debug override for post-evaluation: {self.config.debug_final_answer}")
        yield StreamChunk(
            type="debug",
            content=f"[DEBUG MODE] Overriding answer for evaluation: {self.config.debug_final_answer}",
            source="orchestrator",
        )

    # Build evaluation message
    evaluation_content = f"""{self.message_templates.format_original_message(self.current_task or "Task")}

FINAL ANSWER TO EVALUATE:
{eval_answer}

Review this answer carefully and determine if it fully addresses the original task. Use your available tools to verify claims and check files as needed.
Then call either submit(confirmed=True) if the answer is satisfactory, or restart_orchestration(reason, instructions) if improvements are needed."""

    # Get agent's configurable system message
    agent_system_message = agent.get_configurable_system_message()

    # Build post-evaluation system message
    base_system_message = self.message_templates.post_evaluation_system_message(agent_system_message)

    # Add filesystem context if available (same as final presentation)
    if agent.backend.filesystem_manager:
        main_workspace = str(agent.backend.filesystem_manager.get_current_workspace())
        temp_workspace = str(agent.backend.filesystem_manager.agent_temporary_workspace) if agent.backend.filesystem_manager.agent_temporary_workspace else None
        context_paths = agent.backend.filesystem_manager.path_permission_manager.get_context_paths() if agent.backend.filesystem_manager.path_permission_manager else []
        previous_turns_context = self._get_previous_turns_context_paths()
        current_turn_num = len(previous_turns_context) + 1 if previous_turns_context else 1
        turns_to_show = [t for t in previous_turns_context if t["turn"] < current_turn_num - 1]
        workspace_prepopulated = len(previous_turns_context) > 0

        # Get all answers for context
        all_answers = {aid: s.answer for aid, s in self.agent_states.items() if s.answer}

        # Image generation and command execution are explicitly switched off
        # in the filesystem prompt used for the evaluation pass.
        base_system_message = (
            self.message_templates.filesystem_system_message(
                main_workspace=main_workspace,
                temp_workspace=temp_workspace,
                context_paths=context_paths,
                previous_turns=turns_to_show,
                workspace_prepopulated=workspace_prepopulated,
                enable_image_generation=False,
                agent_answers=all_answers,
                enable_command_execution=False,
                docker_mode=False,
                enable_sudo=False,
            )
            + "\n\n## Post-Evaluation Task\n"
            + base_system_message
        )

    # Create evaluation messages
    evaluation_messages = [
        {"role": "system", "content": base_system_message},
        {"role": "user", "content": evaluation_content},
    ]

    # Get post-evaluation tools
    api_format = "chat_completions"  # Default format
    if hasattr(agent.backend, "api_format"):
        api_format = agent.backend.api_format
    post_eval_tools = get_post_evaluation_tools(api_format=api_format)

    log_stream_chunk("orchestrator", "status", "🔍 Post-evaluation: Reviewing final answer\n")
    yield StreamChunk(type="status", content="🔍 Post-evaluation: Reviewing final answer\n", source="orchestrator")

    # Stream evaluation with tools (with timeout protection)
    evaluation_complete = False
    tool_call_detected = False

    try:
        timeout_seconds = self.config.timeout_config.orchestrator_timeout_seconds
        async with asyncio.timeout(timeout_seconds):
            async for chunk in agent.chat(messages=evaluation_messages, tools=post_eval_tools, reset_chat=True, current_stage=CoordinationStage.POST_EVALUATION):
                chunk_type = self._get_chunk_type_value(chunk)

                if chunk_type == "content" and chunk.content:
                    log_stream_chunk("orchestrator", "content", chunk.content, selected_agent_id)
                    yield StreamChunk(type="content", content=chunk.content, source=selected_agent_id)
                elif chunk_type in ["reasoning", "reasoning_done", "reasoning_summary", "reasoning_summary_done"]:
                    reasoning_chunk = StreamChunk(
                        type=chunk_type,
                        content=chunk.content,
                        source=selected_agent_id,
                        reasoning_delta=getattr(chunk, "reasoning_delta", None),
                        reasoning_text=getattr(chunk, "reasoning_text", None),
                        reasoning_summary_delta=getattr(chunk, "reasoning_summary_delta", None),
                        reasoning_summary_text=getattr(chunk, "reasoning_summary_text", None),
                        item_id=getattr(chunk, "item_id", None),
                        content_index=getattr(chunk, "content_index", None),
                        summary_index=getattr(chunk, "summary_index", None),
                    )
                    # Log the normalised chunk type, matching every other branch.
                    log_stream_chunk("orchestrator", chunk_type, chunk.content, selected_agent_id)
                    yield reasoning_chunk
                elif chunk_type == "tool_calls":
                    # Post-evaluation tool call detected
                    tool_call_detected = True
                    if hasattr(chunk, "tool_calls") and chunk.tool_calls:
                        for tool_call in chunk.tool_calls:
                            # Use backend's tool extraction (same as regular coordination)
                            tool_name = agent.backend.extract_tool_name(tool_call)
                            tool_args = agent.backend.extract_tool_arguments(tool_call)

                            if tool_name == "submit":
                                log_stream_chunk("orchestrator", "status", "✅ Evaluation complete - answer approved\n")
                                yield StreamChunk(type="status", content="✅ Evaluation complete - answer approved\n", source="orchestrator")
                                evaluation_complete = True
                            elif tool_name == "restart_orchestration":
                                # Parse restart parameters from extracted args
                                self.restart_reason = tool_args.get("reason", "No reason provided")
                                self.restart_instructions = tool_args.get("instructions", "No instructions provided")
                                self.restart_pending = True

                                log_stream_chunk("orchestrator", "status", "🔄 Restart requested\n")
                                yield StreamChunk(type="status", content="🔄 Restart requested\n", source="orchestrator")
                                evaluation_complete = True
                elif chunk_type == "done":
                    log_stream_chunk("orchestrator", "done", None, selected_agent_id)
                    yield StreamChunk(type="done", source=selected_agent_id)
                elif chunk_type == "error":
                    log_stream_chunk("orchestrator", "error", chunk.error, selected_agent_id)
                    yield StreamChunk(type="error", error=chunk.error, source=selected_agent_id)
                else:
                    # Pass through other chunk types
                    log_stream_chunk("orchestrator", chunk_type, getattr(chunk, "content", ""), selected_agent_id)
                    yield StreamChunk(
                        type=chunk_type,
                        content=getattr(chunk, "content", ""),
                        source=selected_agent_id,
                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
                    )
    except asyncio.TimeoutError:
        log_stream_chunk("orchestrator", "status", "⏱️ Post-evaluation timed out - auto-submitting answer\n")
        yield StreamChunk(type="status", content="⏱️ Post-evaluation timed out - auto-submitting answer\n", source="orchestrator")
        evaluation_complete = True
        # Don't set restart_pending - let it default to False (auto-submit)

    # If no tool was called and evaluation didn't complete, auto-submit.
    # This deliberately lives after the try/except rather than in a `finally`:
    # yielding from a `finally` clause while the generator is being closed or
    # cancelled raises "RuntimeError: async generator ignored GeneratorExit",
    # and it would also announce an auto-submit while an unexpected exception
    # is still propagating.
    if not evaluation_complete and not tool_call_detected:
        log_stream_chunk("orchestrator", "status", "✅ Auto-submitting answer (no tool call detected)\n")
        yield StreamChunk(type="status", content="✅ Auto-submitting answer (no tool call detected)\n", source="orchestrator")
2927
+
2928
def handle_restart(self):
    """Prepare the orchestrator for another coordination attempt.

    Every per-agent state is replaced with a fresh ``AgentState``, the
    coordination messages, selected agent and final presentation content
    are cleared, a new ``CoordinationTracker`` session is started for the
    current agents, the workflow phase returns to ``"idle"`` and the
    attempt counter is advanced by one. The restart reason and
    instructions are not touched here, so they remain available to the
    next attempt.
    """
    upcoming = self.current_attempt + 1
    log_orchestrator_activity("handle_restart", f"Resetting state for restart attempt {upcoming}")

    # Swap each agent's state for a blank one, keeping the same dict object.
    for state_key in tuple(self.agent_states):
        self.agent_states[state_key] = AgentState()

    # Drop everything accumulated during the attempt that just finished.
    self._final_presentation_content = None
    self._selected_agent = None
    self._coordination_messages = []

    # A brand-new tracker session covering the same set of agents.
    self.coordination_tracker = CoordinationTracker()
    agent_ids = list(self.agents.keys())
    self.coordination_tracker.initialize_session(agent_ids)

    # Back to idle so the next coordinate() call starts from scratch.
    self.workflow_phase = "idle"

    # Advance the attempt counter.
    self.current_attempt = upcoming

    log_orchestrator_activity("handle_restart", f"State reset complete - starting attempt {self.current_attempt + 1}")
2956
+
2363
2957
  def _get_vote_results(self) -> Dict[str, Any]:
2364
2958
  """Get current vote results and statistics."""
2365
2959
  agent_answers = {aid: state.answer for aid, state in self.agent_states.items() if state.answer}
@@ -2428,6 +3022,19 @@ class Orchestrator(ChatAgent):
2428
3022
 
2429
3023
  async def _handle_followup(self, user_message: str, conversation_context: Optional[Dict[str, Any]] = None) -> AsyncGenerator[StreamChunk, None]:
2430
3024
  """Handle follow-up questions after presenting final answer with conversation context."""
3025
+ # Analyze the follow-up question for irreversibility before re-coordinating
3026
+ has_irreversible = await self._analyze_question_irreversibility(user_message, conversation_context or {})
3027
+
3028
+ # Set planning mode for all agents based on analysis
3029
+ for agent_id, agent in self.agents.items():
3030
+ if hasattr(agent.backend, "set_planning_mode"):
3031
+ agent.backend.set_planning_mode(has_irreversible)
3032
+ log_orchestrator_activity(
3033
+ self.orchestrator_id,
3034
+ f"Set planning mode for {agent_id} (follow-up)",
3035
+ {"planning_mode_enabled": has_irreversible, "reason": "follow-up irreversibility analysis"},
3036
+ )
3037
+
2431
3038
  # For now, acknowledge with context awareness
2432
3039
  # Future: implement full re-coordination with follow-up context
2433
3040
 
@@ -2540,8 +3147,9 @@ class Orchestrator(ChatAgent):
2540
3147
  """
2541
3148
  if self.config and hasattr(self.config, "get_configurable_system_message"):
2542
3149
  return self.config.get_configurable_system_message()
2543
- elif self.config and hasattr(self.config, "custom_system_instruction"):
2544
- return self.config.custom_system_instruction
3150
+ elif self.config and hasattr(self.config, "_custom_system_instruction"):
3151
+ # Access private attribute to avoid deprecation warning
3152
+ return self.config._custom_system_instruction
2545
3153
  elif self.config and self.config.backend_params:
2546
3154
  # Check for backend-specific system prompts
2547
3155
  backend_params = self.config.backend_params