massgen 0.1.0a3__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (120) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +17 -0
  3. massgen/api_params_handler/_api_params_handler_base.py +1 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +15 -2
  5. massgen/api_params_handler/_claude_api_params_handler.py +8 -1
  6. massgen/api_params_handler/_gemini_api_params_handler.py +73 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +8 -1
  8. massgen/backend/base.py +83 -0
  9. massgen/backend/{base_with_mcp.py → base_with_custom_tool_and_mcp.py} +286 -15
  10. massgen/backend/capabilities.py +6 -6
  11. massgen/backend/chat_completions.py +200 -103
  12. massgen/backend/claude.py +115 -18
  13. massgen/backend/claude_code.py +378 -14
  14. massgen/backend/docs/CLAUDE_API_RESEARCH.md +3 -3
  15. massgen/backend/gemini.py +1333 -1629
  16. massgen/backend/gemini_mcp_manager.py +545 -0
  17. massgen/backend/gemini_trackers.py +344 -0
  18. massgen/backend/gemini_utils.py +43 -0
  19. massgen/backend/grok.py +39 -6
  20. massgen/backend/response.py +147 -81
  21. massgen/cli.py +605 -110
  22. massgen/config_builder.py +376 -27
  23. massgen/configs/README.md +123 -80
  24. massgen/configs/basic/multi/three_agents_default.yaml +3 -3
  25. massgen/configs/basic/single/single_agent.yaml +1 -1
  26. massgen/configs/providers/openai/gpt5_nano.yaml +3 -3
  27. massgen/configs/tools/custom_tools/claude_code_custom_tool_example.yaml +32 -0
  28. massgen/configs/tools/custom_tools/claude_code_custom_tool_example_no_path.yaml +28 -0
  29. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_mcp_example.yaml +40 -0
  30. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_wrong_mcp_example.yaml +38 -0
  31. massgen/configs/tools/custom_tools/claude_code_wrong_custom_tool_with_mcp_example.yaml +38 -0
  32. massgen/configs/tools/custom_tools/claude_custom_tool_example.yaml +24 -0
  33. massgen/configs/tools/custom_tools/claude_custom_tool_example_no_path.yaml +22 -0
  34. massgen/configs/tools/custom_tools/claude_custom_tool_with_mcp_example.yaml +35 -0
  35. massgen/configs/tools/custom_tools/claude_custom_tool_with_wrong_mcp_example.yaml +33 -0
  36. massgen/configs/tools/custom_tools/claude_wrong_custom_tool_with_mcp_example.yaml +33 -0
  37. massgen/configs/tools/custom_tools/gemini_custom_tool_example.yaml +24 -0
  38. massgen/configs/tools/custom_tools/gemini_custom_tool_example_no_path.yaml +22 -0
  39. massgen/configs/tools/custom_tools/gemini_custom_tool_with_mcp_example.yaml +35 -0
  40. massgen/configs/tools/custom_tools/gemini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  41. massgen/configs/tools/custom_tools/gemini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  42. massgen/configs/tools/custom_tools/github_issue_market_analysis.yaml +94 -0
  43. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example.yaml +24 -0
  44. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example_no_path.yaml +22 -0
  45. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_mcp_example.yaml +35 -0
  46. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_wrong_mcp_example.yaml +33 -0
  47. massgen/configs/tools/custom_tools/gpt5_nano_wrong_custom_tool_with_mcp_example.yaml +33 -0
  48. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example.yaml +25 -0
  49. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example_no_path.yaml +23 -0
  50. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_mcp_example.yaml +34 -0
  51. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_wrong_mcp_example.yaml +34 -0
  52. massgen/configs/tools/custom_tools/gpt_oss_wrong_custom_tool_with_mcp_example.yaml +34 -0
  53. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example.yaml +24 -0
  54. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example_no_path.yaml +22 -0
  55. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_mcp_example.yaml +35 -0
  56. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  57. massgen/configs/tools/custom_tools/grok3_mini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  58. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example.yaml +25 -0
  59. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example_no_path.yaml +23 -0
  60. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_mcp_example.yaml +36 -0
  61. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_wrong_mcp_example.yaml +34 -0
  62. massgen/configs/tools/custom_tools/qwen_api_wrong_custom_tool_with_mcp_example.yaml +34 -0
  63. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example.yaml +24 -0
  64. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example_no_path.yaml +22 -0
  65. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_mcp_example.yaml +35 -0
  66. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_wrong_mcp_example.yaml +33 -0
  67. massgen/configs/tools/custom_tools/qwen_local_wrong_custom_tool_with_mcp_example.yaml +33 -0
  68. massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +1 -1
  69. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +7 -29
  70. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +5 -6
  71. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +4 -4
  72. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +4 -4
  73. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +2 -2
  74. massgen/configs/voting/gemini_gpt_voting_sensitivity.yaml +67 -0
  75. massgen/formatter/_chat_completions_formatter.py +104 -0
  76. massgen/formatter/_claude_formatter.py +120 -0
  77. massgen/formatter/_gemini_formatter.py +448 -0
  78. massgen/formatter/_response_formatter.py +88 -0
  79. massgen/frontend/coordination_ui.py +4 -2
  80. massgen/logger_config.py +35 -3
  81. massgen/message_templates.py +56 -6
  82. massgen/orchestrator.py +512 -16
  83. massgen/stream_chunk/base.py +3 -0
  84. massgen/tests/custom_tools_example.py +392 -0
  85. massgen/tests/mcp_test_server.py +17 -7
  86. massgen/tests/test_config_builder.py +423 -0
  87. massgen/tests/test_custom_tools.py +401 -0
  88. massgen/tests/test_intelligent_planning_mode.py +643 -0
  89. massgen/tests/test_tools.py +127 -0
  90. massgen/token_manager/token_manager.py +13 -4
  91. massgen/tool/README.md +935 -0
  92. massgen/tool/__init__.py +39 -0
  93. massgen/tool/_async_helpers.py +70 -0
  94. massgen/tool/_basic/__init__.py +8 -0
  95. massgen/tool/_basic/_two_num_tool.py +24 -0
  96. massgen/tool/_code_executors/__init__.py +10 -0
  97. massgen/tool/_code_executors/_python_executor.py +74 -0
  98. massgen/tool/_code_executors/_shell_executor.py +61 -0
  99. massgen/tool/_exceptions.py +39 -0
  100. massgen/tool/_file_handlers/__init__.py +10 -0
  101. massgen/tool/_file_handlers/_file_operations.py +218 -0
  102. massgen/tool/_manager.py +634 -0
  103. massgen/tool/_registered_tool.py +88 -0
  104. massgen/tool/_result.py +66 -0
  105. massgen/tool/_self_evolution/_github_issue_analyzer.py +369 -0
  106. massgen/tool/docs/builtin_tools.md +681 -0
  107. massgen/tool/docs/exceptions.md +794 -0
  108. massgen/tool/docs/execution_results.md +691 -0
  109. massgen/tool/docs/manager.md +887 -0
  110. massgen/tool/docs/workflow_toolkits.md +529 -0
  111. massgen/tool/workflow_toolkits/__init__.py +57 -0
  112. massgen/tool/workflow_toolkits/base.py +55 -0
  113. massgen/tool/workflow_toolkits/new_answer.py +126 -0
  114. massgen/tool/workflow_toolkits/vote.py +167 -0
  115. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/METADATA +87 -129
  116. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/RECORD +120 -44
  117. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/WHEEL +0 -0
  118. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/entry_points.txt +0 -0
  119. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/licenses/LICENSE +0 -0
  120. {massgen-0.1.0a3.dist-info → massgen-0.1.2.dist-info}/top_level.txt +0 -0
massgen/orchestrator.py CHANGED
@@ -44,6 +44,7 @@ from .logger_config import (
44
44
  )
45
45
  from .message_templates import MessageTemplates
46
46
  from .stream_chunk import ChunkType
47
+ from .tool import get_workflow_tools
47
48
  from .utils import ActionType, AgentStatus, CoordinationStage
48
49
 
49
50
 
@@ -137,9 +138,16 @@ class Orchestrator(ChatAgent):
137
138
  self.config = config or AgentConfig.create_openai_config()
138
139
 
139
140
  # Get message templates from config
140
- self.message_templates = self.config.message_templates or MessageTemplates()
141
- # Create workflow tools for agents (vote and new_answer)
142
- self.workflow_tools = self.message_templates.get_standard_tools(list(agents.keys()))
141
+ self.message_templates = self.config.message_templates or MessageTemplates(
142
+ voting_sensitivity=self.config.voting_sensitivity,
143
+ answer_novelty_requirement=self.config.answer_novelty_requirement,
144
+ )
145
+ # Create workflow tools for agents (vote and new_answer) using new toolkit system
146
+ self.workflow_tools = get_workflow_tools(
147
+ valid_agent_ids=list(agents.keys()),
148
+ template_overrides=getattr(self.message_templates, "_template_overrides", {}),
149
+ api_format="chat_completions", # Default format, will be overridden per backend
150
+ )
143
151
 
144
152
  # MassGen-specific state
145
153
  self.current_task: Optional[str] = None
@@ -260,6 +268,34 @@ class Orchestrator(ChatAgent):
260
268
  if conversation_context and conversation_context.get("conversation_history"):
261
269
  self._clear_agent_workspaces()
262
270
 
271
+ # Check if planning mode is enabled in config
272
+ planning_mode_config_exists = (
273
+ self.config.coordination_config and self.config.coordination_config.enable_planning_mode if self.config and hasattr(self.config, "coordination_config") else False
274
+ )
275
+
276
+ if planning_mode_config_exists:
277
+ # Analyze question for irreversibility and set planning mode accordingly
278
+ # This happens silently - users don't see this analysis
279
+ analysis_result = await self._analyze_question_irreversibility(user_message, conversation_context)
280
+ has_irreversible = analysis_result["has_irreversible"]
281
+ blocked_tools = analysis_result["blocked_tools"]
282
+
283
+ # Set planning mode and blocked tools for all agents based on analysis
284
+ for agent_id, agent in self.agents.items():
285
+ if hasattr(agent.backend, "set_planning_mode"):
286
+ agent.backend.set_planning_mode(has_irreversible)
287
+ if hasattr(agent.backend, "set_planning_mode_blocked_tools"):
288
+ agent.backend.set_planning_mode_blocked_tools(blocked_tools)
289
+ log_orchestrator_activity(
290
+ self.orchestrator_id,
291
+ f"Set planning mode for {agent_id}",
292
+ {
293
+ "planning_mode_enabled": has_irreversible,
294
+ "blocked_tools_count": len(blocked_tools),
295
+ "reason": "irreversibility analysis",
296
+ },
297
+ )
298
+
263
299
  async for chunk in self._coordinate_agents_with_timeout(conversation_context):
264
300
  yield chunk
265
301
 
@@ -328,6 +364,292 @@ class Orchestrator(ChatAgent):
328
364
  if log_session_dir:
329
365
  self.coordination_tracker.save_coordination_logs(log_session_dir)
330
366
 
367
+ def _format_planning_mode_ui(
368
+ self,
369
+ has_irreversible: bool,
370
+ blocked_tools: set,
371
+ has_isolated_workspaces: bool,
372
+ user_question: str,
373
+ ) -> str:
374
+ """
375
+ Format a nice UI box for planning mode status.
376
+
377
+ Args:
378
+ has_irreversible: Whether irreversible operations were detected
379
+ blocked_tools: Set of specific blocked tool names
380
+ has_isolated_workspaces: Whether agents have isolated workspaces
381
+ user_question: The user's question for context
382
+
383
+ Returns:
384
+ Formatted string with nice box UI
385
+ """
386
+ if not has_irreversible:
387
+ # Planning mode disabled - brief message
388
+ box = "\n╭─ Coordination Mode ────────────────────────────────────────╮\n"
389
+ box += "│ ✅ Planning Mode: DISABLED │\n"
390
+ box += "│ │\n"
391
+ box += "│ All tools available during coordination. │\n"
392
+ box += "│ No irreversible operations detected. │\n"
393
+ box += "╰────────────────────────────────────────────────────────────╯\n"
394
+ return box
395
+
396
+ # Planning mode enabled
397
+ box = "\n╭─ Coordination Mode ────────────────────────────────────────╮\n"
398
+ box += "│ 🧠 Planning Mode: ENABLED │\n"
399
+ box += "│ │\n"
400
+
401
+ if has_isolated_workspaces:
402
+ box += "│ 🔒 Workspace: Isolated (filesystem ops allowed) │\n"
403
+ box += "│ │\n"
404
+
405
+ # Description
406
+ box += "│ Agents will plan and coordinate without executing │\n"
407
+ box += "│ irreversible actions. The winning agent will implement │\n"
408
+ box += "│ the plan during final presentation. │\n"
409
+ box += "│ │\n"
410
+
411
+ # Blocked tools section
412
+ if blocked_tools:
413
+ box += "│ 🚫 Blocked Tools: │\n"
414
+ # Format tools into nice columns
415
+ sorted_tools = sorted(blocked_tools)
416
+ for i, tool in enumerate(sorted_tools[:5], 1): # Show max 5 tools
417
+ # Shorten tool name if too long
418
+ display_tool = tool if len(tool) <= 50 else tool[:47] + "..."
419
+ box += f"│ {i}. {display_tool:<54} │\n"
420
+
421
+ if len(sorted_tools) > 5:
422
+ remaining = len(sorted_tools) - 5
423
+ box += f"│ ... and {remaining} more tool(s) │\n"
424
+ box += "│ │\n"
425
+ else:
426
+ box += "│ 🚫 Blocking: ALL MCP tools │\n"
427
+ box += "│ │\n"
428
+
429
+ # Add brief analysis summary
430
+ box += "│ 📊 Analysis: │\n"
431
+ # Create a brief summary from the question
432
+ summary = user_question[:50] + "..." if len(user_question) > 50 else user_question
433
+ # Wrap text to fit in box
434
+ words = summary.split()
435
+ line = "│ "
436
+ for word in words:
437
+ if len(line) + len(word) + 1 > 60:
438
+ box += line.ljust(61) + "│\n"
439
+ line = "│ " + word + " "
440
+ else:
441
+ line += word + " "
442
+ if len(line) > 4: # If there's content
443
+ box += line.ljust(61) + "│\n"
444
+
445
+ box += "╰────────────────────────────────────────────────────────────╯\n"
446
+ return box
447
+
448
async def _analyze_question_irreversibility(self, user_question: str, conversation_context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Analyze if the user's question involves MCP tools with irreversible outcomes.

    A randomly selected agent with a backend is asked whether executing the
    request would involve MCP tool operations with irreversible outcomes
    (e.g., sending Discord messages, posting tweets, deleting files) as
    opposed to reversible read operations (e.g., reading messages, searching
    tweets, listing files). The analyzer's reply is parsed by
    ``_parse_irreversibility_response``.

    Any failure (no agent available, backend error, parsing error) falls back
    to the safe default: planning mode enabled with ALL MCP tools blocked.

    Args:
        user_question: The user's question/request
        conversation_context: Full conversation context including history
            (accepted for interface compatibility; not used by the analysis)

    Returns:
        Dict with:
            - has_irreversible (bool): True if irreversible operations detected
            - blocked_tools (set): Set of MCP tool names to block (e.g., {'mcp__discord__discord_send'})
              Empty set means block ALL MCP tools
    """
    import random

    print("=" * 80, flush=True)
    print("🔍 [INTELLIGENT PLANNING MODE] Analyzing question for irreversibility...", flush=True)
    print(f"📝 Question: {user_question[:100]}{'...' if len(user_question) > 100 else ''}", flush=True)
    print("=" * 80, flush=True)

    # Select a random agent for analysis
    available_agents = [aid for aid, agent in self.agents.items() if agent.backend is not None]
    if not available_agents:
        # No agents available, default to safe mode (planning enabled, block ALL)
        log_orchestrator_activity(
            self.orchestrator_id,
            "No agents available for irreversibility analysis, defaulting to planning mode",
            {},
        )
        return {"has_irreversible": True, "blocked_tools": set()}

    analyzer_agent_id = random.choice(available_agents)
    analyzer_agent = self.agents[analyzer_agent_id]

    print(f"🤖 Selected analyzer agent: {analyzer_agent_id}", flush=True)

    # Check if any agent has an isolated workspace. getattr keeps this check
    # from raising for backends that do not expose a filesystem manager.
    has_isolated_workspaces = False
    for agent in self.agents.values():
        filesystem_manager = getattr(agent.backend, "filesystem_manager", None)
        cwd = getattr(filesystem_manager, "cwd", None) if filesystem_manager else None
        if cwd and "workspace" in os.path.basename(cwd).lower():
            has_isolated_workspaces = True
            break

    if has_isolated_workspaces:
        print("🔒 Detected isolated agent workspaces - filesystem ops will be allowed", flush=True)

    log_orchestrator_activity(
        self.orchestrator_id,
        "Analyzing question irreversibility",
        {
            "analyzer_agent": analyzer_agent_id,
            "question_preview": user_question[:100] + "..." if len(user_question) > 100 else user_question,
            "has_isolated_workspaces": has_isolated_workspaces,
        },
    )

    # Build analysis prompt - asking for specific tool names
    workspace_context = ""
    if has_isolated_workspaces:
        workspace_context = """
IMPORTANT - ISOLATED WORKSPACES:
The agents are working in isolated temporary workspaces (directories containing "workspace" in their name).
Filesystem operations (read_file, write_file, delete_file, list_files, etc.) within these isolated workspaces are SAFE and REVERSIBLE.
They should NOT be blocked because:
- These are temporary directories specific to this coordination session
- Files created/modified are isolated from external systems
- Changes are contained within the agent's sandbox
- The workspace can be cleared after coordination

Only block filesystem operations if they explicitly target paths OUTSIDE the isolated workspace.
"""

    analysis_prompt = f"""You are analyzing whether a user's request involves operations with irreversible outcomes.

USER REQUEST:
{user_question}
{workspace_context}
CONTEXT:
Your task is to determine if executing this request would involve MCP (Model Context Protocol) tools that have irreversible outcomes, and if so, identify which specific tools should be blocked.

MCP tools follow the naming convention: mcp__<server>__<tool_name>
Examples:
- mcp__discord__discord_send (irreversible - sends messages)
- mcp__discord__discord_read_channel (reversible - reads messages)
- mcp__twitter__post_tweet (irreversible - posts publicly)
- mcp__twitter__search_tweets (reversible - searches)
- mcp__filesystem__write_file (SAFE in isolated workspace - writes to temporary files)
- mcp__filesystem__read_file (reversible - reads files)

IRREVERSIBLE OPERATIONS:
- Sending messages (discord_send, slack_send, etc.)
- Posting content publicly (post_tweet, create_post, etc.)
- Deleting files or data OUTSIDE isolated workspace (delete_file on external paths, remove_data, etc.)
- Modifying external systems (write_file to external paths, update_record, etc.)
- Creating permanent records (create_issue, add_comment, etc.)
- Executing commands that change state (run_command, execute_script, etc.)

REVERSIBLE OPERATIONS (DO NOT BLOCK):
- Reading messages or data (read_channel, get_messages, etc.)
- Searching or querying information (search_tweets, query_data, etc.)
- Listing files or resources (list_files, list_channels, etc.)
- Fetching data from APIs (get_user, fetch_data, etc.)
- Viewing information (view_channel, get_info, etc.)
- Filesystem operations IN ISOLATED WORKSPACE (write_file, read_file, delete_file, list_files when in workspace*)

Respond in this EXACT format:
IRREVERSIBLE: YES/NO
BLOCKED_TOOLS: tool1, tool2, tool3

If IRREVERSIBLE is NO, leave BLOCKED_TOOLS empty.
If IRREVERSIBLE is YES, list the specific MCP tool names that should be blocked (e.g., mcp__discord__discord_send).

Your answer:"""

    # Create messages for the analyzer
    analysis_messages = [
        {"role": "user", "content": analysis_prompt},
    ]

    # The whole analysis is wrapped deliberately: any failure while streaming,
    # parsing or rendering must fall back to the safe default below.
    try:
        # Stream response from analyzer agent (content chunks only)
        response_parts = []
        async for chunk in analyzer_agent.backend.stream_with_tools(
            messages=analysis_messages,
            tools=[],  # No tools needed for simple analysis
            agent_id=analyzer_agent_id,
        ):
            if chunk.type == "content" and chunk.content:
                response_parts.append(chunk.content)

        response_clean = "".join(response_parts).strip()
        verdict = self._parse_irreversibility_response(response_clean)
        has_irreversible = verdict["has_irreversible"]
        blocked_tools = verdict["blocked_tools"]

        log_orchestrator_activity(
            self.orchestrator_id,
            "Irreversibility analysis complete",
            {
                "analyzer_agent": analyzer_agent_id,
                "response": response_clean[:100],
                "has_irreversible": has_irreversible,
                "blocked_tools_count": len(blocked_tools),
            },
        )

        # Display nice UI box for planning mode status
        ui_box = self._format_planning_mode_ui(
            has_irreversible=has_irreversible,
            blocked_tools=blocked_tools,
            has_isolated_workspaces=has_isolated_workspaces,
            user_question=user_question,
        )
        print(ui_box, flush=True)

        return {"has_irreversible": has_irreversible, "blocked_tools": blocked_tools}

    except Exception as e:
        # On error, default to safe mode (planning enabled, block ALL)
        log_orchestrator_activity(
            self.orchestrator_id,
            "Irreversibility analysis failed, defaulting to planning mode",
            {"error": str(e)},
        )
        return {"has_irreversible": True, "blocked_tools": set()}

@staticmethod
def _parse_irreversibility_response(response_text: str) -> Dict[str, Any]:
    """Parse the analyzer's reply into a planning-mode verdict.

    Recognizes ``IRREVERSIBLE: YES/NO`` and ``BLOCKED_TOOLS: a, b`` lines,
    tolerating case differences and light markdown decoration (``**``,
    backticks, list bullets). Tool names are stripped of surrounding quotes,
    backticks and brackets, and placeholder values such as "none" or "N/A"
    are dropped so they are not mistaken for real tool names (a non-empty
    set of unmatched names would otherwise block nothing).

    If no ``IRREVERSIBLE:`` line is present, whole-word YES/NO matching is
    used; only an unambiguous NO disables planning mode. Ambiguous or
    unclear replies default to the safe mode (irreversible).

    Args:
        response_text: Raw text returned by the analyzer agent.

    Returns:
        Dict with ``has_irreversible`` (bool) and ``blocked_tools`` (set of str).
    """
    import re

    placeholders = {"", "NONE", "N/A", "NA", "EMPTY", "NULL", "-"}
    has_irreversible = False
    blocked_tools = set()
    found_irreversible_line = False

    for raw_line in response_text.strip().split("\n"):
        key, separator, value = raw_line.strip().lstrip("*#>-` ").partition(":")
        if not separator:
            continue
        key = key.strip("*` ").upper()
        value = value.strip().strip("*`").strip()

        if key == "IRREVERSIBLE":
            found_irreversible_line = True
            # Only the leading word matters, e.g. "YES - sends messages"
            has_irreversible = value.upper().startswith("YES")
        elif key == "BLOCKED_TOOLS" and value:
            candidates = (name.strip().strip("`'\"[]()").strip() for name in value.split(","))
            blocked_tools = {name for name in candidates if name.upper() not in placeholders}

    # Fallback: If no structured format found, look for YES/NO in the response
    if not found_irreversible_line:
        print("⚠️ [WARNING] No 'IRREVERSIBLE:' line found, using fallback parsing", flush=True)
        response_upper = response_text.upper()
        # Whole-word matches only, so "NOT"/"KNOW"/"EYES" do not count
        says_yes = re.search(r"\bYES\b", response_upper) is not None
        says_no = re.search(r"\bNO\b", response_upper) is not None
        # Only a clear, uncontradicted NO turns planning mode off
        has_irreversible = not (says_no and not says_yes)

    return {"has_irreversible": has_irreversible, "blocked_tools": blocked_tools}
652
+
331
653
  async def _coordinate_agents_with_timeout(self, conversation_context: Optional[Dict[str, Any]] = None) -> AsyncGenerator[StreamChunk, None]:
332
654
  """Execute coordination with orchestrator-level timeout protection."""
333
655
  self.coordination_start_time = time.time()
@@ -841,8 +1163,8 @@ class Orchestrator(ChatAgent):
841
1163
  # Generate single timestamp for answer/vote and workspace
842
1164
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
843
1165
 
844
- # Save answer if provided
845
- if answer_content:
1166
+ # Save answer if provided (or create final directory structure even if empty)
1167
+ if answer_content is not None or is_final:
846
1168
  try:
847
1169
  log_session_dir = get_log_session_dir()
848
1170
  if log_session_dir:
@@ -855,8 +1177,9 @@ class Orchestrator(ChatAgent):
855
1177
  timestamped_dir.mkdir(parents=True, exist_ok=True)
856
1178
  answer_file = timestamped_dir / "answer.txt"
857
1179
 
858
- # Write the answer content
859
- answer_file.write_text(answer_content)
1180
+ # Write the answer content (even if empty for final snapshots)
1181
+ content_to_write = answer_content if answer_content is not None else ""
1182
+ answer_file.write_text(content_to_write)
860
1183
  logger.info(f"[Orchestrator._save_agent_snapshot] Saved answer to {answer_file}")
861
1184
 
862
1185
  except Exception as e:
@@ -935,7 +1258,7 @@ class Orchestrator(ChatAgent):
935
1258
  logger.info(f"[Orchestrator._save_agent_snapshot] Agent {agent_id} does not have filesystem_manager")
936
1259
 
937
1260
  # Save context if provided (unified context saving)
938
- if context_data and (answer_content or vote_data):
1261
+ if context_data:
939
1262
  try:
940
1263
  log_session_dir = get_log_session_dir()
941
1264
  if log_session_dir:
@@ -944,6 +1267,8 @@ class Orchestrator(ChatAgent):
944
1267
  else:
945
1268
  timestamped_dir = log_session_dir / agent_id / timestamp
946
1269
 
1270
+ # Ensure directory exists (may not have been created if no answer/vote)
1271
+ timestamped_dir.mkdir(parents=True, exist_ok=True)
947
1272
  context_file = timestamped_dir / "context.txt"
948
1273
 
949
1274
  # Handle different types of context data
@@ -1122,6 +1447,91 @@ class Orchestrator(ChatAgent):
1122
1447
  # # Implementation will check against PermissionManager
1123
1448
  # pass
1124
1449
 
1450
+ def _calculate_jaccard_similarity(self, text1: str, text2: str) -> float:
1451
+ """Calculate Jaccard similarity between two texts based on word tokens.
1452
+
1453
+ Args:
1454
+ text1: First text to compare
1455
+ text2: Second text to compare
1456
+
1457
+ Returns:
1458
+ Similarity score between 0.0 and 1.0
1459
+ """
1460
+ # Tokenize and normalize - simple word-based approach
1461
+ words1 = set(text1.lower().split())
1462
+ words2 = set(text2.lower().split())
1463
+
1464
+ if not words1 and not words2:
1465
+ return 1.0 # Both empty, consider identical
1466
+ if not words1 or not words2:
1467
+ return 0.0 # One empty, one not
1468
+
1469
+ intersection = len(words1 & words2)
1470
+ union = len(words1 | words2)
1471
+
1472
+ return intersection / union if union > 0 else 0.0
1473
+
1474
+ def _check_answer_novelty(self, new_answer: str, existing_answers: Dict[str, str]) -> tuple[bool, Optional[str]]:
1475
+ """Check if a new answer is sufficiently different from existing answers.
1476
+
1477
+ Args:
1478
+ new_answer: The proposed new answer
1479
+ existing_answers: Dictionary of existing answers {agent_id: answer_content}
1480
+
1481
+ Returns:
1482
+ Tuple of (is_novel, error_message). is_novel=True if answer passes novelty check.
1483
+ """
1484
+ # Lenient mode: no checks (current behavior)
1485
+ if self.config.answer_novelty_requirement == "lenient":
1486
+ return (True, None)
1487
+
1488
+ # Determine threshold based on setting
1489
+ if self.config.answer_novelty_requirement == "strict":
1490
+ threshold = 0.50 # Reject if >50% overlap (strict)
1491
+ error_msg = (
1492
+ "Your answer is too similar to existing answers (>50% overlap). Please use a fundamentally different approach, employ different tools/techniques, or vote for an existing answer."
1493
+ )
1494
+ else: # balanced
1495
+ threshold = 0.70 # Reject if >70% overlap (balanced)
1496
+ error_msg = (
1497
+ "Your answer is too similar to existing answers (>70% overlap). "
1498
+ "Please provide a meaningfully different solution with new insights, "
1499
+ "approaches, or tools, or vote for an existing answer."
1500
+ )
1501
+
1502
+ # Check similarity against all existing answers
1503
+ for agent_id, existing_answer in existing_answers.items():
1504
+ similarity = self._calculate_jaccard_similarity(new_answer, existing_answer)
1505
+ if similarity > threshold:
1506
+ logger.info(f"[Orchestrator] Answer rejected: {similarity:.2%} similar to {agent_id}'s answer (threshold: {threshold:.0%})")
1507
+ return (False, error_msg)
1508
+
1509
+ # Answer is sufficiently novel
1510
+ return (True, None)
1511
+
1512
+ def _check_answer_count_limit(self, agent_id: str) -> tuple[bool, Optional[str]]:
1513
+ """Check if agent has reached their answer count limit.
1514
+
1515
+ Args:
1516
+ agent_id: The agent attempting to provide a new answer
1517
+
1518
+ Returns:
1519
+ Tuple of (can_answer, error_message). can_answer=True if agent can provide another answer.
1520
+ """
1521
+ # No limit set
1522
+ if self.config.max_new_answers_per_agent is None:
1523
+ return (True, None)
1524
+
1525
+ # Count how many answers this agent has provided
1526
+ answer_count = len(self.coordination_tracker.answers_by_agent.get(agent_id, []))
1527
+
1528
+ if answer_count >= self.config.max_new_answers_per_agent:
1529
+ error_msg = f"You've reached the maximum of {self.config.max_new_answers_per_agent} new answer(s). Please vote for the best existing answer using the `vote` tool."
1530
+ logger.info(f"[Orchestrator] Answer rejected: {agent_id} has reached limit ({answer_count}/{self.config.max_new_answers_per_agent})")
1531
+ return (False, error_msg)
1532
+
1533
+ return (True, None)
1534
+
1125
1535
  def _create_tool_error_messages(
1126
1536
  self,
1127
1537
  agent: "ChatAgent",
@@ -1283,17 +1693,17 @@ class Orchestrator(ChatAgent):
1283
1693
  logger.info(f"[Orchestrator] Agent {agent_id} sees no existing answers")
1284
1694
 
1285
1695
  # Check if planning mode is enabled for coordination phase
1696
+ # Use the ACTUAL backend planning mode status (set by intelligent analysis)
1697
+ # instead of the static config setting
1286
1698
  is_coordination_phase = self.workflow_phase == "coordinating"
1287
- planning_mode_enabled = (
1288
- self.config.coordination_config and self.config.coordination_config.enable_planning_mode and is_coordination_phase
1289
- if self.config and hasattr(self.config, "coordination_config")
1290
- else False
1291
- )
1699
+ planning_mode_enabled = agent.backend.is_planning_mode_enabled() if is_coordination_phase else False
1292
1700
 
1293
1701
  # Add planning mode instructions to system message if enabled
1294
- if planning_mode_enabled and self.config.coordination_config.planning_mode_instruction:
1702
+ # Only add instructions if we have a coordination config with planning instruction
1703
+ if planning_mode_enabled and self.config and hasattr(self.config, "coordination_config") and self.config.coordination_config and self.config.coordination_config.planning_mode_instruction:
1295
1704
  planning_instructions = f"\n\n{self.config.coordination_config.planning_mode_instruction}"
1296
1705
  agent_system_message = f"{agent_system_message}{planning_instructions}" if agent_system_message else planning_instructions.strip()
1706
+ print(f"📝 [{agent_id}] Adding planning mode instructions to system message", flush=True)
1297
1707
 
1298
1708
  # Build conversation with context support
1299
1709
  if conversation_context and conversation_context.get("conversation_history"):
@@ -1443,6 +1853,10 @@ class Orchestrator(ChatAgent):
1443
1853
  # Forward MCP status messages with proper formatting
1444
1854
  mcp_content = f"🔧 MCP: {chunk.content}"
1445
1855
  yield ("content", mcp_content)
1856
+ elif chunk_type == "custom_tool_status":
1857
+ # Forward custom tool status messages with proper formatting
1858
+ custom_tool_content = f"🔧 Custom Tool: {chunk.content}"
1859
+ yield ("content", custom_tool_content)
1446
1860
  elif chunk_type == "debug":
1447
1861
  # Forward debug chunks
1448
1862
  yield ("debug", chunk.content)
@@ -1660,6 +2074,54 @@ class Orchestrator(ChatAgent):
1660
2074
  # Agent provided new answer
1661
2075
  content = tool_args.get("content", response_text.strip())
1662
2076
 
2077
+ # Check answer count limit
2078
+ can_answer, count_error = self._check_answer_count_limit(agent_id)
2079
+ if not can_answer:
2080
+ if attempt < max_attempts - 1:
2081
+ if self._check_restart_pending(agent_id):
2082
+ await self._save_partial_work_on_restart(agent_id)
2083
+ yield (
2084
+ "content",
2085
+ f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
2086
+ )
2087
+ yield ("done", None)
2088
+ return
2089
+ yield ("content", f"❌ {count_error}")
2090
+ # Create proper tool error message for retry
2091
+ enforcement_msg = self._create_tool_error_messages(agent, [tool_call], count_error)
2092
+ continue
2093
+ else:
2094
+ yield (
2095
+ "error",
2096
+ f"Answer count limit reached after {max_attempts} attempts",
2097
+ )
2098
+ yield ("done", None)
2099
+ return
2100
+
2101
+ # Check answer novelty (similarity to existing answers)
2102
+ is_novel, novelty_error = self._check_answer_novelty(content, answers)
2103
+ if not is_novel:
2104
+ if attempt < max_attempts - 1:
2105
+ if self._check_restart_pending(agent_id):
2106
+ await self._save_partial_work_on_restart(agent_id)
2107
+ yield (
2108
+ "content",
2109
+ f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
2110
+ )
2111
+ yield ("done", None)
2112
+ return
2113
+ yield ("content", f"❌ {novelty_error}")
2114
+ # Create proper tool error message for retry
2115
+ enforcement_msg = self._create_tool_error_messages(agent, [tool_call], novelty_error)
2116
+ continue
2117
+ else:
2118
+ yield (
2119
+ "error",
2120
+ f"Answer novelty requirement not met after {max_attempts} attempts",
2121
+ )
2122
+ yield ("done", None)
2123
+ return
2124
+
1663
2125
  # Check for duplicate answer
1664
2126
  # Normalize both new content and existing content to neutral paths for comparison
1665
2127
  normalized_new_content = self._normalize_workspace_paths_for_comparison(content)
@@ -1695,6 +2157,9 @@ class Orchestrator(ChatAgent):
1695
2157
  return
1696
2158
  elif tool_name.startswith("mcp"):
1697
2159
  pass
2160
+ elif tool_name.startswith("custom_tool"):
2161
+ # Custom tools are handled by the backend and their results are streamed separately
2162
+ pass
1698
2163
  else:
1699
2164
  # Non-workflow tools not yet implemented
1700
2165
  yield (
@@ -2056,6 +2521,7 @@ class Orchestrator(ChatAgent):
2056
2521
 
2057
2522
  # Use agent's chat method with proper system message (reset chat for clean presentation)
2058
2523
  presentation_content = ""
2524
+ final_snapshot_saved = False # Track whether snapshot was saved during stream
2059
2525
 
2060
2526
  try:
2061
2527
  # Track final round iterations (each chunk is like an iteration)
@@ -2121,6 +2587,9 @@ class Orchestrator(ChatAgent):
2121
2587
  # Track the final answer in coordination tracker
2122
2588
  self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")
2123
2589
 
2590
+ # Mark snapshot as saved
2591
+ final_snapshot_saved = True
2592
+
2124
2593
  log_stream_chunk("orchestrator", "done", None, selected_agent_id)
2125
2594
  yield StreamChunk(type="done", source=selected_agent_id)
2126
2595
  elif chunk_type == "error":
@@ -2139,7 +2608,7 @@ class Orchestrator(ChatAgent):
2139
2608
  type=chunk_type,
2140
2609
  content=getattr(chunk, "content", ""),
2141
2610
  source=selected_agent_id,
2142
- **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
2611
+ **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
2143
2612
  )
2144
2613
  else:
2145
2614
  log_stream_chunk(
@@ -2152,10 +2621,24 @@ class Orchestrator(ChatAgent):
2152
2621
  type=chunk_type,
2153
2622
  content=getattr(chunk, "content", ""),
2154
2623
  source=selected_agent_id,
2155
- **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
2624
+ **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
2156
2625
  )
2157
2626
 
2158
2627
  finally:
2628
+ # Ensure final snapshot is always saved (even if "done" chunk wasn't yielded)
2629
+ if not final_snapshot_saved:
2630
+ final_answer = presentation_content.strip() if presentation_content.strip() else self.agent_states[selected_agent_id].answer
2631
+ final_context = self.get_last_context(selected_agent_id)
2632
+ await self._save_agent_snapshot(
2633
+ self._selected_agent,
2634
+ answer_content=final_answer,
2635
+ is_final=True,
2636
+ context_data=final_context,
2637
+ )
2638
+
2639
+ # Track the final answer in coordination tracker
2640
+ self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")
2641
+
2159
2642
  # Store the final presentation content for logging
2160
2643
  if presentation_content.strip():
2161
2644
  # Store the synthesized final answer
@@ -2259,6 +2742,19 @@ class Orchestrator(ChatAgent):
2259
2742
 
2260
2743
  async def _handle_followup(self, user_message: str, conversation_context: Optional[Dict[str, Any]] = None) -> AsyncGenerator[StreamChunk, None]:
2261
2744
  """Handle follow-up questions after presenting final answer with conversation context."""
2745
+ # Analyze the follow-up question for irreversibility before re-coordinating
2746
+ has_irreversible = await self._analyze_question_irreversibility(user_message, conversation_context or {})
2747
+
2748
+ # Set planning mode for all agents based on analysis
2749
+ for agent_id, agent in self.agents.items():
2750
+ if hasattr(agent.backend, "set_planning_mode"):
2751
+ agent.backend.set_planning_mode(has_irreversible)
2752
+ log_orchestrator_activity(
2753
+ self.orchestrator_id,
2754
+ f"Set planning mode for {agent_id} (follow-up)",
2755
+ {"planning_mode_enabled": has_irreversible, "reason": "follow-up irreversibility analysis"},
2756
+ )
2757
+
2262
2758
  # For now, acknowledge with context awareness
2263
2759
  # Future: implement full re-coordination with follow-up context
2264
2760
 
@@ -33,6 +33,9 @@ class ChunkType(Enum):
33
33
  # MCP-related chunks
34
34
  MCP_STATUS = "mcp_status"
35
35
 
36
+ # Custom tool chunks
37
+ CUSTOM_TOOL_STATUS = "custom_tool_status"
38
+
36
39
  # Multimodal chunks
37
40
  MEDIA = "media"
38
41
  MEDIA_PROGRESS = "media_progress"