massgen 0.1.0a2__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of massgen might be problematic.

Files changed (111)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +17 -0
  3. massgen/api_params_handler/_api_params_handler_base.py +1 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +8 -1
  5. massgen/api_params_handler/_claude_api_params_handler.py +8 -1
  6. massgen/api_params_handler/_gemini_api_params_handler.py +73 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +8 -1
  8. massgen/backend/base.py +31 -0
  9. massgen/backend/{base_with_mcp.py → base_with_custom_tool_and_mcp.py} +282 -11
  10. massgen/backend/chat_completions.py +182 -92
  11. massgen/backend/claude.py +115 -18
  12. massgen/backend/claude_code.py +378 -14
  13. massgen/backend/docs/CLAUDE_API_RESEARCH.md +3 -3
  14. massgen/backend/gemini.py +1275 -1607
  15. massgen/backend/gemini_mcp_manager.py +545 -0
  16. massgen/backend/gemini_trackers.py +344 -0
  17. massgen/backend/gemini_utils.py +43 -0
  18. massgen/backend/response.py +129 -70
  19. massgen/cli.py +643 -132
  20. massgen/config_builder.py +381 -32
  21. massgen/configs/README.md +111 -80
  22. massgen/configs/basic/multi/three_agents_default.yaml +1 -1
  23. massgen/configs/basic/single/single_agent.yaml +1 -1
  24. massgen/configs/providers/openai/gpt5_nano.yaml +3 -3
  25. massgen/configs/tools/custom_tools/claude_code_custom_tool_example.yaml +32 -0
  26. massgen/configs/tools/custom_tools/claude_code_custom_tool_example_no_path.yaml +28 -0
  27. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_mcp_example.yaml +40 -0
  28. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_wrong_mcp_example.yaml +38 -0
  29. massgen/configs/tools/custom_tools/claude_code_wrong_custom_tool_with_mcp_example.yaml +38 -0
  30. massgen/configs/tools/custom_tools/claude_custom_tool_example.yaml +24 -0
  31. massgen/configs/tools/custom_tools/claude_custom_tool_example_no_path.yaml +22 -0
  32. massgen/configs/tools/custom_tools/claude_custom_tool_with_mcp_example.yaml +35 -0
  33. massgen/configs/tools/custom_tools/claude_custom_tool_with_wrong_mcp_example.yaml +33 -0
  34. massgen/configs/tools/custom_tools/claude_wrong_custom_tool_with_mcp_example.yaml +33 -0
  35. massgen/configs/tools/custom_tools/gemini_custom_tool_example.yaml +24 -0
  36. massgen/configs/tools/custom_tools/gemini_custom_tool_example_no_path.yaml +22 -0
  37. massgen/configs/tools/custom_tools/gemini_custom_tool_with_mcp_example.yaml +35 -0
  38. massgen/configs/tools/custom_tools/gemini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  39. massgen/configs/tools/custom_tools/gemini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  40. massgen/configs/tools/custom_tools/github_issue_market_analysis.yaml +94 -0
  41. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example.yaml +24 -0
  42. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example_no_path.yaml +22 -0
  43. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_mcp_example.yaml +35 -0
  44. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_wrong_mcp_example.yaml +33 -0
  45. massgen/configs/tools/custom_tools/gpt5_nano_wrong_custom_tool_with_mcp_example.yaml +33 -0
  46. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example.yaml +25 -0
  47. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example_no_path.yaml +23 -0
  48. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_mcp_example.yaml +34 -0
  49. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_wrong_mcp_example.yaml +34 -0
  50. massgen/configs/tools/custom_tools/gpt_oss_wrong_custom_tool_with_mcp_example.yaml +34 -0
  51. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example.yaml +24 -0
  52. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example_no_path.yaml +22 -0
  53. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_mcp_example.yaml +35 -0
  54. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  55. massgen/configs/tools/custom_tools/grok3_mini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  56. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example.yaml +25 -0
  57. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example_no_path.yaml +23 -0
  58. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_mcp_example.yaml +36 -0
  59. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_wrong_mcp_example.yaml +34 -0
  60. massgen/configs/tools/custom_tools/qwen_api_wrong_custom_tool_with_mcp_example.yaml +34 -0
  61. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example.yaml +24 -0
  62. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example_no_path.yaml +22 -0
  63. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_mcp_example.yaml +35 -0
  64. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_wrong_mcp_example.yaml +33 -0
  65. massgen/configs/tools/custom_tools/qwen_local_wrong_custom_tool_with_mcp_example.yaml +33 -0
  66. massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +1 -1
  67. massgen/configs/voting/gemini_gpt_voting_sensitivity.yaml +67 -0
  68. massgen/formatter/_chat_completions_formatter.py +104 -0
  69. massgen/formatter/_claude_formatter.py +120 -0
  70. massgen/formatter/_gemini_formatter.py +448 -0
  71. massgen/formatter/_response_formatter.py +88 -0
  72. massgen/frontend/coordination_ui.py +4 -2
  73. massgen/logger_config.py +35 -3
  74. massgen/message_templates.py +56 -6
  75. massgen/orchestrator.py +179 -10
  76. massgen/stream_chunk/base.py +3 -0
  77. massgen/tests/custom_tools_example.py +392 -0
  78. massgen/tests/mcp_test_server.py +17 -7
  79. massgen/tests/test_config_builder.py +423 -0
  80. massgen/tests/test_custom_tools.py +401 -0
  81. massgen/tests/test_tools.py +127 -0
  82. massgen/tool/README.md +935 -0
  83. massgen/tool/__init__.py +39 -0
  84. massgen/tool/_async_helpers.py +70 -0
  85. massgen/tool/_basic/__init__.py +8 -0
  86. massgen/tool/_basic/_two_num_tool.py +24 -0
  87. massgen/tool/_code_executors/__init__.py +10 -0
  88. massgen/tool/_code_executors/_python_executor.py +74 -0
  89. massgen/tool/_code_executors/_shell_executor.py +61 -0
  90. massgen/tool/_exceptions.py +39 -0
  91. massgen/tool/_file_handlers/__init__.py +10 -0
  92. massgen/tool/_file_handlers/_file_operations.py +218 -0
  93. massgen/tool/_manager.py +634 -0
  94. massgen/tool/_registered_tool.py +88 -0
  95. massgen/tool/_result.py +66 -0
  96. massgen/tool/_self_evolution/_github_issue_analyzer.py +369 -0
  97. massgen/tool/docs/builtin_tools.md +681 -0
  98. massgen/tool/docs/exceptions.md +794 -0
  99. massgen/tool/docs/execution_results.md +691 -0
  100. massgen/tool/docs/manager.md +887 -0
  101. massgen/tool/docs/workflow_toolkits.md +529 -0
  102. massgen/tool/workflow_toolkits/__init__.py +57 -0
  103. massgen/tool/workflow_toolkits/base.py +55 -0
  104. massgen/tool/workflow_toolkits/new_answer.py +126 -0
  105. massgen/tool/workflow_toolkits/vote.py +167 -0
  106. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/METADATA +89 -131
  107. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/RECORD +111 -36
  108. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/WHEEL +0 -0
  109. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/entry_points.txt +0 -0
  110. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/licenses/LICENSE +0 -0
  111. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/top_level.txt +0 -0
massgen/formatter/_response_formatter.py CHANGED
@@ -241,6 +241,94 @@ class ResponseFormatter(FormatterBase):
 
         return converted_tools
 
+    def format_custom_tools(self, custom_tools: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Convert custom tools from RegisteredToolEntry format to Response API format.
+
+        Custom tools are provided as a dictionary where:
+        - Keys are tool names (str)
+        - Values are RegisteredToolEntry objects with:
+            - tool_name: str
+            - schema_def: dict with structure {"type": "function", "function": {...}}
+            - get_extended_schema: property that returns the schema with extensions
+
+        Response API expects: {"type": "function", "name": ..., "description": ..., "parameters": ...}
+
+        Args:
+            custom_tools: Dictionary of tool_name -> RegisteredToolEntry objects
+
+        Returns:
+            List of tools in Response API format
+        """
+        if not custom_tools:
+            return []
+
+        converted_tools = []
+
+        # Handle dictionary format: {tool_name: RegisteredToolEntry, ...}
+        if isinstance(custom_tools, dict):
+            for tool_name, tool_entry in custom_tools.items():
+                # Check if it's a RegisteredToolEntry object with schema_def
+                if hasattr(tool_entry, "schema_def"):
+                    tool_schema = tool_entry.schema_def
+
+                    # Extract function details from Chat Completions format
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool_entry.tool_name if hasattr(tool_entry, "tool_name") else tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+                # Check if it has get_extended_schema property
+                elif hasattr(tool_entry, "get_extended_schema"):
+                    tool_schema = tool_entry.get_extended_schema
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool_entry.tool_name if hasattr(tool_entry, "tool_name") else tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+        # Handle list format for backward compatibility
+        elif isinstance(custom_tools, list):
+            for tool in custom_tools:
+                if hasattr(tool, "schema_def"):
+                    tool_schema = tool.schema_def
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool.tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+                elif hasattr(tool, "get_extended_schema"):
+                    tool_schema = tool.get_extended_schema
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool.tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+
+        return converted_tools
+
     def format_mcp_tools(self, mcp_functions: Dict[str, Any]) -> List[Dict[str, Any]]:
         """Convert MCP tools to Response API format (OpenAI function declarations)."""
         if not mcp_functions:
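
For reference, a minimal sketch of the conversion format_custom_tools performs. The "add_numbers" tool and its schema below are hypothetical; only the Chat Completions-style input shape and the flattened Response API output shape are taken from the docstring above.

# Hypothetical RegisteredToolEntry.schema_def in Chat Completions format:
chat_completions_schema = {
    "type": "function",
    "function": {
        "name": "add_numbers",
        "description": "Add two numbers.",
        "parameters": {
            "type": "object",
            "properties": {"a": {"type": "number"}, "b": {"type": "number"}},
            "required": ["a", "b"],
        },
    },
}

# Shape produced by format_custom_tools for the Response API: name, description,
# and parameters are hoisted out of the nested "function" object.
response_api_tool = {
    "type": "function",
    "name": "add_numbers",
    "description": "Add two numbers.",
    "parameters": chat_completions_schema["function"]["parameters"],
}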
massgen/frontend/coordination_ui.py CHANGED
@@ -315,7 +315,8 @@ class CoordinationUI:
             # time.sleep(1.0)
 
         # Get final presentation from winning agent
-        if self.enable_final_presentation and selected_agent and vote_results.get("vote_counts"):
+        # Run final presentation if enabled and there's a selected agent (regardless of votes)
+        if self.enable_final_presentation and selected_agent:
             # Don't print - let the display handle it
             # print(f"\n🎤 Final Presentation from {selected_agent}:")
             # print("=" * 60)
@@ -691,7 +692,8 @@ class CoordinationUI:
             # time.sleep(1.0)
 
         # Get final presentation from winning agent
-        if self.enable_final_presentation and selected_agent and vote_results.get("vote_counts"):
+        # Run final presentation if enabled and there's a selected agent (regardless of votes)
+        if self.enable_final_presentation and selected_agent:
             # Don't print - let the display handle it
             # print(f"\n🎤 Final Presentation from {selected_agent}:")
             # print("=" * 60)
massgen/logger_config.py CHANGED
@@ -16,13 +16,21 @@ Color Scheme for Debug Logging:
 """
 
 import inspect
+import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional
 
+import yaml
 from loguru import logger
 
+# Try to import massgen for version info (optional)
+try:
+    import massgen
+except ImportError:
+    massgen = None
+
 # Remove default logger to have full control
 logger.remove()
 
@@ -93,7 +101,12 @@ def get_log_session_dir(turn: Optional[int] = None) -> Path:
     return _LOG_SESSION_DIR
 
 
-def save_execution_metadata(query: str, config_path: Optional[str] = None, config_content: Optional[dict] = None):
+def save_execution_metadata(
+    query: str,
+    config_path: Optional[str] = None,
+    config_content: Optional[dict] = None,
+    cli_args: Optional[dict] = None,
+):
     """Save the query and config metadata to the log directory.
 
     This allows reconstructing what was executed in this session.
@@ -102,9 +115,8 @@ def save_execution_metadata(query: str, config_path: Optional[str] = None, confi
         query: The user's query/prompt
         config_path: Path to the config file that was used (optional)
         config_content: The actual config dictionary (optional)
+        cli_args: Command line arguments as dict (optional)
     """
-    import yaml
-
     log_dir = get_log_session_dir()
 
     # Create a single metadata file with all execution info
@@ -119,6 +131,26 @@ def save_execution_metadata(query: str, config_path: Optional[str] = None, confi
     if config_content:
         metadata["config"] = config_content
 
+    if cli_args:
+        metadata["cli_args"] = cli_args
+
+    # Try to get git information if in a git repository
+    try:
+        git_commit = subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip()
+        git_branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip()
+        metadata["git"] = {"commit": git_commit, "branch": git_branch}
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        # Not in a git repo or git not available
+        pass
+
+    # Add Python version and package version
+    metadata["python_version"] = sys.version
+    if massgen is not None:
+        metadata["massgen_version"] = getattr(massgen, "__version__", "unknown")
+
+    # Add working directory
+    metadata["working_directory"] = str(Path.cwd())
+
     metadata_file = log_dir / "execution_metadata.yaml"
     try:
         with open(metadata_file, "w", encoding="utf-8") as f:
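
A minimal usage sketch of the extended metadata capture. The query, config path, and cli_args values below are illustrative; the recorded keys follow the code above.

from massgen.logger_config import save_execution_metadata

# Hypothetical call from the CLI layer (cli_args would typically be vars(parsed_args)).
save_execution_metadata(
    query="Summarize the latest release notes",
    config_path="massgen/configs/basic/single/single_agent.yaml",
    cli_args={"backend": "openai", "debug": True},
)
# execution_metadata.yaml now also records cli_args, git commit/branch (when available),
# python_version, massgen_version, and the working directory.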
massgen/message_templates.py CHANGED
@@ -10,8 +10,22 @@ from typing import Any, Dict, List, Optional
 class MessageTemplates:
     """Message templates implementing the proven MassGen approach."""
 
-    def __init__(self, **template_overrides):
-        """Initialize with optional template overrides."""
+    def __init__(self, voting_sensitivity: str = "lenient", answer_novelty_requirement: str = "lenient", **template_overrides):
+        """Initialize with optional template overrides.
+
+        Args:
+            voting_sensitivity: Controls how critical agents are when voting.
+                - "lenient": Agents vote YES more easily, fewer new answers (default)
+                - "balanced": Agents apply detailed criteria (comprehensive, accurate, complete?)
+                - "strict": Agents apply high standards of excellence (all aspects, edge cases, reference-quality)
+            answer_novelty_requirement: Controls how different new answers must be.
+                - "lenient": No additional checks (default)
+                - "balanced": Require meaningful differences
+                - "strict": Require substantially different solutions
+            **template_overrides: Custom template strings to override defaults
+        """
+        self._voting_sensitivity = voting_sensitivity
+        self._answer_novelty_requirement = answer_novelty_requirement
         self._template_overrides = template_overrides
 
     # =============================================================================
@@ -57,14 +71,50 @@ class MessageTemplates:
 #     Make sure you actually call `vote` or `new_answer` (in tool call format).
 #
 #     *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
+        # Determine evaluation criteria based on voting sensitivity
+        if self._voting_sensitivity == "strict":
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE exceptionally well? Consider:
+- Is it comprehensive, addressing ALL aspects and edge cases?
+- Is it technically accurate and well-reasoned?
+- Does it provide clear explanations and proper justification?
+- Is it complete with no significant gaps or weaknesses?
+- Could it serve as a reference-quality solution?
+
+Only use the `vote` tool if the best answer meets high standards of excellence."""
+        elif self._voting_sensitivity == "balanced":
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well? Consider:
+- Is it comprehensive, accurate, and complete?
+- Could it be meaningfully improved, refined, or expanded?
+- Are there weaknesses, gaps, or better approaches?
+
+Only use the `vote` tool if the best answer is strong and complete."""
+        else:
+            # Default to lenient (including explicit "lenient" or any other value)
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well?
+
+If YES, use the `vote` tool to record your vote and skip the `new_answer` tool."""
+
+        # Add novelty requirement instructions if not lenient
+        novelty_section = ""
+        if self._answer_novelty_requirement == "balanced":
+            novelty_section = """
+IMPORTANT: If you provide a new answer, it must be meaningfully different from existing answers.
+- Don't just rephrase or reword existing solutions
+- Introduce new insights, approaches, or tools
+- Make substantive improvements, not cosmetic changes"""
+        elif self._answer_novelty_requirement == "strict":
+            novelty_section = """
+CRITICAL: New answers must be SUBSTANTIALLY different from existing answers.
+- Use a fundamentally different approach or methodology
+- Employ different tools or techniques
+- Provide significantly more depth or novel perspectives
+- If you cannot provide a truly novel solution, vote instead"""
 
         return f"""You are evaluating answers from multiple agents for final response to a message.
 Different agents may have different builtin tools and capabilities.
-Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well?
-
-If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.
+{evaluation_section}
 Otherwise, digest existing answers, combine their strengths, and do additional work to address their weaknesses,
-then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.
+then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.{novelty_section}
 Make sure you actually call `vote` or `new_answer` (in tool call format).
 
 *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
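
A minimal sketch of setting the new knobs programmatically; the values are illustrative, and the configs/voting/gemini_gpt_voting_sensitivity.yaml file added in this release shows the config-driven route.

from massgen.message_templates import MessageTemplates

# Stricter voting with a balanced novelty requirement; both parameters default to "lenient".
templates = MessageTemplates(
    voting_sensitivity="strict",
    answer_novelty_requirement="balanced",
)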
massgen/orchestrator.py CHANGED
@@ -44,6 +44,7 @@ from .logger_config import (
 )
 from .message_templates import MessageTemplates
 from .stream_chunk import ChunkType
+from .tool import get_workflow_tools
 from .utils import ActionType, AgentStatus, CoordinationStage
 
 
@@ -137,9 +138,16 @@ class Orchestrator(ChatAgent):
         self.config = config or AgentConfig.create_openai_config()
 
         # Get message templates from config
-        self.message_templates = self.config.message_templates or MessageTemplates()
-        # Create workflow tools for agents (vote and new_answer)
-        self.workflow_tools = self.message_templates.get_standard_tools(list(agents.keys()))
+        self.message_templates = self.config.message_templates or MessageTemplates(
+            voting_sensitivity=self.config.voting_sensitivity,
+            answer_novelty_requirement=self.config.answer_novelty_requirement,
+        )
+        # Create workflow tools for agents (vote and new_answer) using new toolkit system
+        self.workflow_tools = get_workflow_tools(
+            valid_agent_ids=list(agents.keys()),
+            template_overrides=getattr(self.message_templates, "_template_overrides", {}),
+            api_format="chat_completions",  # Default format, will be overridden per backend
+        )
 
         # MassGen-specific state
         self.current_task: Optional[str] = None
@@ -841,8 +849,8 @@
         # Generate single timestamp for answer/vote and workspace
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
 
-        # Save answer if provided
-        if answer_content:
+        # Save answer if provided (or create final directory structure even if empty)
+        if answer_content is not None or is_final:
             try:
                 log_session_dir = get_log_session_dir()
                 if log_session_dir:
@@ -855,8 +863,9 @@
                     timestamped_dir.mkdir(parents=True, exist_ok=True)
                     answer_file = timestamped_dir / "answer.txt"
 
-                    # Write the answer content
-                    answer_file.write_text(answer_content)
+                    # Write the answer content (even if empty for final snapshots)
+                    content_to_write = answer_content if answer_content is not None else ""
+                    answer_file.write_text(content_to_write)
                     logger.info(f"[Orchestrator._save_agent_snapshot] Saved answer to {answer_file}")
 
             except Exception as e:
@@ -935,7 +944,7 @@
             logger.info(f"[Orchestrator._save_agent_snapshot] Agent {agent_id} does not have filesystem_manager")
 
         # Save context if provided (unified context saving)
-        if context_data and (answer_content or vote_data):
+        if context_data:
             try:
                 log_session_dir = get_log_session_dir()
                 if log_session_dir:
@@ -944,6 +953,8 @@
                 else:
                     timestamped_dir = log_session_dir / agent_id / timestamp
 
+                # Ensure directory exists (may not have been created if no answer/vote)
+                timestamped_dir.mkdir(parents=True, exist_ok=True)
                 context_file = timestamped_dir / "context.txt"
 
                 # Handle different types of context data
@@ -1122,6 +1133,91 @@
     #     # Implementation will check against PermissionManager
     #     pass
 
+    def _calculate_jaccard_similarity(self, text1: str, text2: str) -> float:
+        """Calculate Jaccard similarity between two texts based on word tokens.
+
+        Args:
+            text1: First text to compare
+            text2: Second text to compare
+
+        Returns:
+            Similarity score between 0.0 and 1.0
+        """
+        # Tokenize and normalize - simple word-based approach
+        words1 = set(text1.lower().split())
+        words2 = set(text2.lower().split())
+
+        if not words1 and not words2:
+            return 1.0  # Both empty, consider identical
+        if not words1 or not words2:
+            return 0.0  # One empty, one not
+
+        intersection = len(words1 & words2)
+        union = len(words1 | words2)
+
+        return intersection / union if union > 0 else 0.0
+
+    def _check_answer_novelty(self, new_answer: str, existing_answers: Dict[str, str]) -> tuple[bool, Optional[str]]:
+        """Check if a new answer is sufficiently different from existing answers.
+
+        Args:
+            new_answer: The proposed new answer
+            existing_answers: Dictionary of existing answers {agent_id: answer_content}
+
+        Returns:
+            Tuple of (is_novel, error_message). is_novel=True if answer passes novelty check.
+        """
+        # Lenient mode: no checks (current behavior)
+        if self.config.answer_novelty_requirement == "lenient":
+            return (True, None)
+
+        # Determine threshold based on setting
+        if self.config.answer_novelty_requirement == "strict":
+            threshold = 0.50  # Reject if >50% overlap (strict)
+            error_msg = (
+                "Your answer is too similar to existing answers (>50% overlap). Please use a fundamentally different approach, employ different tools/techniques, or vote for an existing answer."
+            )
+        else:  # balanced
+            threshold = 0.70  # Reject if >70% overlap (balanced)
+            error_msg = (
+                "Your answer is too similar to existing answers (>70% overlap). "
+                "Please provide a meaningfully different solution with new insights, "
+                "approaches, or tools, or vote for an existing answer."
+            )
+
+        # Check similarity against all existing answers
+        for agent_id, existing_answer in existing_answers.items():
+            similarity = self._calculate_jaccard_similarity(new_answer, existing_answer)
+            if similarity > threshold:
+                logger.info(f"[Orchestrator] Answer rejected: {similarity:.2%} similar to {agent_id}'s answer (threshold: {threshold:.0%})")
+                return (False, error_msg)
+
+        # Answer is sufficiently novel
+        return (True, None)
+
+    def _check_answer_count_limit(self, agent_id: str) -> tuple[bool, Optional[str]]:
+        """Check if agent has reached their answer count limit.
+
+        Args:
+            agent_id: The agent attempting to provide a new answer
+
+        Returns:
+            Tuple of (can_answer, error_message). can_answer=True if agent can provide another answer.
+        """
+        # No limit set
+        if self.config.max_new_answers_per_agent is None:
+            return (True, None)
+
+        # Count how many answers this agent has provided
+        answer_count = len(self.coordination_tracker.answers_by_agent.get(agent_id, []))
+
+        if answer_count >= self.config.max_new_answers_per_agent:
+            error_msg = f"You've reached the maximum of {self.config.max_new_answers_per_agent} new answer(s). Please vote for the best existing answer using the `vote` tool."
+            logger.info(f"[Orchestrator] Answer rejected: {agent_id} has reached limit ({answer_count}/{self.config.max_new_answers_per_agent})")
+            return (False, error_msg)
+
+        return (True, None)
+
     def _create_tool_error_messages(
         self,
         agent: "ChatAgent",
@@ -1443,6 +1539,10 @@
                 # Forward MCP status messages with proper formatting
                 mcp_content = f"🔧 MCP: {chunk.content}"
                 yield ("content", mcp_content)
+            elif chunk_type == "custom_tool_status":
+                # Forward custom tool status messages with proper formatting
+                custom_tool_content = f"🔧 Custom Tool: {chunk.content}"
+                yield ("content", custom_tool_content)
             elif chunk_type == "debug":
                 # Forward debug chunks
                 yield ("debug", chunk.content)
@@ -1660,6 +1760,54 @@
                     # Agent provided new answer
                     content = tool_args.get("content", response_text.strip())
 
+                    # Check answer count limit
+                    can_answer, count_error = self._check_answer_count_limit(agent_id)
+                    if not can_answer:
+                        if attempt < max_attempts - 1:
+                            if self._check_restart_pending(agent_id):
+                                await self._save_partial_work_on_restart(agent_id)
+                                yield (
+                                    "content",
+                                    f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
+                                )
+                                yield ("done", None)
+                                return
+                            yield ("content", f"❌ {count_error}")
+                            # Create proper tool error message for retry
+                            enforcement_msg = self._create_tool_error_messages(agent, [tool_call], count_error)
+                            continue
+                        else:
+                            yield (
+                                "error",
+                                f"Answer count limit reached after {max_attempts} attempts",
+                            )
+                            yield ("done", None)
+                            return
+
+                    # Check answer novelty (similarity to existing answers)
+                    is_novel, novelty_error = self._check_answer_novelty(content, answers)
+                    if not is_novel:
+                        if attempt < max_attempts - 1:
+                            if self._check_restart_pending(agent_id):
+                                await self._save_partial_work_on_restart(agent_id)
+                                yield (
+                                    "content",
+                                    f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
+                                )
+                                yield ("done", None)
+                                return
+                            yield ("content", f"❌ {novelty_error}")
+                            # Create proper tool error message for retry
+                            enforcement_msg = self._create_tool_error_messages(agent, [tool_call], novelty_error)
+                            continue
+                        else:
+                            yield (
+                                "error",
+                                f"Answer novelty requirement not met after {max_attempts} attempts",
+                            )
+                            yield ("done", None)
+                            return
+
                     # Check for duplicate answer
                     # Normalize both new content and existing content to neutral paths for comparison
                     normalized_new_content = self._normalize_workspace_paths_for_comparison(content)
@@ -1695,6 +1843,9 @@
                     return
                 elif tool_name.startswith("mcp"):
                     pass
+                elif tool_name.startswith("custom_tool"):
+                    # Custom tools are handled by the backend and their results are streamed separately
+                    pass
                 else:
                     # Non-workflow tools not yet implemented
                     yield (
@@ -2056,6 +2207,7 @@
 
         # Use agent's chat method with proper system message (reset chat for clean presentation)
         presentation_content = ""
+        final_snapshot_saved = False  # Track whether snapshot was saved during stream
 
         try:
             # Track final round iterations (each chunk is like an iteration)
@@ -2121,6 +2273,9 @@
                     # Track the final answer in coordination tracker
                     self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")
 
+                    # Mark snapshot as saved
+                    final_snapshot_saved = True
+
                     log_stream_chunk("orchestrator", "done", None, selected_agent_id)
                     yield StreamChunk(type="done", source=selected_agent_id)
                 elif chunk_type == "error":
@@ -2139,7 +2294,7 @@
                         type=chunk_type,
                         content=getattr(chunk, "content", ""),
                         source=selected_agent_id,
-                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
+                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
                    )
                 else:
                     log_stream_chunk(
@@ -2152,10 +2307,24 @@
                         type=chunk_type,
                         content=getattr(chunk, "content", ""),
                         source=selected_agent_id,
-                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
+                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
                     )
 
         finally:
+            # Ensure final snapshot is always saved (even if "done" chunk wasn't yielded)
+            if not final_snapshot_saved:
+                final_answer = presentation_content.strip() if presentation_content.strip() else self.agent_states[selected_agent_id].answer
+                final_context = self.get_last_context(selected_agent_id)
+                await self._save_agent_snapshot(
+                    self._selected_agent,
+                    answer_content=final_answer,
+                    is_final=True,
+                    context_data=final_context,
+                )
+
+                # Track the final answer in coordination tracker
+                self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")
+
             # Store the final presentation content for logging
             if presentation_content.strip():
                 # Store the synthesized final answer
massgen/stream_chunk/base.py CHANGED
@@ -33,6 +33,9 @@ class ChunkType(Enum):
     # MCP-related chunks
     MCP_STATUS = "mcp_status"
 
+    # Custom tool chunks
+    CUSTOM_TOOL_STATUS = "custom_tool_status"
+
     # Multimodal chunks
     MEDIA = "media"
     MEDIA_PROGRESS = "media_progress"
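
To illustrate how the new chunk type surfaces downstream, a small sketch; the status text is hypothetical, and the "🔧 Custom Tool:" prefix mirrors the orchestrator hunk above.

from massgen.stream_chunk import ChunkType

# A backend can tag custom-tool progress with the new enum member...
assert ChunkType.CUSTOM_TOOL_STATUS.value == "custom_tool_status"

# ...and the orchestrator forwards it to the UI with a prefix, as shown in the orchestrator diff:
status_text = "executing add_numbers"  # hypothetical status message from a custom tool
forwarded = f"🔧 Custom Tool: {status_text}"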