massgen 0.1.0a3__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- massgen/__init__.py +1 -1
- massgen/agent_config.py +17 -0
- massgen/api_params_handler/_api_params_handler_base.py +1 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +8 -1
- massgen/api_params_handler/_claude_api_params_handler.py +8 -1
- massgen/api_params_handler/_gemini_api_params_handler.py +73 -0
- massgen/api_params_handler/_response_api_params_handler.py +8 -1
- massgen/backend/base.py +31 -0
- massgen/backend/{base_with_mcp.py → base_with_custom_tool_and_mcp.py} +282 -11
- massgen/backend/chat_completions.py +182 -92
- massgen/backend/claude.py +115 -18
- massgen/backend/claude_code.py +378 -14
- massgen/backend/docs/CLAUDE_API_RESEARCH.md +3 -3
- massgen/backend/gemini.py +1275 -1607
- massgen/backend/gemini_mcp_manager.py +545 -0
- massgen/backend/gemini_trackers.py +344 -0
- massgen/backend/gemini_utils.py +43 -0
- massgen/backend/response.py +129 -70
- massgen/cli.py +577 -110
- massgen/config_builder.py +376 -27
- massgen/configs/README.md +111 -80
- massgen/configs/basic/multi/three_agents_default.yaml +1 -1
- massgen/configs/basic/single/single_agent.yaml +1 -1
- massgen/configs/providers/openai/gpt5_nano.yaml +3 -3
- massgen/configs/tools/custom_tools/claude_code_custom_tool_example.yaml +32 -0
- massgen/configs/tools/custom_tools/claude_code_custom_tool_example_no_path.yaml +28 -0
- massgen/configs/tools/custom_tools/claude_code_custom_tool_with_mcp_example.yaml +40 -0
- massgen/configs/tools/custom_tools/claude_code_custom_tool_with_wrong_mcp_example.yaml +38 -0
- massgen/configs/tools/custom_tools/claude_code_wrong_custom_tool_with_mcp_example.yaml +38 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/claude_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/claude_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/gemini_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gemini_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/github_issue_market_analysis.yaml +94 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gpt5_nano_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example.yaml +25 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example_no_path.yaml +23 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_wrong_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/gpt_oss_wrong_custom_tool_with_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/grok3_mini_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_example.yaml +25 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_example_no_path.yaml +23 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_mcp_example.yaml +36 -0
- massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_wrong_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/qwen_api_wrong_custom_tool_with_mcp_example.yaml +34 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_example.yaml +24 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_example_no_path.yaml +22 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_mcp_example.yaml +35 -0
- massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_wrong_mcp_example.yaml +33 -0
- massgen/configs/tools/custom_tools/qwen_local_wrong_custom_tool_with_mcp_example.yaml +33 -0
- massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +1 -1
- massgen/configs/voting/gemini_gpt_voting_sensitivity.yaml +67 -0
- massgen/formatter/_chat_completions_formatter.py +104 -0
- massgen/formatter/_claude_formatter.py +120 -0
- massgen/formatter/_gemini_formatter.py +448 -0
- massgen/formatter/_response_formatter.py +88 -0
- massgen/frontend/coordination_ui.py +4 -2
- massgen/logger_config.py +35 -3
- massgen/message_templates.py +56 -6
- massgen/orchestrator.py +179 -10
- massgen/stream_chunk/base.py +3 -0
- massgen/tests/custom_tools_example.py +392 -0
- massgen/tests/mcp_test_server.py +17 -7
- massgen/tests/test_config_builder.py +423 -0
- massgen/tests/test_custom_tools.py +401 -0
- massgen/tests/test_tools.py +127 -0
- massgen/tool/README.md +935 -0
- massgen/tool/__init__.py +39 -0
- massgen/tool/_async_helpers.py +70 -0
- massgen/tool/_basic/__init__.py +8 -0
- massgen/tool/_basic/_two_num_tool.py +24 -0
- massgen/tool/_code_executors/__init__.py +10 -0
- massgen/tool/_code_executors/_python_executor.py +74 -0
- massgen/tool/_code_executors/_shell_executor.py +61 -0
- massgen/tool/_exceptions.py +39 -0
- massgen/tool/_file_handlers/__init__.py +10 -0
- massgen/tool/_file_handlers/_file_operations.py +218 -0
- massgen/tool/_manager.py +634 -0
- massgen/tool/_registered_tool.py +88 -0
- massgen/tool/_result.py +66 -0
- massgen/tool/_self_evolution/_github_issue_analyzer.py +369 -0
- massgen/tool/docs/builtin_tools.md +681 -0
- massgen/tool/docs/exceptions.md +794 -0
- massgen/tool/docs/execution_results.md +691 -0
- massgen/tool/docs/manager.md +887 -0
- massgen/tool/docs/workflow_toolkits.md +529 -0
- massgen/tool/workflow_toolkits/__init__.py +57 -0
- massgen/tool/workflow_toolkits/base.py +55 -0
- massgen/tool/workflow_toolkits/new_answer.py +126 -0
- massgen/tool/workflow_toolkits/vote.py +167 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.1.dist-info}/METADATA +89 -131
- {massgen-0.1.0a3.dist-info → massgen-0.1.1.dist-info}/RECORD +111 -36
- {massgen-0.1.0a3.dist-info → massgen-0.1.1.dist-info}/WHEEL +0 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.1.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.0a3.dist-info → massgen-0.1.1.dist-info}/top_level.txt +0 -0
massgen/formatter/_response_formatter.py
CHANGED
@@ -241,6 +241,94 @@ class ResponseFormatter(FormatterBase):

         return converted_tools

+    def format_custom_tools(self, custom_tools: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Convert custom tools from RegisteredToolEntry format to Response API format.
+
+        Custom tools are provided as a dictionary where:
+        - Keys are tool names (str)
+        - Values are RegisteredToolEntry objects with:
+          - tool_name: str
+          - schema_def: dict with structure {"type": "function", "function": {...}}
+          - get_extended_schema: property that returns the schema with extensions
+
+        Response API expects: {"type": "function", "name": ..., "description": ..., "parameters": ...}
+
+        Args:
+            custom_tools: Dictionary of tool_name -> RegisteredToolEntry objects
+
+        Returns:
+            List of tools in Response API format
+        """
+        if not custom_tools:
+            return []
+
+        converted_tools = []
+
+        # Handle dictionary format: {tool_name: RegisteredToolEntry, ...}
+        if isinstance(custom_tools, dict):
+            for tool_name, tool_entry in custom_tools.items():
+                # Check if it's a RegisteredToolEntry object with schema_def
+                if hasattr(tool_entry, "schema_def"):
+                    tool_schema = tool_entry.schema_def
+
+                    # Extract function details from Chat Completions format
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool_entry.tool_name if hasattr(tool_entry, "tool_name") else tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+                # Check if it has get_extended_schema property
+                elif hasattr(tool_entry, "get_extended_schema"):
+                    tool_schema = tool_entry.get_extended_schema
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool_entry.tool_name if hasattr(tool_entry, "tool_name") else tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+        # Handle list format for backward compatibility
+        elif isinstance(custom_tools, list):
+            for tool in custom_tools:
+                if hasattr(tool, "schema_def"):
+                    tool_schema = tool.schema_def
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool.tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+                elif hasattr(tool, "get_extended_schema"):
+                    tool_schema = tool.get_extended_schema
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool.tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+
+        return converted_tools
+
     def format_mcp_tools(self, mcp_functions: Dict[str, Any]) -> List[Dict[str, Any]]:
         """Convert MCP tools to Response API format (OpenAI function declarations)."""
         if not mcp_functions:
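For orientation, here is a minimal, self-contained sketch of the schema mapping performed by format_custom_tools above. ExampleToolEntry is a hypothetical stand-in for massgen's RegisteredToolEntry, and the add_two_numbers schema is illustrative only:

# Minimal sketch of the conversion performed by format_custom_tools.
# ExampleToolEntry is a hypothetical stand-in for massgen's RegisteredToolEntry,
# and the add_two_numbers schema is illustrative only.
from dataclasses import dataclass
from typing import Any, Dict


@dataclass
class ExampleToolEntry:
    tool_name: str
    schema_def: Dict[str, Any]  # Chat Completions style: {"type": "function", "function": {...}}


def to_response_api_format(entry: ExampleToolEntry) -> Dict[str, Any]:
    # Flatten the nested "function" object into a top-level Response API tool dict.
    func = entry.schema_def.get("function", {})
    return {
        "type": "function",
        "name": func.get("name", entry.tool_name),
        "description": func.get("description", ""),
        "parameters": func.get("parameters", {}),
    }


entry = ExampleToolEntry(
    tool_name="add_two_numbers",
    schema_def={
        "type": "function",
        "function": {
            "name": "add_two_numbers",
            "description": "Add two integers.",
            "parameters": {
                "type": "object",
                "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
                "required": ["a", "b"],
            },
        },
    },
)
print(to_response_api_format(entry))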
massgen/frontend/coordination_ui.py
CHANGED
@@ -315,7 +315,8 @@ class CoordinationUI:
         # time.sleep(1.0)

         # Get final presentation from winning agent
-        if
+        # Run final presentation if enabled and there's a selected agent (regardless of votes)
+        if self.enable_final_presentation and selected_agent:
             # Don't print - let the display handle it
             # print(f"\n🎤 Final Presentation from {selected_agent}:")
             # print("=" * 60)
@@ -691,7 +692,8 @@ class CoordinationUI:
         # time.sleep(1.0)

         # Get final presentation from winning agent
-        if
+        # Run final presentation if enabled and there's a selected agent (regardless of votes)
+        if self.enable_final_presentation and selected_agent:
             # Don't print - let the display handle it
             # print(f"\n🎤 Final Presentation from {selected_agent}:")
             # print("=" * 60)
massgen/logger_config.py
CHANGED
@@ -16,13 +16,21 @@ Color Scheme for Debug Logging:
 """

 import inspect
+import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional

+import yaml
 from loguru import logger

+# Try to import massgen for version info (optional)
+try:
+    import massgen
+except ImportError:
+    massgen = None
+
 # Remove default logger to have full control
 logger.remove()

@@ -93,7 +101,12 @@ def get_log_session_dir(turn: Optional[int] = None) -> Path:
     return _LOG_SESSION_DIR


-def save_execution_metadata(
+def save_execution_metadata(
+    query: str,
+    config_path: Optional[str] = None,
+    config_content: Optional[dict] = None,
+    cli_args: Optional[dict] = None,
+):
     """Save the query and config metadata to the log directory.

     This allows reconstructing what was executed in this session.
@@ -102,9 +115,8 @@ def save_execution_metadata(query: str, config_path: Optional[str] = None, confi
         query: The user's query/prompt
         config_path: Path to the config file that was used (optional)
         config_content: The actual config dictionary (optional)
+        cli_args: Command line arguments as dict (optional)
     """
-    import yaml
-
     log_dir = get_log_session_dir()

     # Create a single metadata file with all execution info
@@ -119,6 +131,26 @@ def save_execution_metadata(query: str, config_path: Optional[str] = None, confi
     if config_content:
         metadata["config"] = config_content

+    if cli_args:
+        metadata["cli_args"] = cli_args
+
+    # Try to get git information if in a git repository
+    try:
+        git_commit = subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip()
+        git_branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip()
+        metadata["git"] = {"commit": git_commit, "branch": git_branch}
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        # Not in a git repo or git not available
+        pass
+
+    # Add Python version and package version
+    metadata["python_version"] = sys.version
+    if massgen is not None:
+        metadata["massgen_version"] = getattr(massgen, "__version__", "unknown")
+
+    # Add working directory
+    metadata["working_directory"] = str(Path.cwd())
+
     metadata_file = log_dir / "execution_metadata.yaml"
     try:
         with open(metadata_file, "w", encoding="utf-8") as f:
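The metadata gathering added above can be reproduced outside massgen with a few lines; this standalone sketch mirrors the field names from the diff (the output filename is illustrative, and PyYAML is assumed to be installed):

# Standalone sketch mirroring the metadata fields added above; the output filename is
# illustrative and PyYAML is assumed to be installed.
import subprocess
import sys
from pathlib import Path

import yaml

metadata = {"python_version": sys.version, "working_directory": str(Path.cwd())}

try:
    metadata["git"] = {
        "commit": subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip(),
        "branch": subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip(),
    }
except (subprocess.CalledProcessError, FileNotFoundError):
    pass  # not a git checkout, or git is not installed

with open("execution_metadata.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(metadata, f)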
massgen/message_templates.py
CHANGED
@@ -10,8 +10,22 @@ from typing import Any, Dict, List, Optional
 class MessageTemplates:
     """Message templates implementing the proven MassGen approach."""

-    def __init__(self, **template_overrides):
-        """Initialize with optional template overrides.
+    def __init__(self, voting_sensitivity: str = "lenient", answer_novelty_requirement: str = "lenient", **template_overrides):
+        """Initialize with optional template overrides.
+
+        Args:
+            voting_sensitivity: Controls how critical agents are when voting.
+                - "lenient": Agents vote YES more easily, fewer new answers (default)
+                - "balanced": Agents apply detailed criteria (comprehensive, accurate, complete?)
+                - "strict": Agents apply high standards of excellence (all aspects, edge cases, reference-quality)
+            answer_novelty_requirement: Controls how different new answers must be.
+                - "lenient": No additional checks (default)
+                - "balanced": Require meaningful differences
+                - "strict": Require substantially different solutions
+            **template_overrides: Custom template strings to override defaults
+        """
+        self._voting_sensitivity = voting_sensitivity
+        self._answer_novelty_requirement = answer_novelty_requirement
         self._template_overrides = template_overrides

     # =============================================================================
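Assuming massgen 0.1.1 is installed, the new constructor arguments would be passed as below; this is a usage sketch based on the signature in the diff, not an excerpt from the package documentation:

# Usage sketch based on the __init__ signature above; assumes massgen 0.1.1 is installed.
from massgen.message_templates import MessageTemplates

templates = MessageTemplates(
    voting_sensitivity="balanced",          # "lenient" (default), "balanced", or "strict"
    answer_novelty_requirement="balanced",  # how different a new answer must be before it is accepted
)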
@@ -57,14 +71,50 @@ class MessageTemplates:
         # Make sure you actually call `vote` or `new_answer` (in tool call format).
         #
         # *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
+        # Determine evaluation criteria based on voting sensitivity
+        if self._voting_sensitivity == "strict":
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE exceptionally well? Consider:
+- Is it comprehensive, addressing ALL aspects and edge cases?
+- Is it technically accurate and well-reasoned?
+- Does it provide clear explanations and proper justification?
+- Is it complete with no significant gaps or weaknesses?
+- Could it serve as a reference-quality solution?
+
+Only use the `vote` tool if the best answer meets high standards of excellence."""
+        elif self._voting_sensitivity == "balanced":
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well? Consider:
+- Is it comprehensive, accurate, and complete?
+- Could it be meaningfully improved, refined, or expanded?
+- Are there weaknesses, gaps, or better approaches?
+
+Only use the `vote` tool if the best answer is strong and complete."""
+        else:
+            # Default to lenient (including explicit "lenient" or any other value)
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well?
+
+If YES, use the `vote` tool to record your vote and skip the `new_answer` tool."""
+
+        # Add novelty requirement instructions if not lenient
+        novelty_section = ""
+        if self._answer_novelty_requirement == "balanced":
+            novelty_section = """
+IMPORTANT: If you provide a new answer, it must be meaningfully different from existing answers.
+- Don't just rephrase or reword existing solutions
+- Introduce new insights, approaches, or tools
+- Make substantive improvements, not cosmetic changes"""
+        elif self._answer_novelty_requirement == "strict":
+            novelty_section = """
+CRITICAL: New answers must be SUBSTANTIALLY different from existing answers.
+- Use a fundamentally different approach or methodology
+- Employ different tools or techniques
+- Provide significantly more depth or novel perspectives
+- If you cannot provide a truly novel solution, vote instead"""

         return f"""You are evaluating answers from multiple agents for final response to a message.
 Different agents may have different builtin tools and capabilities.
-
-
-If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.
+{evaluation_section}
 Otherwise, digest existing answers, combine their strengths, and do additional work to address their weaknesses,
-then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.
+then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.{novelty_section}
 Make sure you actually call `vote` or `new_answer` (in tool call format).

 *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
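A simplified, standalone sketch (not massgen code) of how the conditional sections compose into the final coordination prompt:

# Simplified sketch of how the conditional sections compose into the prompt (not massgen code).
import time


def build_prompt(evaluation_section: str, novelty_section: str) -> str:
    return f"""You are evaluating answers from multiple agents for final response to a message.
Different agents may have different builtin tools and capabilities.
{evaluation_section}
Otherwise, digest existing answers, combine their strengths, and do additional work to address their weaknesses,
then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.{novelty_section}
Make sure you actually call `vote` or `new_answer` (in tool call format).

*Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""


# Lenient behaviour: a short evaluation section and no novelty section.
print(build_prompt("If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.", ""))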
massgen/orchestrator.py
CHANGED
@@ -44,6 +44,7 @@ from .logger_config import (
 )
 from .message_templates import MessageTemplates
 from .stream_chunk import ChunkType
+from .tool import get_workflow_tools
 from .utils import ActionType, AgentStatus, CoordinationStage


@@ -137,9 +138,16 @@ class Orchestrator(ChatAgent):
         self.config = config or AgentConfig.create_openai_config()

         # Get message templates from config
-        self.message_templates = self.config.message_templates or MessageTemplates(
-
-
+        self.message_templates = self.config.message_templates or MessageTemplates(
+            voting_sensitivity=self.config.voting_sensitivity,
+            answer_novelty_requirement=self.config.answer_novelty_requirement,
+        )
+        # Create workflow tools for agents (vote and new_answer) using new toolkit system
+        self.workflow_tools = get_workflow_tools(
+            valid_agent_ids=list(agents.keys()),
+            template_overrides=getattr(self.message_templates, "_template_overrides", {}),
+            api_format="chat_completions",  # Default format, will be overridden per backend
+        )

         # MassGen-specific state
         self.current_task: Optional[str] = None
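The orchestrator now obtains its vote/new_answer tools from get_workflow_tools; the sketch below shows what such tool definitions plausibly look like in Chat Completions format. The new_answer "content" parameter matches what the orchestrator reads later in this diff, while the vote tool's "agent_id" and "reason" parameters are assumptions for illustration:

# Hypothetical sketch of the two workflow tools in Chat Completions tool format.
# The new_answer "content" parameter matches what the orchestrator reads later in this diff;
# the vote tool's "agent_id" and "reason" parameters are assumptions for illustration.
valid_agent_ids = ["agent_a", "agent_b"]

workflow_tools = [
    {
        "type": "function",
        "function": {
            "name": "new_answer",
            "description": "Record a new or improved answer to the original message.",
            "parameters": {
                "type": "object",
                "properties": {"content": {"type": "string"}},
                "required": ["content"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "vote",
            "description": "Vote for the best existing answer.",
            "parameters": {
                "type": "object",
                "properties": {
                    "agent_id": {"type": "string", "enum": valid_agent_ids},
                    "reason": {"type": "string"},
                },
                "required": ["agent_id"],
            },
        },
    },
]
print(workflow_tools[1]["function"]["name"])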
@@ -841,8 +849,8 @@ class Orchestrator(ChatAgent):
         # Generate single timestamp for answer/vote and workspace
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")

-        # Save answer if provided
-        if answer_content:
+        # Save answer if provided (or create final directory structure even if empty)
+        if answer_content is not None or is_final:
             try:
                 log_session_dir = get_log_session_dir()
                 if log_session_dir:
@@ -855,8 +863,9 @@ class Orchestrator(ChatAgent):
                     timestamped_dir.mkdir(parents=True, exist_ok=True)
                     answer_file = timestamped_dir / "answer.txt"

-                    # Write the answer content
-
+                    # Write the answer content (even if empty for final snapshots)
+                    content_to_write = answer_content if answer_content is not None else ""
+                    answer_file.write_text(content_to_write)
                     logger.info(f"[Orchestrator._save_agent_snapshot] Saved answer to {answer_file}")

             except Exception as e:
@@ -935,7 +944,7 @@ class Orchestrator(ChatAgent):
             logger.info(f"[Orchestrator._save_agent_snapshot] Agent {agent_id} does not have filesystem_manager")

         # Save context if provided (unified context saving)
-        if context_data
+        if context_data:
             try:
                 log_session_dir = get_log_session_dir()
                 if log_session_dir:
@@ -944,6 +953,8 @@ class Orchestrator(ChatAgent):
                     else:
                         timestamped_dir = log_session_dir / agent_id / timestamp

+                    # Ensure directory exists (may not have been created if no answer/vote)
+                    timestamped_dir.mkdir(parents=True, exist_ok=True)
                     context_file = timestamped_dir / "context.txt"

                     # Handle different types of context data
@@ -1122,6 +1133,91 @@ class Orchestrator(ChatAgent):
     # # Implementation will check against PermissionManager
     # pass

+    def _calculate_jaccard_similarity(self, text1: str, text2: str) -> float:
+        """Calculate Jaccard similarity between two texts based on word tokens.
+
+        Args:
+            text1: First text to compare
+            text2: Second text to compare
+
+        Returns:
+            Similarity score between 0.0 and 1.0
+        """
+        # Tokenize and normalize - simple word-based approach
+        words1 = set(text1.lower().split())
+        words2 = set(text2.lower().split())
+
+        if not words1 and not words2:
+            return 1.0  # Both empty, consider identical
+        if not words1 or not words2:
+            return 0.0  # One empty, one not
+
+        intersection = len(words1 & words2)
+        union = len(words1 | words2)
+
+        return intersection / union if union > 0 else 0.0
+
+    def _check_answer_novelty(self, new_answer: str, existing_answers: Dict[str, str]) -> tuple[bool, Optional[str]]:
+        """Check if a new answer is sufficiently different from existing answers.
+
+        Args:
+            new_answer: The proposed new answer
+            existing_answers: Dictionary of existing answers {agent_id: answer_content}
+
+        Returns:
+            Tuple of (is_novel, error_message). is_novel=True if answer passes novelty check.
+        """
+        # Lenient mode: no checks (current behavior)
+        if self.config.answer_novelty_requirement == "lenient":
+            return (True, None)
+
+        # Determine threshold based on setting
+        if self.config.answer_novelty_requirement == "strict":
+            threshold = 0.50  # Reject if >50% overlap (strict)
+            error_msg = (
+                "Your answer is too similar to existing answers (>50% overlap). Please use a fundamentally different approach, employ different tools/techniques, or vote for an existing answer."
+            )
+        else:  # balanced
+            threshold = 0.70  # Reject if >70% overlap (balanced)
+            error_msg = (
+                "Your answer is too similar to existing answers (>70% overlap). "
+                "Please provide a meaningfully different solution with new insights, "
+                "approaches, or tools, or vote for an existing answer."
+            )

+        # Check similarity against all existing answers
+        for agent_id, existing_answer in existing_answers.items():
+            similarity = self._calculate_jaccard_similarity(new_answer, existing_answer)
+            if similarity > threshold:
+                logger.info(f"[Orchestrator] Answer rejected: {similarity:.2%} similar to {agent_id}'s answer (threshold: {threshold:.0%})")
+                return (False, error_msg)
+
+        # Answer is sufficiently novel
+        return (True, None)
+
+    def _check_answer_count_limit(self, agent_id: str) -> tuple[bool, Optional[str]]:
+        """Check if agent has reached their answer count limit.
+
+        Args:
+            agent_id: The agent attempting to provide a new answer
+
+        Returns:
+            Tuple of (can_answer, error_message). can_answer=True if agent can provide another answer.
+        """
+        # No limit set
+        if self.config.max_new_answers_per_agent is None:
+            return (True, None)
+
+        # Count how many answers this agent has provided
+        answer_count = len(self.coordination_tracker.answers_by_agent.get(agent_id, []))
+
+        if answer_count >= self.config.max_new_answers_per_agent:
+            error_msg = f"You've reached the maximum of {self.config.max_new_answers_per_agent} new answer(s). Please vote for the best existing answer using the `vote` tool."
+            logger.info(f"[Orchestrator] Answer rejected: {agent_id} has reached limit ({answer_count}/{self.config.max_new_answers_per_agent})")
+            return (False, error_msg)
+
+        return (True, None)
+
     def _create_tool_error_messages(
         self,
         agent: "ChatAgent",
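A quick standalone check of the word-level Jaccard similarity and the two thresholds introduced above (the sample answers are made up):

# Quick standalone check of the word-level Jaccard similarity and the two thresholds
# introduced above. The sample answers are made up for illustration.
def jaccard(text1: str, text2: str) -> float:
    words1, words2 = set(text1.lower().split()), set(text2.lower().split())
    if not words1 and not words2:
        return 1.0
    if not words1 or not words2:
        return 0.0
    return len(words1 & words2) / len(words1 | words2)


a = "use the requests library to fetch the url and parse the json response"
b = "fetch the url with the requests library and parse the json response"
similarity = jaccard(a, b)                      # 9 shared words / 12 unique words = 0.75
print(f"similarity = {similarity:.2f}")
print("rejected under balanced (>0.70):", similarity > 0.70)
print("rejected under strict (>0.50):", similarity > 0.50)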
@@ -1443,6 +1539,10 @@ class Orchestrator(ChatAgent):
                     # Forward MCP status messages with proper formatting
                     mcp_content = f"🔧 MCP: {chunk.content}"
                     yield ("content", mcp_content)
+                elif chunk_type == "custom_tool_status":
+                    # Forward custom tool status messages with proper formatting
+                    custom_tool_content = f"🔧 Custom Tool: {chunk.content}"
+                    yield ("content", custom_tool_content)
                 elif chunk_type == "debug":
                     # Forward debug chunks
                     yield ("debug", chunk.content)
@@ -1660,6 +1760,54 @@ class Orchestrator(ChatAgent):
                         # Agent provided new answer
                         content = tool_args.get("content", response_text.strip())

+                        # Check answer count limit
+                        can_answer, count_error = self._check_answer_count_limit(agent_id)
+                        if not can_answer:
+                            if attempt < max_attempts - 1:
+                                if self._check_restart_pending(agent_id):
+                                    await self._save_partial_work_on_restart(agent_id)
+                                    yield (
+                                        "content",
+                                        f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
+                                    )
+                                    yield ("done", None)
+                                    return
+                                yield ("content", f"❌ {count_error}")
+                                # Create proper tool error message for retry
+                                enforcement_msg = self._create_tool_error_messages(agent, [tool_call], count_error)
+                                continue
+                            else:
+                                yield (
+                                    "error",
+                                    f"Answer count limit reached after {max_attempts} attempts",
+                                )
+                                yield ("done", None)
+                                return
+
+                        # Check answer novelty (similarity to existing answers)
+                        is_novel, novelty_error = self._check_answer_novelty(content, answers)
+                        if not is_novel:
+                            if attempt < max_attempts - 1:
+                                if self._check_restart_pending(agent_id):
+                                    await self._save_partial_work_on_restart(agent_id)
+                                    yield (
+                                        "content",
+                                        f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
+                                    )
+                                    yield ("done", None)
+                                    return
+                                yield ("content", f"❌ {novelty_error}")
+                                # Create proper tool error message for retry
+                                enforcement_msg = self._create_tool_error_messages(agent, [tool_call], novelty_error)
+                                continue
+                            else:
+                                yield (
+                                    "error",
+                                    f"Answer novelty requirement not met after {max_attempts} attempts",
+                                )
+                                yield ("done", None)
+                                return
+
                         # Check for duplicate answer
                         # Normalize both new content and existing content to neutral paths for comparison
                         normalized_new_content = self._normalize_workspace_paths_for_comparison(content)
@@ -1695,6 +1843,9 @@ class Orchestrator(ChatAgent):
                             return
                     elif tool_name.startswith("mcp"):
                         pass
+                    elif tool_name.startswith("custom_tool"):
+                        # Custom tools are handled by the backend and their results are streamed separately
+                        pass
                     else:
                         # Non-workflow tools not yet implemented
                         yield (
@@ -2056,6 +2207,7 @@ class Orchestrator(ChatAgent):

         # Use agent's chat method with proper system message (reset chat for clean presentation)
         presentation_content = ""
+        final_snapshot_saved = False  # Track whether snapshot was saved during stream

         try:
             # Track final round iterations (each chunk is like an iteration)
@@ -2121,6 +2273,9 @@ class Orchestrator(ChatAgent):
                     # Track the final answer in coordination tracker
                    self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")

+                    # Mark snapshot as saved
+                    final_snapshot_saved = True
+
                     log_stream_chunk("orchestrator", "done", None, selected_agent_id)
                     yield StreamChunk(type="done", source=selected_agent_id)
                 elif chunk_type == "error":
@@ -2139,7 +2294,7 @@ class Orchestrator(ChatAgent):
                         type=chunk_type,
                         content=getattr(chunk, "content", ""),
                         source=selected_agent_id,
-                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
+                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
                     )
                 else:
                     log_stream_chunk(
@@ -2152,10 +2307,24 @@ class Orchestrator(ChatAgent):
                         type=chunk_type,
                         content=getattr(chunk, "content", ""),
                         source=selected_agent_id,
-                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
+                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
                     )

         finally:
+            # Ensure final snapshot is always saved (even if "done" chunk wasn't yielded)
+            if not final_snapshot_saved:
+                final_answer = presentation_content.strip() if presentation_content.strip() else self.agent_states[selected_agent_id].answer
+                final_context = self.get_last_context(selected_agent_id)
+                await self._save_agent_snapshot(
+                    self._selected_agent,
+                    answer_content=final_answer,
+                    is_final=True,
+                    context_data=final_context,
+                )
+
+                # Track the final answer in coordination tracker
+                self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")
+
             # Store the final presentation content for logging
             if presentation_content.strip():
                 # Store the synthesized final answer
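The widened exclusion list in the two hunks above keeps auto-generated fields out of the re-wrapped chunk; a small sketch with a stand-in dataclass (DemoChunk is not massgen's StreamChunk, whose exact fields may differ) illustrates the pattern:

# Sketch of re-wrapping a streaming chunk under a new source while excluding
# auto-generated fields. DemoChunk is a stand-in; massgen's StreamChunk may differ.
from dataclasses import dataclass, field
from datetime import datetime


@dataclass
class DemoChunk:
    type: str
    content: str = ""
    source: str = ""
    timestamp: datetime = field(default_factory=datetime.now)
    sequence_number: int = 0


original = DemoChunk(type="content", content="hello", source="agent_a", sequence_number=7)

# Excluding timestamp/sequence_number lets the new chunk regenerate them instead of
# inheriting stale values or colliding with explicitly passed keyword arguments.
rewrapped = DemoChunk(
    type=original.type,
    content=original.content,
    source="orchestrator",
    **{k: v for k, v in original.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
)
print(rewrapped)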
massgen/stream_chunk/base.py
CHANGED