massgen 0.0.3__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +142 -8
- massgen/adapters/__init__.py +29 -0
- massgen/adapters/ag2_adapter.py +483 -0
- massgen/adapters/base.py +183 -0
- massgen/adapters/tests/__init__.py +0 -0
- massgen/adapters/tests/test_ag2_adapter.py +439 -0
- massgen/adapters/tests/test_agent_adapter.py +128 -0
- massgen/adapters/utils/__init__.py +2 -0
- massgen/adapters/utils/ag2_utils.py +236 -0
- massgen/adapters/utils/tests/__init__.py +0 -0
- massgen/adapters/utils/tests/test_ag2_utils.py +138 -0
- massgen/agent_config.py +329 -55
- massgen/api_params_handler/__init__.py +10 -0
- massgen/api_params_handler/_api_params_handler_base.py +99 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +176 -0
- massgen/api_params_handler/_claude_api_params_handler.py +113 -0
- massgen/api_params_handler/_response_api_params_handler.py +130 -0
- massgen/backend/__init__.py +39 -4
- massgen/backend/azure_openai.py +385 -0
- massgen/backend/base.py +341 -69
- massgen/backend/base_with_mcp.py +1102 -0
- massgen/backend/capabilities.py +386 -0
- massgen/backend/chat_completions.py +577 -130
- massgen/backend/claude.py +1033 -537
- massgen/backend/claude_code.py +1203 -0
- massgen/backend/cli_base.py +209 -0
- massgen/backend/docs/BACKEND_ARCHITECTURE.md +126 -0
- massgen/backend/{CLAUDE_API_RESEARCH.md → docs/CLAUDE_API_RESEARCH.md} +18 -18
- massgen/backend/{GEMINI_API_DOCUMENTATION.md → docs/GEMINI_API_DOCUMENTATION.md} +9 -9
- massgen/backend/docs/Gemini MCP Integration Analysis.md +1050 -0
- massgen/backend/docs/MCP_IMPLEMENTATION_CLAUDE_BACKEND.md +177 -0
- massgen/backend/docs/MCP_INTEGRATION_RESPONSE_BACKEND.md +352 -0
- massgen/backend/docs/OPENAI_GPT5_MODELS.md +211 -0
- massgen/backend/{OPENAI_RESPONSES_API_FORMAT.md → docs/OPENAI_RESPONSE_API_TOOL_CALLS.md} +3 -3
- massgen/backend/docs/OPENAI_response_streaming.md +20654 -0
- massgen/backend/docs/inference_backend.md +257 -0
- massgen/backend/docs/permissions_and_context_files.md +1085 -0
- massgen/backend/external.py +126 -0
- massgen/backend/gemini.py +1850 -241
- massgen/backend/grok.py +40 -156
- massgen/backend/inference.py +156 -0
- massgen/backend/lmstudio.py +171 -0
- massgen/backend/response.py +1095 -322
- massgen/chat_agent.py +131 -113
- massgen/cli.py +1560 -275
- massgen/config_builder.py +2396 -0
- massgen/configs/BACKEND_CONFIGURATION.md +458 -0
- massgen/configs/README.md +559 -216
- massgen/configs/ag2/ag2_case_study.yaml +27 -0
- massgen/configs/ag2/ag2_coder.yaml +34 -0
- massgen/configs/ag2/ag2_coder_case_study.yaml +36 -0
- massgen/configs/ag2/ag2_gemini.yaml +27 -0
- massgen/configs/ag2/ag2_groupchat.yaml +108 -0
- massgen/configs/ag2/ag2_groupchat_gpt.yaml +118 -0
- massgen/configs/ag2/ag2_single_agent.yaml +21 -0
- massgen/configs/basic/multi/fast_timeout_example.yaml +37 -0
- massgen/configs/basic/multi/gemini_4o_claude.yaml +31 -0
- massgen/configs/basic/multi/gemini_gpt5nano_claude.yaml +36 -0
- massgen/configs/{gemini_4o_claude.yaml → basic/multi/geminicode_4o_claude.yaml} +3 -3
- massgen/configs/basic/multi/geminicode_gpt5nano_claude.yaml +36 -0
- massgen/configs/basic/multi/glm_gemini_claude.yaml +25 -0
- massgen/configs/basic/multi/gpt4o_audio_generation.yaml +30 -0
- massgen/configs/basic/multi/gpt4o_image_generation.yaml +31 -0
- massgen/configs/basic/multi/gpt5nano_glm_qwen.yaml +26 -0
- massgen/configs/basic/multi/gpt5nano_image_understanding.yaml +26 -0
- massgen/configs/{three_agents_default.yaml → basic/multi/three_agents_default.yaml} +8 -4
- massgen/configs/basic/multi/three_agents_opensource.yaml +27 -0
- massgen/configs/basic/multi/three_agents_vllm.yaml +20 -0
- massgen/configs/basic/multi/two_agents_gemini.yaml +19 -0
- massgen/configs/{two_agents.yaml → basic/multi/two_agents_gpt5.yaml} +14 -6
- massgen/configs/basic/multi/two_agents_opensource_lmstudio.yaml +31 -0
- massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml +28 -0
- massgen/configs/{single_agent.yaml → basic/single/single_agent.yaml} +1 -1
- massgen/configs/{single_flash2.5.yaml → basic/single/single_flash2.5.yaml} +1 -2
- massgen/configs/basic/single/single_gemini2.5pro.yaml +16 -0
- massgen/configs/basic/single/single_gpt4o_audio_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_image_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_video_generation.yaml +24 -0
- massgen/configs/basic/single/single_gpt5nano.yaml +20 -0
- massgen/configs/basic/single/single_gpt5nano_file_search.yaml +18 -0
- massgen/configs/basic/single/single_gpt5nano_image_understanding.yaml +17 -0
- massgen/configs/basic/single/single_gptoss120b.yaml +15 -0
- massgen/configs/basic/single/single_openrouter_audio_understanding.yaml +15 -0
- massgen/configs/basic/single/single_qwen_video_understanding.yaml +15 -0
- massgen/configs/debug/code_execution/command_filtering_blacklist.yaml +29 -0
- massgen/configs/debug/code_execution/command_filtering_whitelist.yaml +28 -0
- massgen/configs/debug/code_execution/docker_verification.yaml +29 -0
- massgen/configs/debug/skip_coordination_test.yaml +27 -0
- massgen/configs/debug/test_sdk_migration.yaml +17 -0
- massgen/configs/docs/DISCORD_MCP_SETUP.md +208 -0
- massgen/configs/docs/TWITTER_MCP_ENESCINAR_SETUP.md +82 -0
- massgen/configs/providers/azure/azure_openai_multi.yaml +21 -0
- massgen/configs/providers/azure/azure_openai_single.yaml +19 -0
- massgen/configs/providers/claude/claude.yaml +14 -0
- massgen/configs/providers/gemini/gemini_gpt5nano.yaml +28 -0
- massgen/configs/providers/local/lmstudio.yaml +11 -0
- massgen/configs/providers/openai/gpt5.yaml +46 -0
- massgen/configs/providers/openai/gpt5_nano.yaml +46 -0
- massgen/configs/providers/others/grok_single_agent.yaml +19 -0
- massgen/configs/providers/others/zai_coding_team.yaml +108 -0
- massgen/configs/providers/others/zai_glm45.yaml +12 -0
- massgen/configs/{creative_team.yaml → teams/creative/creative_team.yaml} +16 -6
- massgen/configs/{travel_planning.yaml → teams/creative/travel_planning.yaml} +16 -6
- massgen/configs/{news_analysis.yaml → teams/research/news_analysis.yaml} +16 -6
- massgen/configs/{research_team.yaml → teams/research/research_team.yaml} +15 -7
- massgen/configs/{technical_analysis.yaml → teams/research/technical_analysis.yaml} +16 -6
- massgen/configs/tools/code-execution/basic_command_execution.yaml +25 -0
- massgen/configs/tools/code-execution/code_execution_use_case_simple.yaml +41 -0
- massgen/configs/tools/code-execution/docker_claude_code.yaml +32 -0
- massgen/configs/tools/code-execution/docker_multi_agent.yaml +32 -0
- massgen/configs/tools/code-execution/docker_simple.yaml +29 -0
- massgen/configs/tools/code-execution/docker_with_resource_limits.yaml +32 -0
- massgen/configs/tools/code-execution/multi_agent_playwright_automation.yaml +57 -0
- massgen/configs/tools/filesystem/cc_gpt5_gemini_filesystem.yaml +34 -0
- massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +68 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5.yaml +43 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5_gptoss.yaml +49 -0
- massgen/configs/tools/filesystem/claude_code_gpt5nano.yaml +31 -0
- massgen/configs/tools/filesystem/claude_code_single.yaml +40 -0
- massgen/configs/tools/filesystem/fs_permissions_test.yaml +87 -0
- massgen/configs/tools/filesystem/gemini_gemini_workspace_cleanup.yaml +54 -0
- massgen/configs/tools/filesystem/gemini_gpt5_filesystem_casestudy.yaml +30 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_file_context_path.yaml +43 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_protected_paths.yaml +45 -0
- massgen/configs/tools/filesystem/gpt5mini_cc_fs_context_path.yaml +31 -0
- massgen/configs/tools/filesystem/grok4_gpt5_gemini_filesystem.yaml +32 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_claude_code_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_gemini_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/two_claude_code_filesystem_multiturn.yaml +47 -0
- massgen/configs/tools/filesystem/multiturn/two_gemini_flash_filesystem_multiturn.yaml +48 -0
- massgen/configs/tools/mcp/claude_code_discord_mcp_example.yaml +27 -0
- massgen/configs/tools/mcp/claude_code_simple_mcp.yaml +35 -0
- massgen/configs/tools/mcp/claude_code_twitter_mcp_example.yaml +32 -0
- massgen/configs/tools/mcp/claude_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/claude_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/five_agents_travel_mcp_test.yaml +157 -0
- massgen/configs/tools/mcp/five_agents_weather_mcp_test.yaml +103 -0
- massgen/configs/tools/mcp/gemini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_sharing.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_single_agent.yaml +17 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_with_claude_code.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gemini_notion_mcp.yaml +52 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gpt5mini_claude_code_discord_mcp_example.yaml +38 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/multimcp_gemini.yaml +111 -0
- massgen/configs/tools/mcp/qwen_api_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/qwen_api_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/qwen_local_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/qwen_local_mcp_test.yaml +27 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +140 -0
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +155 -0
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +73 -0
- massgen/configs/tools/web-search/claude_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gemini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt5_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt_oss_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/grok3_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/qwen_api_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/qwen_local_streamable_http_test.yaml +43 -0
- massgen/coordination_tracker.py +708 -0
- massgen/docker/README.md +462 -0
- massgen/filesystem_manager/__init__.py +21 -0
- massgen/filesystem_manager/_base.py +9 -0
- massgen/filesystem_manager/_code_execution_server.py +545 -0
- massgen/filesystem_manager/_docker_manager.py +477 -0
- massgen/filesystem_manager/_file_operation_tracker.py +248 -0
- massgen/filesystem_manager/_filesystem_manager.py +813 -0
- massgen/filesystem_manager/_path_permission_manager.py +1261 -0
- massgen/filesystem_manager/_workspace_tools_server.py +1815 -0
- massgen/formatter/__init__.py +10 -0
- massgen/formatter/_chat_completions_formatter.py +284 -0
- massgen/formatter/_claude_formatter.py +235 -0
- massgen/formatter/_formatter_base.py +156 -0
- massgen/formatter/_response_formatter.py +263 -0
- massgen/frontend/__init__.py +1 -2
- massgen/frontend/coordination_ui.py +471 -286
- massgen/frontend/displays/base_display.py +56 -11
- massgen/frontend/displays/create_coordination_table.py +1956 -0
- massgen/frontend/displays/rich_terminal_display.py +1259 -619
- massgen/frontend/displays/simple_display.py +9 -4
- massgen/frontend/displays/terminal_display.py +27 -68
- massgen/logger_config.py +681 -0
- massgen/mcp_tools/README.md +232 -0
- massgen/mcp_tools/__init__.py +105 -0
- massgen/mcp_tools/backend_utils.py +1035 -0
- massgen/mcp_tools/circuit_breaker.py +195 -0
- massgen/mcp_tools/client.py +894 -0
- massgen/mcp_tools/config_validator.py +138 -0
- massgen/mcp_tools/docs/circuit_breaker.md +646 -0
- massgen/mcp_tools/docs/client.md +950 -0
- massgen/mcp_tools/docs/config_validator.md +478 -0
- massgen/mcp_tools/docs/exceptions.md +1165 -0
- massgen/mcp_tools/docs/security.md +854 -0
- massgen/mcp_tools/exceptions.py +338 -0
- massgen/mcp_tools/hooks.py +212 -0
- massgen/mcp_tools/security.py +780 -0
- massgen/message_templates.py +342 -64
- massgen/orchestrator.py +1515 -241
- massgen/stream_chunk/__init__.py +35 -0
- massgen/stream_chunk/base.py +92 -0
- massgen/stream_chunk/multimodal.py +237 -0
- massgen/stream_chunk/text.py +162 -0
- massgen/tests/mcp_test_server.py +150 -0
- massgen/tests/multi_turn_conversation_design.md +0 -8
- massgen/tests/test_azure_openai_backend.py +156 -0
- massgen/tests/test_backend_capabilities.py +262 -0
- massgen/tests/test_backend_event_loop_all.py +179 -0
- massgen/tests/test_chat_completions_refactor.py +142 -0
- massgen/tests/test_claude_backend.py +15 -28
- massgen/tests/test_claude_code.py +268 -0
- massgen/tests/test_claude_code_context_sharing.py +233 -0
- massgen/tests/test_claude_code_orchestrator.py +175 -0
- massgen/tests/test_cli_backends.py +180 -0
- massgen/tests/test_code_execution.py +679 -0
- massgen/tests/test_external_agent_backend.py +134 -0
- massgen/tests/test_final_presentation_fallback.py +237 -0
- massgen/tests/test_gemini_planning_mode.py +351 -0
- massgen/tests/test_grok_backend.py +7 -10
- massgen/tests/test_http_mcp_server.py +42 -0
- massgen/tests/test_integration_simple.py +198 -0
- massgen/tests/test_mcp_blocking.py +125 -0
- massgen/tests/test_message_context_building.py +29 -47
- massgen/tests/test_orchestrator_final_presentation.py +48 -0
- massgen/tests/test_path_permission_manager.py +2087 -0
- massgen/tests/test_rich_terminal_display.py +14 -13
- massgen/tests/test_timeout.py +133 -0
- massgen/tests/test_v3_3agents.py +11 -12
- massgen/tests/test_v3_simple.py +8 -13
- massgen/tests/test_v3_three_agents.py +11 -18
- massgen/tests/test_v3_two_agents.py +8 -13
- massgen/token_manager/__init__.py +7 -0
- massgen/token_manager/token_manager.py +400 -0
- massgen/utils.py +52 -16
- massgen/v1/agent.py +45 -91
- massgen/v1/agents.py +18 -53
- massgen/v1/backends/gemini.py +50 -153
- massgen/v1/backends/grok.py +21 -54
- massgen/v1/backends/oai.py +39 -111
- massgen/v1/cli.py +36 -93
- massgen/v1/config.py +8 -12
- massgen/v1/logging.py +43 -127
- massgen/v1/main.py +18 -32
- massgen/v1/orchestrator.py +68 -209
- massgen/v1/streaming_display.py +62 -163
- massgen/v1/tools.py +8 -12
- massgen/v1/types.py +9 -23
- massgen/v1/utils.py +5 -23
- massgen-0.1.0.dist-info/METADATA +1245 -0
- massgen-0.1.0.dist-info/RECORD +273 -0
- massgen-0.1.0.dist-info/entry_points.txt +2 -0
- massgen/frontend/logging/__init__.py +0 -9
- massgen/frontend/logging/realtime_logger.py +0 -197
- massgen-0.0.3.dist-info/METADATA +0 -568
- massgen-0.0.3.dist-info/RECORD +0 -76
- massgen-0.0.3.dist-info/entry_points.txt +0 -2
- /massgen/backend/{Function calling openai responses.md → docs/Function calling openai responses.md} +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/WHEEL +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/top_level.txt +0 -0
massgen/message_templates.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
1
2
|
"""
|
|
2
3
|
Message templates for MassGen framework following input_cases_reference.md
|
|
3
4
|
Implements proven binary decision framework that eliminates perfectionism loops.
|
|
4
5
|
"""
|
|
5
6
|
|
|
6
|
-
from typing import Dict,
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class MessageTemplates:
|
|
@@ -25,27 +26,48 @@ class MessageTemplates:
|
|
|
25
26
|
import time
|
|
26
27
|
|
|
27
28
|
# return f"""You are evaluating answers from multiple agents for final response to a message.
|
|
28
|
-
|
|
29
|
+
# For every aspect, claim, and reasoning step in the CURRENT ANSWERS, verify correctness, factual accuracy, and completeness using your expertise, reasoning, and **available tools**.
|
|
30
|
+
# **You must use at least one tool in every evaluation round**—this is mandatory.
|
|
31
|
+
# - If the CURRENT ANSWERS fully address the ORIGINAL MESSAGE, use the `vote` tool to record your vote and skip the `new_answer` tool.
|
|
32
|
+
# - If the CURRENT ANSWERS are incomplete, incorrect, or do not fully address the ORIGINAL MESSAGE,
|
|
33
|
+
# conduct any necessary reasoning or research using tools (such as `search`), and then use the
|
|
34
|
+
# `new_answer` tool to submit a new response.
|
|
35
|
+
# Your new answer must be self-contained, process-complete, well-sourced, and compelling—ready to serve as the final reply.
|
|
36
|
+
# **Important**:
|
|
37
|
+
# - You must actually call at least one tool per round.
|
|
38
|
+
# - If no other tools are relevant or available, you must use either `new_answer` or `vote` to fulfill the tool-use requirement.
|
|
39
|
+
# *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**.
|
|
40
|
+
# For any time-sensitive requests, use the `search` tool (if available) rather than relying on prior knowledge.
|
|
41
|
+
# """
|
|
42
|
+
# return f"""You are evaluating answers from multiple agents for final response to a message.
|
|
29
43
|
# For every aspect, claim, and reasoning step in the CURRENT ANSWERS, verify correctness, factual accuracy, and completeness using your expertise, reasoning, and available tools.
|
|
30
|
-
|
|
31
44
|
# If the CURRENT ANSWERS fully address the ORIGINAL MESSAGE, use the `vote` tool to record your vote and skip the `new_answer` tool.
|
|
32
|
-
|
|
33
|
-
#
|
|
34
|
-
|
|
45
|
+
# If the CURRENT ANSWERS are incomplete, incorrect, or do not fully address the ORIGINAL MESSAGE,
|
|
46
|
+
# conduct any necessary reasoning or research. Then, use the `new_answer` tool to submit a new response.
|
|
35
47
|
# Your new answer must be self-contained, process-complete, well-sourced, and compelling—ready to serve as the final reply.
|
|
36
|
-
|
|
37
48
|
# **Important**: Be sure to actually call the `new_answer` tool to submit your new answer (use native tool call format).
|
|
38
|
-
|
|
39
49
|
# *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**.
|
|
40
50
|
# For any time-sensitive requests, use the search tool (if available) rather than relying on prior knowledge."""
|
|
41
|
-
|
|
42
|
-
return f"""You are evaluating answers from multiple agents for final response to a message. Does the best CURRENT ANSWER address the ORIGINAL MESSAGE?
|
|
51
|
+
# BACKUP - Original evaluation message (pre-synthesis-encouragement update):
|
|
52
|
+
# return f"""You are evaluating answers from multiple agents for final response to a message. Does the best CURRENT ANSWER address the ORIGINAL MESSAGE?
|
|
53
|
+
#
|
|
54
|
+
# If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.
|
|
55
|
+
# Otherwise, digest existing answers, combine their strengths, and do additional work to address their
|
|
56
|
+
# weaknesses, then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.
|
|
57
|
+
# Make sure you actually call `vote` or `new_answer` (in tool call format).
|
|
58
|
+
#
|
|
59
|
+
# *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
|
|
60
|
+
|
|
61
|
+
return f"""You are evaluating answers from multiple agents for final response to a message.
|
|
62
|
+
Different agents may have different builtin tools and capabilities.
|
|
63
|
+
Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well?
|
|
43
64
|
|
|
44
65
|
If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.
|
|
45
|
-
Otherwise,
|
|
66
|
+
Otherwise, digest existing answers, combine their strengths, and do additional work to address their weaknesses,
|
|
67
|
+
then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.
|
|
68
|
+
Make sure you actually call `vote` or `new_answer` (in tool call format).
|
|
46
69
|
|
|
47
|
-
*Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**.
|
|
48
|
-
"""
|
|
70
|
+
*Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
|
|
49
71
|
|
|
50
72
|
# =============================================================================
|
|
51
73
|
# USER MESSAGE TEMPLATES
|
|
@@ -61,9 +83,7 @@ Otherwise, do additional work first, then use the `new_answer` tool to record a
|
|
|
61
83
|
|
|
62
84
|
return f"<ORIGINAL MESSAGE> {task} <END OF ORIGINAL MESSAGE>"
|
|
63
85
|
|
|
64
|
-
def format_conversation_history(
|
|
65
|
-
self, conversation_history: List[Dict[str, str]]
|
|
66
|
-
) -> str:
|
|
86
|
+
def format_conversation_history(self, conversation_history: List[Dict[str, str]]) -> str:
|
|
67
87
|
"""Format conversation history for agent context."""
|
|
68
88
|
if "format_conversation_history" in self._template_overrides:
|
|
69
89
|
override = self._template_overrides["format_conversation_history"]
|
|
@@ -88,9 +108,7 @@ Otherwise, do additional work first, then use the `new_answer` tool to record a
|
|
|
88
108
|
lines.append("<END OF CONVERSATION_HISTORY>")
|
|
89
109
|
return "\n".join(lines)
|
|
90
110
|
|
|
91
|
-
def system_message_with_context(
|
|
92
|
-
self, conversation_history: Optional[List[Dict[str, str]]] = None
|
|
93
|
-
) -> str:
|
|
111
|
+
def system_message_with_context(self, conversation_history: Optional[List[Dict[str, str]]] = None) -> str:
|
|
94
112
|
"""Evaluation system message with conversation context awareness."""
|
|
95
113
|
if "system_message_with_context" in self._template_overrides:
|
|
96
114
|
override = self._template_overrides["system_message_with_context"]
|
|
@@ -102,7 +120,7 @@ Otherwise, do additional work first, then use the `new_answer` tool to record a
|
|
|
102
120
|
|
|
103
121
|
if conversation_history and len(conversation_history) > 0:
|
|
104
122
|
context_note = """
|
|
105
|
-
|
|
123
|
+
|
|
106
124
|
IMPORTANT: You are responding to the latest message in an ongoing conversation. Consider the full conversation context when evaluating answers and providing your response."""
|
|
107
125
|
return base_message + context_note
|
|
108
126
|
|
|
@@ -117,9 +135,7 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
|
|
|
117
135
|
(no answers available yet)
|
|
118
136
|
<END OF CURRENT ANSWERS>"""
|
|
119
137
|
|
|
120
|
-
def format_current_answers_with_summaries(
|
|
121
|
-
self, agent_summaries: Dict[str, str]
|
|
122
|
-
) -> str:
|
|
138
|
+
def format_current_answers_with_summaries(self, agent_summaries: Dict[str, str]) -> str:
|
|
123
139
|
"""Format current answers section with agent summaries (Case 2) using anonymous agent IDs."""
|
|
124
140
|
if "format_current_answers_with_summaries" in self._template_overrides:
|
|
125
141
|
override = self._template_overrides["format_current_answers_with_summaries"]
|
|
@@ -160,7 +176,15 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
|
|
|
160
176
|
# =============================================================================
|
|
161
177
|
|
|
162
178
|
def get_new_answer_tool(self) -> Dict[str, Any]:
|
|
163
|
-
"""Get new_answer tool definition.
|
|
179
|
+
"""Get new_answer tool definition.
|
|
180
|
+
|
|
181
|
+
TODO: Consider extending with optional context parameters for stateful backends:
|
|
182
|
+
- cwd: Working directory for Claude Code sessions
|
|
183
|
+
- session_id: Backend session identifier for continuity
|
|
184
|
+
- model: Model used to generate the answer
|
|
185
|
+
- tools_used: List of tools actually utilized
|
|
186
|
+
This would enable better context preservation in multi-iteration workflows.
|
|
187
|
+
"""
|
|
164
188
|
if "new_answer_tool" in self._template_overrides:
|
|
165
189
|
return self._template_overrides["new_answer_tool"]
|
|
166
190
|
|
|
@@ -174,17 +198,15 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
|
|
|
174
198
|
"properties": {
|
|
175
199
|
"content": {
|
|
176
200
|
"type": "string",
|
|
177
|
-
"description": "Your improved answer. If any builtin tools like search or code execution were used,
|
|
178
|
-
}
|
|
201
|
+
"description": "Your improved answer. If any builtin tools like search or code execution were used, mention how they are used here.",
|
|
202
|
+
},
|
|
179
203
|
},
|
|
180
204
|
"required": ["content"],
|
|
181
205
|
},
|
|
182
206
|
},
|
|
183
207
|
}
|
|
184
208
|
|
|
185
|
-
def get_vote_tool(
|
|
186
|
-
self, valid_agent_ids: Optional[List[str]] = None
|
|
187
|
-
) -> Dict[str, Any]:
|
|
209
|
+
def get_vote_tool(self, valid_agent_ids: Optional[List[str]] = None) -> Dict[str, Any]:
|
|
188
210
|
"""Get vote tool definition with anonymous agent IDs."""
|
|
189
211
|
if "vote_tool" in self._template_overrides:
|
|
190
212
|
override = self._template_overrides["vote_tool"]
|
|
@@ -217,44 +239,91 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
|
|
|
217
239
|
# Create anonymous mapping for enum constraint
|
|
218
240
|
if valid_agent_ids:
|
|
219
241
|
anon_agent_ids = [f"agent{i}" for i in range(1, len(valid_agent_ids) + 1)]
|
|
220
|
-
tool_def["function"]["parameters"]["properties"]["agent_id"][
|
|
221
|
-
"enum"
|
|
222
|
-
] = anon_agent_ids
|
|
242
|
+
tool_def["function"]["parameters"]["properties"]["agent_id"]["enum"] = anon_agent_ids
|
|
223
243
|
|
|
224
244
|
return tool_def
|
|
225
245
|
|
|
226
|
-
def get_standard_tools(
|
|
227
|
-
self, valid_agent_ids: Optional[List[str]] = None
|
|
228
|
-
) -> List[Dict[str, Any]]:
|
|
246
|
+
def get_standard_tools(self, valid_agent_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
|
|
229
247
|
"""Get standard tools for MassGen framework."""
|
|
230
248
|
return [self.get_new_answer_tool(), self.get_vote_tool(valid_agent_ids)]
|
|
231
249
|
|
|
232
250
|
def final_presentation_system_message(
|
|
233
|
-
self,
|
|
251
|
+
self,
|
|
252
|
+
original_system_message: Optional[str] = None,
|
|
253
|
+
enable_image_generation: bool = False,
|
|
254
|
+
enable_audio_generation: bool = False,
|
|
255
|
+
has_irreversible_actions: bool = False,
|
|
256
|
+
enable_command_execution: bool = False,
|
|
234
257
|
) -> str:
|
|
235
258
|
"""System message for final answer presentation by winning agent.
|
|
236
259
|
|
|
237
260
|
Args:
|
|
238
261
|
original_system_message: The agent's original system message to preserve
|
|
262
|
+
enable_image_generation: Whether image generation is enabled
|
|
263
|
+
enable_audio_generation: Whether audio generation is enabled
|
|
264
|
+
has_irreversible_actions: Whether agent has write access to context paths (requires actual file delivery)
|
|
265
|
+
enable_command_execution: Whether command execution is enabled for this agent
|
|
239
266
|
"""
|
|
240
267
|
if "final_presentation_system_message" in self._template_overrides:
|
|
241
268
|
return str(self._template_overrides["final_presentation_system_message"])
|
|
242
269
|
|
|
243
|
-
|
|
270
|
+
# BACKUP - Original final presentation message (pre-explicit-synthesis update):
|
|
271
|
+
# presentation_instructions = """You have been selected as the winning presenter in a coordination process.
|
|
272
|
+
# Your task is to present a polished, comprehensive final answer that incorporates the best insights from all participants.
|
|
273
|
+
#
|
|
274
|
+
# Consider:
|
|
275
|
+
# 1. Your original response and how it can be refined
|
|
276
|
+
# 2. Valuable insights from other agents' answers that should be incorporated
|
|
277
|
+
# 3. Feedback received through the voting process
|
|
278
|
+
# 4. Ensuring clarity, completeness, and comprehensiveness for the final audience
|
|
279
|
+
#
|
|
280
|
+
# Present your final coordinated answer in the most helpful and complete way possible."""
|
|
281
|
+
|
|
282
|
+
presentation_instructions = """You have been selected as the winning presenter in a coordination process.
|
|
283
|
+
Present the best possible coordinated answer by combining the strengths from all participants.\n\n"""
|
|
284
|
+
|
|
285
|
+
# Add image generation instructions only if enabled
|
|
286
|
+
if enable_image_generation:
|
|
287
|
+
presentation_instructions += """For image generation tasks:
|
|
288
|
+
- Extract image paths from the existing answer and resolve them in the shared reference.
|
|
289
|
+
- Gather all agent-produced images (ignore non-existent files).
|
|
290
|
+
- MUST call the generate-image tool with these input images to synthesize one final image combining their strengths.
|
|
291
|
+
- MUST save the final output and output the saved path.
|
|
292
|
+
"""
|
|
293
|
+
# Add audio generation instructions only if enabled
|
|
294
|
+
if enable_audio_generation:
|
|
295
|
+
presentation_instructions += """For audio generation tasks:
|
|
296
|
+
- Extract audio paths from the existing answer and resolve them in the shared reference.
|
|
297
|
+
- Gather ALL audio files produced by EVERY agent (ignore non-existent files).
|
|
298
|
+
IMPORTANT: You MUST call the generate_text_with_input_audio tool to obtain transcriptions
|
|
299
|
+
for EACH AND EVERY audio file from ALL agents - no audio should be skipped or overlooked.
|
|
300
|
+
- MUST combine the strengths of all transcriptions into one final detailed transcription that captures the best elements from each.
|
|
301
|
+
- MUST use the convert_text_to_audio tool to convert this final transcription to a new audio file and save it, then output the saved path.
|
|
302
|
+
"""
|
|
244
303
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
304
|
+
# Add irreversible actions reminder if needed
|
|
305
|
+
# TODO: Integrate more general irreversible actions handling in future (i.e., not just for context file delivery)
|
|
306
|
+
if has_irreversible_actions:
|
|
307
|
+
presentation_instructions += (
|
|
308
|
+
"### Write Access to Target Path:\n\n"
|
|
309
|
+
"Reminder: File Delivery Required. You should first place your final answer in your workspace. "
|
|
310
|
+
"However, note your workspace is NOT the final destination. You MUST copy/write files to the Target Path using FULL ABSOLUTE PATHS. "
|
|
311
|
+
"Then, clean up this Target Path by deleting any outdated or unused files. "
|
|
312
|
+
"Then, you must ALWAYS verify that the Target Path contains the correct final files, as no other agents were allowed to write to this path.\n"
|
|
313
|
+
)
|
|
250
314
|
|
|
251
|
-
|
|
315
|
+
# Add requirements.txt guidance if command execution is enabled
|
|
316
|
+
if enable_command_execution:
|
|
317
|
+
presentation_instructions += (
|
|
318
|
+
"### Package Dependencies:\n\n"
|
|
319
|
+
"Create a `requirements.txt` file listing all Python packages needed to run your code. "
|
|
320
|
+
"This helps users reproduce your work later. Include only the packages you actually used in your solution.\n"
|
|
321
|
+
)
|
|
252
322
|
|
|
253
323
|
# Combine with original system message if provided
|
|
254
324
|
if original_system_message:
|
|
255
325
|
return f"""{original_system_message}
|
|
256
326
|
|
|
257
|
-
COORDINATION CONTEXT:
|
|
258
327
|
{presentation_instructions}"""
|
|
259
328
|
else:
|
|
260
329
|
return presentation_instructions
|
|
@@ -269,17 +338,13 @@ COORDINATION CONTEXT:
|
|
|
269
338
|
|
|
270
339
|
{self.format_current_answers_empty()}"""
|
|
271
340
|
|
|
272
|
-
def build_case2_user_message(
|
|
273
|
-
self, task: str, agent_summaries: Dict[str, str]
|
|
274
|
-
) -> str:
|
|
341
|
+
def build_case2_user_message(self, task: str, agent_summaries: Dict[str, str]) -> str:
|
|
275
342
|
"""Build Case 2 user message (summaries exist)."""
|
|
276
343
|
return f"""{self.format_original_message(task)}
|
|
277
344
|
|
|
278
345
|
{self.format_current_answers_with_summaries(agent_summaries)}"""
|
|
279
346
|
|
|
280
|
-
def build_evaluation_message(
|
|
281
|
-
self, task: str, agent_answers: Optional[Dict[str, str]] = None
|
|
282
|
-
) -> str:
|
|
347
|
+
def build_evaluation_message(self, task: str, agent_answers: Optional[Dict[str, str]] = None) -> str:
|
|
283
348
|
"""Build evaluation user message for any case."""
|
|
284
349
|
if agent_answers:
|
|
285
350
|
return self.build_case2_user_message(task, agent_answers)
|
|
@@ -314,9 +379,7 @@ COORDINATION CONTEXT:
|
|
|
314
379
|
|
|
315
380
|
# Add agent answers
|
|
316
381
|
if agent_answers:
|
|
317
|
-
context_parts.append(
|
|
318
|
-
self.format_current_answers_with_summaries(agent_answers)
|
|
319
|
-
)
|
|
382
|
+
context_parts.append(self.format_current_answers_with_summaries(agent_answers))
|
|
320
383
|
else:
|
|
321
384
|
context_parts.append(self.format_current_answers_empty())
|
|
322
385
|
|
|
@@ -331,10 +394,17 @@ COORDINATION CONTEXT:
|
|
|
331
394
|
task: str,
|
|
332
395
|
agent_summaries: Optional[Dict[str, str]] = None,
|
|
333
396
|
valid_agent_ids: Optional[List[str]] = None,
|
|
397
|
+
base_system_message: Optional[str] = None,
|
|
334
398
|
) -> Dict[str, Any]:
|
|
335
399
|
"""Build complete initial conversation for MassGen evaluation."""
|
|
400
|
+
# Use agent's custom system message if provided, otherwise use default evaluation message
|
|
401
|
+
if base_system_message:
|
|
402
|
+
system_message = f"{self.evaluation_system_message()}\n\n#Special Requirement\n{base_system_message}"
|
|
403
|
+
else:
|
|
404
|
+
system_message = self.evaluation_system_message()
|
|
405
|
+
|
|
336
406
|
return {
|
|
337
|
-
"system_message":
|
|
407
|
+
"system_message": system_message,
|
|
338
408
|
"user_message": self.build_evaluation_message(task, agent_summaries),
|
|
339
409
|
"tools": self.get_standard_tools(valid_agent_ids),
|
|
340
410
|
}
|
|
@@ -345,13 +415,18 @@ COORDINATION CONTEXT:
|
|
|
345
415
|
conversation_history: Optional[List[Dict[str, str]]] = None,
|
|
346
416
|
agent_summaries: Optional[Dict[str, str]] = None,
|
|
347
417
|
valid_agent_ids: Optional[List[str]] = None,
|
|
418
|
+
base_system_message: Optional[str] = None,
|
|
348
419
|
) -> Dict[str, Any]:
|
|
349
420
|
"""Build complete conversation with conversation history context for MassGen evaluation."""
|
|
421
|
+
# Use agent's custom system message if provided, otherwise use default context-aware message
|
|
422
|
+
if base_system_message:
|
|
423
|
+
system_message = f"{base_system_message}\n\n{self.system_message_with_context(conversation_history)}"
|
|
424
|
+
else:
|
|
425
|
+
system_message = self.system_message_with_context(conversation_history)
|
|
426
|
+
|
|
350
427
|
return {
|
|
351
|
-
"system_message":
|
|
352
|
-
"user_message": self.build_coordination_context(
|
|
353
|
-
current_task, conversation_history, agent_summaries
|
|
354
|
-
),
|
|
428
|
+
"system_message": system_message,
|
|
429
|
+
"user_message": self.build_coordination_context(current_task, conversation_history, agent_summaries),
|
|
355
430
|
"tools": self.get_standard_tools(valid_agent_ids),
|
|
356
431
|
}
|
|
357
432
|
|
|
@@ -378,14 +453,219 @@ VOTING RESULTS:
|
|
|
378
453
|
|
|
379
454
|
Based on the coordination process above, present your final answer:"""
|
|
380
455
|
|
|
381
|
-
def add_enforcement_message(
|
|
382
|
-
self, conversation_messages: List[Dict[str, str]]
|
|
383
|
-
) -> List[Dict[str, str]]:
|
|
456
|
+
def add_enforcement_message(self, conversation_messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
|
|
384
457
|
"""Add enforcement message to existing conversation (Case 3)."""
|
|
385
458
|
messages = conversation_messages.copy()
|
|
386
459
|
messages.append({"role": "user", "content": self.enforcement_message()})
|
|
387
460
|
return messages
|
|
388
461
|
|
|
462
|
+
def command_execution_system_message(self) -> str:
|
|
463
|
+
"""Generate concise command execution instructions when command line execution is enabled."""
|
|
464
|
+
parts = ["## Command Execution"]
|
|
465
|
+
parts.append("You can run command line commands using the `execute_command` tool.\n")
|
|
466
|
+
parts.append("If a `.venv` directory exists in your workspace, it will be automatically used.")
|
|
467
|
+
|
|
468
|
+
return "\n".join(parts)
|
|
469
|
+
|
|
470
|
+
def filesystem_system_message(
|
|
471
|
+
self,
|
|
472
|
+
main_workspace: Optional[str] = None,
|
|
473
|
+
temp_workspace: Optional[str] = None,
|
|
474
|
+
context_paths: Optional[List[Dict[str, str]]] = None,
|
|
475
|
+
previous_turns: Optional[List[Dict[str, Any]]] = None,
|
|
476
|
+
workspace_prepopulated: bool = False,
|
|
477
|
+
enable_image_generation: bool = False,
|
|
478
|
+
agent_answers: Optional[Dict[str, str]] = None,
|
|
479
|
+
enable_command_execution: bool = False,
|
|
480
|
+
) -> str:
|
|
481
|
+
"""Generate filesystem access instructions for agents with filesystem support.
|
|
482
|
+
|
|
483
|
+
Args:
|
|
484
|
+
main_workspace: Path to agent's main workspace
|
|
485
|
+
temp_workspace: Path to shared reference workspace
|
|
486
|
+
context_paths: List of context paths with permissions
|
|
487
|
+
previous_turns: List of previous turn metadata
|
|
488
|
+
workspace_prepopulated: Whether workspace is pre-populated
|
|
489
|
+
enable_image_generation: Whether image generation is enabled
|
|
490
|
+
agent_answers: Dict of agent answers (keys are agent IDs) to show workspace structure
|
|
491
|
+
enable_command_execution: Whether command line execution is enabled
|
|
492
|
+
"""
|
|
493
|
+
if "filesystem_system_message" in self._template_overrides:
|
|
494
|
+
return str(self._template_overrides["filesystem_system_message"])
|
|
495
|
+
|
|
496
|
+
parts = ["## Filesystem Access"]
|
|
497
|
+
|
|
498
|
+
# Explain workspace behavior
|
|
499
|
+
parts.append(
|
|
500
|
+
"Your working directory is set to your workspace, so all relative paths in your file operations "
|
|
501
|
+
"will be resolved from there. This ensures each agent works in isolation while having access to shared references. "
|
|
502
|
+
"Only include in your workspace files that should be used in your answer.\n",
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
if main_workspace:
|
|
506
|
+
workspace_note = f"**Your Workspace**: `{main_workspace}` - Write actual files here using file tools. All your file operations will be relative to this directory."
|
|
507
|
+
if workspace_prepopulated:
|
|
508
|
+
# Workspace is pre-populated with writable copy of most recent turn
|
|
509
|
+
workspace_note += (
|
|
510
|
+
" **Note**: Your workspace already contains a writable copy of the previous turn's results - "
|
|
511
|
+
"you can modify or build upon these files. The original unmodified version is also available as "
|
|
512
|
+
"a read-only context path if you need to reference what was originally there."
|
|
513
|
+
)
|
|
514
|
+
parts.append(workspace_note)
|
|
515
|
+
|
|
516
|
+
if temp_workspace:
|
|
517
|
+
# Build workspace tree structure
|
|
518
|
+
workspace_tree = f"**Shared Reference**: `{temp_workspace}` - Contains previous answers from all agents (read/execute-only)\n"
|
|
519
|
+
|
|
520
|
+
# Add agent subdirectories in tree format
|
|
521
|
+
# This was added bc weaker models would often try many incorrect paths.
|
|
522
|
+
# No point in requiring extra list dir calls if we can just show them the structure.
|
|
523
|
+
if agent_answers:
|
|
524
|
+
# Create anonymous mapping: agent1, agent2, etc.
|
|
525
|
+
agent_mapping = {}
|
|
526
|
+
for i, agent_id in enumerate(sorted(agent_answers.keys()), 1):
|
|
527
|
+
agent_mapping[agent_id] = f"agent{i}"
|
|
528
|
+
|
|
529
|
+
workspace_tree += " Available agent workspaces:\n"
|
|
530
|
+
agent_items = list(agent_mapping.items())
|
|
531
|
+
for idx, (agent_id, anon_id) in enumerate(agent_items):
|
|
532
|
+
is_last = idx == len(agent_items) - 1
|
|
533
|
+
prefix = " └── " if is_last else " ├── "
|
|
534
|
+
workspace_tree += f"{prefix}{temp_workspace}/{anon_id}/\n"
|
|
535
|
+
|
|
536
|
+
workspace_tree += (
|
|
537
|
+
" - To improve upon existing answers: Copy files from Shared Reference to your workspace using `copy_file` or `copy_directory` tools, then modify them\n"
|
|
538
|
+
" - These correspond directly to the answers shown in the CURRENT ANSWERS section\n"
|
|
539
|
+
" - However, not all workspaces may have a matching answer (e.g., if an agent was in the middle of working but restarted before submitting an answer). "
|
|
540
|
+
"So, it is wise to check the actual files in the Shared Reference, not rely solely on the CURRENT ANSWERS section.\n"
|
|
541
|
+
)
|
|
542
|
+
parts.append(workspace_tree)
|
|
543
|
+
|
|
544
|
+
if context_paths:
|
|
545
|
+
has_target = any(p.get("will_be_writable", False) for p in context_paths)
|
|
546
|
+
has_readonly_context = any(not p.get("will_be_writable", False) and p.get("permission") == "read" for p in context_paths)
|
|
547
|
+
|
|
548
|
+
if has_target:
|
|
549
|
+
parts.append(
|
|
550
|
+
"\n**Important Context**: If the user asks about improving, fixing, debugging, or understanding an existing "
|
|
551
|
+
"code/project (e.g., 'Why is this code not working?', 'Fix this bug', 'Add feature X'), they are referring "
|
|
552
|
+
"to the Target Path below. First READ the existing files from that path to understand what's there, then "
|
|
553
|
+
"make your changes based on that codebase. Final deliverables must end up there.\n",
|
|
554
|
+
)
|
|
555
|
+
elif has_readonly_context:
|
|
556
|
+
parts.append(
|
|
557
|
+
"\n**Important Context**: If the user asks about debugging or understanding an existing code/project "
|
|
558
|
+
"(e.g., 'Why is this code not working?', 'Explain this bug'), they are referring to (one of) the Context Path(s) "
|
|
559
|
+
"below. Read then provide analysis/explanation based on that codebase - you cannot modify it directly.\n",
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
for path_config in context_paths:
|
|
563
|
+
path = path_config.get("path", "")
|
|
564
|
+
permission = path_config.get("permission", "read")
|
|
565
|
+
will_be_writable = path_config.get("will_be_writable", False)
|
|
566
|
+
if path:
|
|
567
|
+
if permission == "read" and will_be_writable:
|
|
568
|
+
parts.append(
|
|
569
|
+
f"**Target Path**: `{path}` (read-only now, write access later) - This is where your changes will be delivered. "
|
|
570
|
+
f"Work in your workspace first, then the final presenter will place or update files DIRECTLY into `{path}` using the FULL ABSOLUTE PATH.",
|
|
571
|
+
)
|
|
572
|
+
elif permission == "write":
|
|
573
|
+
parts.append(
|
|
574
|
+
f"**Target Path**: `{path}` (write access) - This is where your changes must be delivered. "
|
|
575
|
+
f"First, ensure you place your answer in your workspace, then copy/write files DIRECTLY into `{path}` using FULL ABSOLUTE PATH (not relative paths). "
|
|
576
|
+
f"Files must go directly into the target path itself (e.g., `{path}/file.txt`), NOT into a `.massgen/` subdirectory within it.",
|
|
577
|
+
)
|
|
578
|
+
else:
|
|
579
|
+
parts.append(f"**Context Path**: `{path}` (read-only) - Use FULL ABSOLUTE PATH when reading.")
|
|
580
|
+
|
|
581
|
+
# Add note connecting conversation history (in user message) to context paths (in system message)
|
|
582
|
+
if previous_turns:
|
|
583
|
+
parts.append(
|
|
584
|
+
"\n**Note**: This is a multi-turn conversation. Each User/Assistant exchange in the conversation "
|
|
585
|
+
"history represents one turn. The workspace from each turn is available as a read-only context path "
|
|
586
|
+
"listed above (e.g., turn 1's workspace is at the path ending in `/turn_1/workspace`).",
|
|
587
|
+
)
|
|
588
|
+
|
|
589
|
+
# Add intelligent task handling guidance with clear priority hierarchy
|
|
590
|
+
parts.append(
|
|
591
|
+
"\n**Task Handling Priority**: When responding to user requests, follow this priority order:\n"
|
|
592
|
+
"1. **Use MCP Tools First**: If you have specialized MCP tools available, call them DIRECTLY to complete the task\n"
|
|
593
|
+
" - Save any outputs/artifacts from MCP tools to your workspace\n"
|
|
594
|
+
"2. **Write Code If Needed**: If MCP tools cannot complete the task, write and execute code\n"
|
|
595
|
+
"3. **Create Other Files**: Create configs, documents, or other deliverables as needed\n"
|
|
596
|
+
"4. **Text Response Otherwise**: If no tools or files are needed, provide a direct text answer\n\n"
|
|
597
|
+
"**Important**: Do NOT ask the user for clarification or additional input. Make reasonable assumptions and proceed with sensible defaults. "
|
|
598
|
+
"You will not receive user feedback, so complete the task autonomously based on the original request.\n",
|
|
599
|
+
)
|
|
600
|
+
|
|
601
|
+
# Add requirement for path explanations in answers
|
|
602
|
+
# if enable_image_generation:
|
|
603
|
+
# # # Enabled for image generation tasks
|
|
604
|
+
# parts.append(
|
|
605
|
+
# "\n**Image Generation Tasks**: When working on image generation tasks, if you find images equivalent and cannot choose between them, "
|
|
606
|
+
# "choose the one with the smallest file size.\n"
|
|
607
|
+
# "\n**New Answer**: When calling `new_answer` tool:"
|
|
608
|
+
# "- For non-image generation tasks, if you created files, list your cwd and file paths (but do NOT paste full file contents)\n"
|
|
609
|
+
# "- For image generation tasks, do not use file write tools. Instead, the images are already generated directly "
|
|
610
|
+
# "with the image_generation tool. Then, providing new answer with 1) briefly describing the contents of the images "
|
|
611
|
+
# "and 2) listing your full cwd and the image paths you created.\n",
|
|
612
|
+
# )
|
|
613
|
+
# else:
|
|
614
|
+
# Not enabled for image generation tasks
|
|
615
|
+
new_answer_guidance = "\n**New Answer**: When calling `new_answer`:\n"
|
|
616
|
+
if enable_command_execution:
|
|
617
|
+
new_answer_guidance += "- If you executed commands (e.g., running tests), explain the results in your answer (what passed, what failed, what the output shows)\n"
|
|
618
|
+
new_answer_guidance += "- If you created files, list your cwd and file paths (but do NOT paste full file contents)\n"
|
|
619
|
+
new_answer_guidance += "- If providing a text response, include your analysis/explanation in the `content` field\n"
|
|
620
|
+
parts.append(new_answer_guidance)
|
|
621
|
+
|
|
622
|
+
# Add workspace cleanup guidance
|
|
623
|
+
parts.append(
|
|
624
|
+
"**Workspace Cleanup**: Before submitting your answer with `new_answer`, " "ensure that your workspace contains only the files relevant to your final answer.\n",
|
|
625
|
+
# use `delete_file` or "
|
|
626
|
+
# "`delete_files_batch` to remove any outdated, temporary, or unused files from your workspace. "
|
|
627
|
+
# "Note: You cannot delete read-only files (e.g., files from other agents' workspaces or read-only context paths). "
|
|
628
|
+
# "This ensures only the relevant final files remain for evaluation. For example, if you created "
|
|
629
|
+
# "`old_index.html` then later created `new_website/index.html`, delete the old version.\n",
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
# Add diff tools guidance
|
|
633
|
+
parts.append(
|
|
634
|
+
"**Comparison Tools**: Use `compare_directories` to see differences between two directories (e.g., comparing "
|
|
635
|
+
"your workspace to another agent's workspace or a previous version), or `compare_files` to see line-by-line diffs "
|
|
636
|
+
"between two files. These read-only tools help you understand what changed, build upon existing work effectively, "
|
|
637
|
+
"or verify solutions before voting.\n",
|
|
638
|
+
)
|
|
639
|
+
|
|
640
|
+
# Add voting guidance
|
|
641
|
+
# if enable_image_generation:
|
|
642
|
+
# # Enabled for image generation tasks
|
|
643
|
+
# parts.append(
|
|
644
|
+
# "**Evaluation**: When evaluating agents' answers, do NOT base your decision solely on the answer text. "
|
|
645
|
+
# "Instead, read and verify the actual files in their workspaces (via Shared Reference) to ensure the work matches their claims."
|
|
646
|
+
# "IMPORTANT: For image tasks, you MUST use ONLY the `mcp__workspace__extract_multimodal_files` tool to view and evaluate images. Do NOT use any other tool for this purpose.\n",
|
|
647
|
+
# )
|
|
648
|
+
# else:
|
|
649
|
+
# Not enabled for image generation tasks
|
|
650
|
+
parts.append(
|
|
651
|
+
"**Evaluation**: When evaluating agents' answers, do NOT base your decision solely on the answer text. "
|
|
652
|
+
"Instead, read and verify the actual files in their workspaces (via Shared Reference) to ensure the work matches their claims.\n",
|
|
653
|
+
)
|
|
654
|
+
|
|
655
|
+
# Add command execution instructions if enabled
|
|
656
|
+
if enable_command_execution:
|
|
657
|
+
command_exec_message = self.command_execution_system_message()
|
|
658
|
+
parts.append(f"\n{command_exec_message}")
|
|
659
|
+
|
|
660
|
+
return "\n".join(parts)
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
# ### IMPORTANT Evaluation Note:
|
|
664
|
+
# When evaluating other agents' work, focus on the CONTENT and FUNCTIONALITY of their files.
|
|
665
|
+
# Each agent works in their own isolated workspace - this is correct behavior.
|
|
666
|
+
# The paths shown in their answers are normalized so you can access and verify their work.
|
|
667
|
+
# Judge based on code quality, correctness, and completeness, not on which workspace directory was used.
|
|
668
|
+
|
|
389
669
|
|
|
390
670
|
# Global template instance
|
|
391
671
|
_templates = MessageTemplates()
|
|
@@ -414,9 +694,7 @@ def build_case2_conversation(
|
|
|
414
694
|
valid_agent_ids: Optional[List[str]] = None,
|
|
415
695
|
) -> Dict[str, Any]:
|
|
416
696
|
"""Build Case 2 conversation (summaries exist)."""
|
|
417
|
-
return get_templates().build_initial_conversation(
|
|
418
|
-
task, agent_summaries, valid_agent_ids
|
|
419
|
-
)
|
|
697
|
+
return get_templates().build_initial_conversation(task, agent_summaries, valid_agent_ids)
|
|
420
698
|
|
|
421
699
|
|
|
422
700
|
def get_standard_tools(
|