massgen-0.0.3-py3-none-any.whl → massgen-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of massgen might be problematic. (The original registry page links to more details.)

Files changed (268)
  1. massgen/__init__.py +142 -8
  2. massgen/adapters/__init__.py +29 -0
  3. massgen/adapters/ag2_adapter.py +483 -0
  4. massgen/adapters/base.py +183 -0
  5. massgen/adapters/tests/__init__.py +0 -0
  6. massgen/adapters/tests/test_ag2_adapter.py +439 -0
  7. massgen/adapters/tests/test_agent_adapter.py +128 -0
  8. massgen/adapters/utils/__init__.py +2 -0
  9. massgen/adapters/utils/ag2_utils.py +236 -0
  10. massgen/adapters/utils/tests/__init__.py +0 -0
  11. massgen/adapters/utils/tests/test_ag2_utils.py +138 -0
  12. massgen/agent_config.py +329 -55
  13. massgen/api_params_handler/__init__.py +10 -0
  14. massgen/api_params_handler/_api_params_handler_base.py +99 -0
  15. massgen/api_params_handler/_chat_completions_api_params_handler.py +176 -0
  16. massgen/api_params_handler/_claude_api_params_handler.py +113 -0
  17. massgen/api_params_handler/_response_api_params_handler.py +130 -0
  18. massgen/backend/__init__.py +39 -4
  19. massgen/backend/azure_openai.py +385 -0
  20. massgen/backend/base.py +341 -69
  21. massgen/backend/base_with_mcp.py +1102 -0
  22. massgen/backend/capabilities.py +386 -0
  23. massgen/backend/chat_completions.py +577 -130
  24. massgen/backend/claude.py +1033 -537
  25. massgen/backend/claude_code.py +1203 -0
  26. massgen/backend/cli_base.py +209 -0
  27. massgen/backend/docs/BACKEND_ARCHITECTURE.md +126 -0
  28. massgen/backend/{CLAUDE_API_RESEARCH.md → docs/CLAUDE_API_RESEARCH.md} +18 -18
  29. massgen/backend/{GEMINI_API_DOCUMENTATION.md → docs/GEMINI_API_DOCUMENTATION.md} +9 -9
  30. massgen/backend/docs/Gemini MCP Integration Analysis.md +1050 -0
  31. massgen/backend/docs/MCP_IMPLEMENTATION_CLAUDE_BACKEND.md +177 -0
  32. massgen/backend/docs/MCP_INTEGRATION_RESPONSE_BACKEND.md +352 -0
  33. massgen/backend/docs/OPENAI_GPT5_MODELS.md +211 -0
  34. massgen/backend/{OPENAI_RESPONSES_API_FORMAT.md → docs/OPENAI_RESPONSE_API_TOOL_CALLS.md} +3 -3
  35. massgen/backend/docs/OPENAI_response_streaming.md +20654 -0
  36. massgen/backend/docs/inference_backend.md +257 -0
  37. massgen/backend/docs/permissions_and_context_files.md +1085 -0
  38. massgen/backend/external.py +126 -0
  39. massgen/backend/gemini.py +1850 -241
  40. massgen/backend/grok.py +40 -156
  41. massgen/backend/inference.py +156 -0
  42. massgen/backend/lmstudio.py +171 -0
  43. massgen/backend/response.py +1095 -322
  44. massgen/chat_agent.py +131 -113
  45. massgen/cli.py +1560 -275
  46. massgen/config_builder.py +2396 -0
  47. massgen/configs/BACKEND_CONFIGURATION.md +458 -0
  48. massgen/configs/README.md +559 -216
  49. massgen/configs/ag2/ag2_case_study.yaml +27 -0
  50. massgen/configs/ag2/ag2_coder.yaml +34 -0
  51. massgen/configs/ag2/ag2_coder_case_study.yaml +36 -0
  52. massgen/configs/ag2/ag2_gemini.yaml +27 -0
  53. massgen/configs/ag2/ag2_groupchat.yaml +108 -0
  54. massgen/configs/ag2/ag2_groupchat_gpt.yaml +118 -0
  55. massgen/configs/ag2/ag2_single_agent.yaml +21 -0
  56. massgen/configs/basic/multi/fast_timeout_example.yaml +37 -0
  57. massgen/configs/basic/multi/gemini_4o_claude.yaml +31 -0
  58. massgen/configs/basic/multi/gemini_gpt5nano_claude.yaml +36 -0
  59. massgen/configs/{gemini_4o_claude.yaml → basic/multi/geminicode_4o_claude.yaml} +3 -3
  60. massgen/configs/basic/multi/geminicode_gpt5nano_claude.yaml +36 -0
  61. massgen/configs/basic/multi/glm_gemini_claude.yaml +25 -0
  62. massgen/configs/basic/multi/gpt4o_audio_generation.yaml +30 -0
  63. massgen/configs/basic/multi/gpt4o_image_generation.yaml +31 -0
  64. massgen/configs/basic/multi/gpt5nano_glm_qwen.yaml +26 -0
  65. massgen/configs/basic/multi/gpt5nano_image_understanding.yaml +26 -0
  66. massgen/configs/{three_agents_default.yaml → basic/multi/three_agents_default.yaml} +8 -4
  67. massgen/configs/basic/multi/three_agents_opensource.yaml +27 -0
  68. massgen/configs/basic/multi/three_agents_vllm.yaml +20 -0
  69. massgen/configs/basic/multi/two_agents_gemini.yaml +19 -0
  70. massgen/configs/{two_agents.yaml → basic/multi/two_agents_gpt5.yaml} +14 -6
  71. massgen/configs/basic/multi/two_agents_opensource_lmstudio.yaml +31 -0
  72. massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml +28 -0
  73. massgen/configs/{single_agent.yaml → basic/single/single_agent.yaml} +1 -1
  74. massgen/configs/{single_flash2.5.yaml → basic/single/single_flash2.5.yaml} +1 -2
  75. massgen/configs/basic/single/single_gemini2.5pro.yaml +16 -0
  76. massgen/configs/basic/single/single_gpt4o_audio_generation.yaml +22 -0
  77. massgen/configs/basic/single/single_gpt4o_image_generation.yaml +22 -0
  78. massgen/configs/basic/single/single_gpt4o_video_generation.yaml +24 -0
  79. massgen/configs/basic/single/single_gpt5nano.yaml +20 -0
  80. massgen/configs/basic/single/single_gpt5nano_file_search.yaml +18 -0
  81. massgen/configs/basic/single/single_gpt5nano_image_understanding.yaml +17 -0
  82. massgen/configs/basic/single/single_gptoss120b.yaml +15 -0
  83. massgen/configs/basic/single/single_openrouter_audio_understanding.yaml +15 -0
  84. massgen/configs/basic/single/single_qwen_video_understanding.yaml +15 -0
  85. massgen/configs/debug/code_execution/command_filtering_blacklist.yaml +29 -0
  86. massgen/configs/debug/code_execution/command_filtering_whitelist.yaml +28 -0
  87. massgen/configs/debug/code_execution/docker_verification.yaml +29 -0
  88. massgen/configs/debug/skip_coordination_test.yaml +27 -0
  89. massgen/configs/debug/test_sdk_migration.yaml +17 -0
  90. massgen/configs/docs/DISCORD_MCP_SETUP.md +208 -0
  91. massgen/configs/docs/TWITTER_MCP_ENESCINAR_SETUP.md +82 -0
  92. massgen/configs/providers/azure/azure_openai_multi.yaml +21 -0
  93. massgen/configs/providers/azure/azure_openai_single.yaml +19 -0
  94. massgen/configs/providers/claude/claude.yaml +14 -0
  95. massgen/configs/providers/gemini/gemini_gpt5nano.yaml +28 -0
  96. massgen/configs/providers/local/lmstudio.yaml +11 -0
  97. massgen/configs/providers/openai/gpt5.yaml +46 -0
  98. massgen/configs/providers/openai/gpt5_nano.yaml +46 -0
  99. massgen/configs/providers/others/grok_single_agent.yaml +19 -0
  100. massgen/configs/providers/others/zai_coding_team.yaml +108 -0
  101. massgen/configs/providers/others/zai_glm45.yaml +12 -0
  102. massgen/configs/{creative_team.yaml → teams/creative/creative_team.yaml} +16 -6
  103. massgen/configs/{travel_planning.yaml → teams/creative/travel_planning.yaml} +16 -6
  104. massgen/configs/{news_analysis.yaml → teams/research/news_analysis.yaml} +16 -6
  105. massgen/configs/{research_team.yaml → teams/research/research_team.yaml} +15 -7
  106. massgen/configs/{technical_analysis.yaml → teams/research/technical_analysis.yaml} +16 -6
  107. massgen/configs/tools/code-execution/basic_command_execution.yaml +25 -0
  108. massgen/configs/tools/code-execution/code_execution_use_case_simple.yaml +41 -0
  109. massgen/configs/tools/code-execution/docker_claude_code.yaml +32 -0
  110. massgen/configs/tools/code-execution/docker_multi_agent.yaml +32 -0
  111. massgen/configs/tools/code-execution/docker_simple.yaml +29 -0
  112. massgen/configs/tools/code-execution/docker_with_resource_limits.yaml +32 -0
  113. massgen/configs/tools/code-execution/multi_agent_playwright_automation.yaml +57 -0
  114. massgen/configs/tools/filesystem/cc_gpt5_gemini_filesystem.yaml +34 -0
  115. massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +68 -0
  116. massgen/configs/tools/filesystem/claude_code_flash2.5.yaml +43 -0
  117. massgen/configs/tools/filesystem/claude_code_flash2.5_gptoss.yaml +49 -0
  118. massgen/configs/tools/filesystem/claude_code_gpt5nano.yaml +31 -0
  119. massgen/configs/tools/filesystem/claude_code_single.yaml +40 -0
  120. massgen/configs/tools/filesystem/fs_permissions_test.yaml +87 -0
  121. massgen/configs/tools/filesystem/gemini_gemini_workspace_cleanup.yaml +54 -0
  122. massgen/configs/tools/filesystem/gemini_gpt5_filesystem_casestudy.yaml +30 -0
  123. massgen/configs/tools/filesystem/gemini_gpt5nano_file_context_path.yaml +43 -0
  124. massgen/configs/tools/filesystem/gemini_gpt5nano_protected_paths.yaml +45 -0
  125. massgen/configs/tools/filesystem/gpt5mini_cc_fs_context_path.yaml +31 -0
  126. massgen/configs/tools/filesystem/grok4_gpt5_gemini_filesystem.yaml +32 -0
  127. massgen/configs/tools/filesystem/multiturn/grok4_gpt5_claude_code_filesystem_multiturn.yaml +58 -0
  128. massgen/configs/tools/filesystem/multiturn/grok4_gpt5_gemini_filesystem_multiturn.yaml +58 -0
  129. massgen/configs/tools/filesystem/multiturn/two_claude_code_filesystem_multiturn.yaml +47 -0
  130. massgen/configs/tools/filesystem/multiturn/two_gemini_flash_filesystem_multiturn.yaml +48 -0
  131. massgen/configs/tools/mcp/claude_code_discord_mcp_example.yaml +27 -0
  132. massgen/configs/tools/mcp/claude_code_simple_mcp.yaml +35 -0
  133. massgen/configs/tools/mcp/claude_code_twitter_mcp_example.yaml +32 -0
  134. massgen/configs/tools/mcp/claude_mcp_example.yaml +24 -0
  135. massgen/configs/tools/mcp/claude_mcp_test.yaml +27 -0
  136. massgen/configs/tools/mcp/five_agents_travel_mcp_test.yaml +157 -0
  137. massgen/configs/tools/mcp/five_agents_weather_mcp_test.yaml +103 -0
  138. massgen/configs/tools/mcp/gemini_mcp_example.yaml +24 -0
  139. massgen/configs/tools/mcp/gemini_mcp_filesystem_test.yaml +23 -0
  140. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_sharing.yaml +23 -0
  141. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_single_agent.yaml +17 -0
  142. massgen/configs/tools/mcp/gemini_mcp_filesystem_test_with_claude_code.yaml +24 -0
  143. massgen/configs/tools/mcp/gemini_mcp_test.yaml +27 -0
  144. massgen/configs/tools/mcp/gemini_notion_mcp.yaml +52 -0
  145. massgen/configs/tools/mcp/gpt5_nano_mcp_example.yaml +24 -0
  146. massgen/configs/tools/mcp/gpt5_nano_mcp_test.yaml +27 -0
  147. massgen/configs/tools/mcp/gpt5mini_claude_code_discord_mcp_example.yaml +38 -0
  148. massgen/configs/tools/mcp/gpt_oss_mcp_example.yaml +25 -0
  149. massgen/configs/tools/mcp/gpt_oss_mcp_test.yaml +28 -0
  150. massgen/configs/tools/mcp/grok3_mini_mcp_example.yaml +24 -0
  151. massgen/configs/tools/mcp/grok3_mini_mcp_test.yaml +27 -0
  152. massgen/configs/tools/mcp/multimcp_gemini.yaml +111 -0
  153. massgen/configs/tools/mcp/qwen_api_mcp_example.yaml +25 -0
  154. massgen/configs/tools/mcp/qwen_api_mcp_test.yaml +28 -0
  155. massgen/configs/tools/mcp/qwen_local_mcp_example.yaml +24 -0
  156. massgen/configs/tools/mcp/qwen_local_mcp_test.yaml +27 -0
  157. massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +140 -0
  158. massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +151 -0
  159. massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +151 -0
  160. massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +155 -0
  161. massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +73 -0
  162. massgen/configs/tools/web-search/claude_streamable_http_test.yaml +43 -0
  163. massgen/configs/tools/web-search/gemini_streamable_http_test.yaml +43 -0
  164. massgen/configs/tools/web-search/gpt5_mini_streamable_http_test.yaml +43 -0
  165. massgen/configs/tools/web-search/gpt_oss_streamable_http_test.yaml +44 -0
  166. massgen/configs/tools/web-search/grok3_mini_streamable_http_test.yaml +43 -0
  167. massgen/configs/tools/web-search/qwen_api_streamable_http_test.yaml +44 -0
  168. massgen/configs/tools/web-search/qwen_local_streamable_http_test.yaml +43 -0
  169. massgen/coordination_tracker.py +708 -0
  170. massgen/docker/README.md +462 -0
  171. massgen/filesystem_manager/__init__.py +21 -0
  172. massgen/filesystem_manager/_base.py +9 -0
  173. massgen/filesystem_manager/_code_execution_server.py +545 -0
  174. massgen/filesystem_manager/_docker_manager.py +477 -0
  175. massgen/filesystem_manager/_file_operation_tracker.py +248 -0
  176. massgen/filesystem_manager/_filesystem_manager.py +813 -0
  177. massgen/filesystem_manager/_path_permission_manager.py +1261 -0
  178. massgen/filesystem_manager/_workspace_tools_server.py +1815 -0
  179. massgen/formatter/__init__.py +10 -0
  180. massgen/formatter/_chat_completions_formatter.py +284 -0
  181. massgen/formatter/_claude_formatter.py +235 -0
  182. massgen/formatter/_formatter_base.py +156 -0
  183. massgen/formatter/_response_formatter.py +263 -0
  184. massgen/frontend/__init__.py +1 -2
  185. massgen/frontend/coordination_ui.py +471 -286
  186. massgen/frontend/displays/base_display.py +56 -11
  187. massgen/frontend/displays/create_coordination_table.py +1956 -0
  188. massgen/frontend/displays/rich_terminal_display.py +1259 -619
  189. massgen/frontend/displays/simple_display.py +9 -4
  190. massgen/frontend/displays/terminal_display.py +27 -68
  191. massgen/logger_config.py +681 -0
  192. massgen/mcp_tools/README.md +232 -0
  193. massgen/mcp_tools/__init__.py +105 -0
  194. massgen/mcp_tools/backend_utils.py +1035 -0
  195. massgen/mcp_tools/circuit_breaker.py +195 -0
  196. massgen/mcp_tools/client.py +894 -0
  197. massgen/mcp_tools/config_validator.py +138 -0
  198. massgen/mcp_tools/docs/circuit_breaker.md +646 -0
  199. massgen/mcp_tools/docs/client.md +950 -0
  200. massgen/mcp_tools/docs/config_validator.md +478 -0
  201. massgen/mcp_tools/docs/exceptions.md +1165 -0
  202. massgen/mcp_tools/docs/security.md +854 -0
  203. massgen/mcp_tools/exceptions.py +338 -0
  204. massgen/mcp_tools/hooks.py +212 -0
  205. massgen/mcp_tools/security.py +780 -0
  206. massgen/message_templates.py +342 -64
  207. massgen/orchestrator.py +1515 -241
  208. massgen/stream_chunk/__init__.py +35 -0
  209. massgen/stream_chunk/base.py +92 -0
  210. massgen/stream_chunk/multimodal.py +237 -0
  211. massgen/stream_chunk/text.py +162 -0
  212. massgen/tests/mcp_test_server.py +150 -0
  213. massgen/tests/multi_turn_conversation_design.md +0 -8
  214. massgen/tests/test_azure_openai_backend.py +156 -0
  215. massgen/tests/test_backend_capabilities.py +262 -0
  216. massgen/tests/test_backend_event_loop_all.py +179 -0
  217. massgen/tests/test_chat_completions_refactor.py +142 -0
  218. massgen/tests/test_claude_backend.py +15 -28
  219. massgen/tests/test_claude_code.py +268 -0
  220. massgen/tests/test_claude_code_context_sharing.py +233 -0
  221. massgen/tests/test_claude_code_orchestrator.py +175 -0
  222. massgen/tests/test_cli_backends.py +180 -0
  223. massgen/tests/test_code_execution.py +679 -0
  224. massgen/tests/test_external_agent_backend.py +134 -0
  225. massgen/tests/test_final_presentation_fallback.py +237 -0
  226. massgen/tests/test_gemini_planning_mode.py +351 -0
  227. massgen/tests/test_grok_backend.py +7 -10
  228. massgen/tests/test_http_mcp_server.py +42 -0
  229. massgen/tests/test_integration_simple.py +198 -0
  230. massgen/tests/test_mcp_blocking.py +125 -0
  231. massgen/tests/test_message_context_building.py +29 -47
  232. massgen/tests/test_orchestrator_final_presentation.py +48 -0
  233. massgen/tests/test_path_permission_manager.py +2087 -0
  234. massgen/tests/test_rich_terminal_display.py +14 -13
  235. massgen/tests/test_timeout.py +133 -0
  236. massgen/tests/test_v3_3agents.py +11 -12
  237. massgen/tests/test_v3_simple.py +8 -13
  238. massgen/tests/test_v3_three_agents.py +11 -18
  239. massgen/tests/test_v3_two_agents.py +8 -13
  240. massgen/token_manager/__init__.py +7 -0
  241. massgen/token_manager/token_manager.py +400 -0
  242. massgen/utils.py +52 -16
  243. massgen/v1/agent.py +45 -91
  244. massgen/v1/agents.py +18 -53
  245. massgen/v1/backends/gemini.py +50 -153
  246. massgen/v1/backends/grok.py +21 -54
  247. massgen/v1/backends/oai.py +39 -111
  248. massgen/v1/cli.py +36 -93
  249. massgen/v1/config.py +8 -12
  250. massgen/v1/logging.py +43 -127
  251. massgen/v1/main.py +18 -32
  252. massgen/v1/orchestrator.py +68 -209
  253. massgen/v1/streaming_display.py +62 -163
  254. massgen/v1/tools.py +8 -12
  255. massgen/v1/types.py +9 -23
  256. massgen/v1/utils.py +5 -23
  257. massgen-0.1.0.dist-info/METADATA +1245 -0
  258. massgen-0.1.0.dist-info/RECORD +273 -0
  259. massgen-0.1.0.dist-info/entry_points.txt +2 -0
  260. massgen/frontend/logging/__init__.py +0 -9
  261. massgen/frontend/logging/realtime_logger.py +0 -197
  262. massgen-0.0.3.dist-info/METADATA +0 -568
  263. massgen-0.0.3.dist-info/RECORD +0 -76
  264. massgen-0.0.3.dist-info/entry_points.txt +0 -2
  265. /massgen/backend/{Function calling openai responses.md → docs/Function calling openai responses.md} +0 -0
  266. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/WHEEL +0 -0
  267. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/licenses/LICENSE +0 -0
  268. {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,10 @@
1
+ # -*- coding: utf-8 -*-
1
2
  """
2
3
  Message templates for MassGen framework following input_cases_reference.md
3
4
  Implements proven binary decision framework that eliminates perfectionism loops.
4
5
  """
5
6
 
6
- from typing import Dict, Any, Optional, List
7
+ from typing import Any, Dict, List, Optional
7
8
 
8
9
 
9
10
  class MessageTemplates:
@@ -25,27 +26,48 @@ class MessageTemplates:
25
26
  import time
26
27
 
27
28
  # return f"""You are evaluating answers from multiple agents for final response to a message.
28
-
29
+ # For every aspect, claim, and reasoning step in the CURRENT ANSWERS, verify correctness, factual accuracy, and completeness using your expertise, reasoning, and **available tools**.
30
+ # **You must use at least one tool in every evaluation round**—this is mandatory.
31
+ # - If the CURRENT ANSWERS fully address the ORIGINAL MESSAGE, use the `vote` tool to record your vote and skip the `new_answer` tool.
32
+ # - If the CURRENT ANSWERS are incomplete, incorrect, or do not fully address the ORIGINAL MESSAGE,
33
+ # conduct any necessary reasoning or research using tools (such as `search`), and then use the
34
+ # `new_answer` tool to submit a new response.
35
+ # Your new answer must be self-contained, process-complete, well-sourced, and compelling—ready to serve as the final reply.
36
+ # **Important**:
37
+ # - You must actually call at least one tool per round.
38
+ # - If no other tools are relevant or available, you must use either `new_answer` or `vote` to fulfill the tool-use requirement.
39
+ # *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**.
40
+ # For any time-sensitive requests, use the `search` tool (if available) rather than relying on prior knowledge.
41
+ # """
42
+ # return f"""You are evaluating answers from multiple agents for final response to a message.
29
43
  # For every aspect, claim, reasoning steps in the CURRENT ANSWERS, verify correctness, factual accuracy, and completeness using your expertise, reasoning, and available tools.
30
-
31
44
  # If the CURRENT ANSWERS fully address the ORIGINAL MESSAGE, use the `vote` tool to record your vote and skip the `new_answer` tool.
32
-
33
- # If the CURRENT ANSWERS are incomplete, incorrect, or not fully address the ORIGINAL MESSAGE, conduct any necessary reasoning or research. Then, use the `new_answer` tool to submit a new response.
34
-
45
+ # If the CURRENT ANSWERS are incomplete, incorrect, or not fully address the ORIGINAL MESSAGE,
46
+ # conduct any necessary reasoning or research. Then, use the `new_answer` tool to submit a new response.
35
47
  # Your new answer must be self-contained, process-complete, well-sourced, and compelling—ready to serve as the final reply.
36
-
37
48
  # **Important**: Be sure to actually call the `new_answer` tool to submit your new answer (use native tool call format).
38
-
39
49
  # *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**.
40
50
  # For any time-sensitive requests, use the search tool (if available) rather than relying on prior knowledge."""
41
-
42
- return f"""You are evaluating answers from multiple agents for final response to a message. Does the best CURRENT ANSWER address the ORIGINAL MESSAGE?
51
+ # BACKUP - Original evaluation message (pre-synthesis-encouragement update):
52
+ # return f"""You are evaluating answers from multiple agents for final response to a message. Does the best CURRENT ANSWER address the ORIGINAL MESSAGE?
53
+ #
54
+ # If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.
55
+ # Otherwise, digest existing answers, combine their strengths, and do additional work to address their
56
+ # weaknesses, then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.
57
+ # Make sure you actually call `vote` or `new_answer` (in tool call format).
58
+ #
59
+ # *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
60
+
61
+ return f"""You are evaluating answers from multiple agents for final response to a message.
62
+ Different agents may have different builtin tools and capabilities.
63
+ Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well?
43
64
 
44
65
  If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.
45
- Otherwise, do additional work first, then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE. Make sure you actually call `vote` or `new_answer` (in tool call format).
66
+ Otherwise, digest existing answers, combine their strengths, and do additional work to address their weaknesses,
67
+ then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.
68
+ Make sure you actually call `vote` or `new_answer` (in tool call format).
46
69
 
47
- *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**.
48
- """
70
+ *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
49
71
 
50
72
  # =============================================================================
51
73
  # USER MESSAGE TEMPLATES
@@ -61,9 +83,7 @@ Otherwise, do additional work first, then use the `new_answer` tool to record a
61
83
 
62
84
  return f"<ORIGINAL MESSAGE> {task} <END OF ORIGINAL MESSAGE>"
63
85
 
64
- def format_conversation_history(
65
- self, conversation_history: List[Dict[str, str]]
66
- ) -> str:
86
+ def format_conversation_history(self, conversation_history: List[Dict[str, str]]) -> str:
67
87
  """Format conversation history for agent context."""
68
88
  if "format_conversation_history" in self._template_overrides:
69
89
  override = self._template_overrides["format_conversation_history"]
@@ -88,9 +108,7 @@ Otherwise, do additional work first, then use the `new_answer` tool to record a
88
108
  lines.append("<END OF CONVERSATION_HISTORY>")
89
109
  return "\n".join(lines)
90
110
 
91
- def system_message_with_context(
92
- self, conversation_history: Optional[List[Dict[str, str]]] = None
93
- ) -> str:
111
+ def system_message_with_context(self, conversation_history: Optional[List[Dict[str, str]]] = None) -> str:
94
112
  """Evaluation system message with conversation context awareness."""
95
113
  if "system_message_with_context" in self._template_overrides:
96
114
  override = self._template_overrides["system_message_with_context"]
@@ -102,7 +120,7 @@ Otherwise, do additional work first, then use the `new_answer` tool to record a
102
120
 
103
121
  if conversation_history and len(conversation_history) > 0:
104
122
  context_note = """
105
-
123
+
106
124
  IMPORTANT: You are responding to the latest message in an ongoing conversation. Consider the full conversation context when evaluating answers and providing your response."""
107
125
  return base_message + context_note
108
126
 
@@ -117,9 +135,7 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
117
135
  (no answers available yet)
118
136
  <END OF CURRENT ANSWERS>"""
119
137
 
120
- def format_current_answers_with_summaries(
121
- self, agent_summaries: Dict[str, str]
122
- ) -> str:
138
+ def format_current_answers_with_summaries(self, agent_summaries: Dict[str, str]) -> str:
123
139
  """Format current answers section with agent summaries (Case 2) using anonymous agent IDs."""
124
140
  if "format_current_answers_with_summaries" in self._template_overrides:
125
141
  override = self._template_overrides["format_current_answers_with_summaries"]
@@ -160,7 +176,15 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
160
176
  # =============================================================================
161
177
 
162
178
  def get_new_answer_tool(self) -> Dict[str, Any]:
163
- """Get new_answer tool definition."""
179
+ """Get new_answer tool definition.
180
+
181
+ TODO: Consider extending with optional context parameters for stateful backends:
182
+ - cwd: Working directory for Claude Code sessions
183
+ - session_id: Backend session identifier for continuity
184
+ - model: Model used to generate the answer
185
+ - tools_used: List of tools actually utilized
186
+ This would enable better context preservation in multi-iteration workflows.
187
+ """
164
188
  if "new_answer_tool" in self._template_overrides:
165
189
  return self._template_overrides["new_answer_tool"]
166
190
 
@@ -174,17 +198,15 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
174
198
  "properties": {
175
199
  "content": {
176
200
  "type": "string",
177
- "description": "Your improved answer. If any builtin tools like search or code execution were used, include how they are used here.",
178
- }
201
+ "description": "Your improved answer. If any builtin tools like search or code execution were used, mention how they are used here.",
202
+ },
179
203
  },
180
204
  "required": ["content"],
181
205
  },
182
206
  },
183
207
  }
184
208
 
185
- def get_vote_tool(
186
- self, valid_agent_ids: Optional[List[str]] = None
187
- ) -> Dict[str, Any]:
209
+ def get_vote_tool(self, valid_agent_ids: Optional[List[str]] = None) -> Dict[str, Any]:
188
210
  """Get vote tool definition with anonymous agent IDs."""
189
211
  if "vote_tool" in self._template_overrides:
190
212
  override = self._template_overrides["vote_tool"]
@@ -217,44 +239,91 @@ IMPORTANT: You are responding to the latest message in an ongoing conversation.
217
239
  # Create anonymous mapping for enum constraint
218
240
  if valid_agent_ids:
219
241
  anon_agent_ids = [f"agent{i}" for i in range(1, len(valid_agent_ids) + 1)]
220
- tool_def["function"]["parameters"]["properties"]["agent_id"][
221
- "enum"
222
- ] = anon_agent_ids
242
+ tool_def["function"]["parameters"]["properties"]["agent_id"]["enum"] = anon_agent_ids
223
243
 
224
244
  return tool_def
225
245
 
226
- def get_standard_tools(
227
- self, valid_agent_ids: Optional[List[str]] = None
228
- ) -> List[Dict[str, Any]]:
246
+ def get_standard_tools(self, valid_agent_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
229
247
  """Get standard tools for MassGen framework."""
230
248
  return [self.get_new_answer_tool(), self.get_vote_tool(valid_agent_ids)]
231
249
 
232
250
  def final_presentation_system_message(
233
- self, original_system_message: Optional[str] = None
251
+ self,
252
+ original_system_message: Optional[str] = None,
253
+ enable_image_generation: bool = False,
254
+ enable_audio_generation: bool = False,
255
+ has_irreversible_actions: bool = False,
256
+ enable_command_execution: bool = False,
234
257
  ) -> str:
235
258
  """System message for final answer presentation by winning agent.
236
259
 
237
260
  Args:
238
261
  original_system_message: The agent's original system message to preserve
262
+ enable_image_generation: Whether image generation is enabled
263
+ enable_audio_generation: Whether audio generation is enabled
264
+ has_irreversible_actions: Whether agent has write access to context paths (requires actual file delivery)
265
+ enable_command_execution: Whether command execution is enabled for this agent
239
266
  """
240
267
  if "final_presentation_system_message" in self._template_overrides:
241
268
  return str(self._template_overrides["final_presentation_system_message"])
242
269
 
243
- presentation_instructions = """You have been selected as the winning answer in a coordination process. Your task is to present a polished, comprehensive final answer that incorporates the best insights from all participants.
270
+ # BACKUP - Original final presentation message (pre-explicit-synthesis update):
271
+ # presentation_instructions = """You have been selected as the winning presenter in a coordination process.
272
+ # Your task is to present a polished, comprehensive final answer that incorporates the best insights from all participants.
273
+ #
274
+ # Consider:
275
+ # 1. Your original response and how it can be refined
276
+ # 2. Valuable insights from other agents' answers that should be incorporated
277
+ # 3. Feedback received through the voting process
278
+ # 4. Ensuring clarity, completeness, and comprehensiveness for the final audience
279
+ #
280
+ # Present your final coordinated answer in the most helpful and complete way possible."""
281
+
282
+ presentation_instructions = """You have been selected as the winning presenter in a coordination process.
283
+ Present the best possible coordinated answer by combining the strengths from all participants.\n\n"""
284
+
285
+ # Add image generation instructions only if enabled
286
+ if enable_image_generation:
287
+ presentation_instructions += """For image generation tasks:
288
+ - Extract image paths from the existing answer and resolve them in the shared reference.
289
+ - Gather all agent-produced images (ignore non-existent files).
290
+ - MUST call the generate-image tool with these input images to synthesize one final image combining their strengths.
291
+ - MUST save the final outputand output the saved path.
292
+ """
293
+ # Add audio generation instructions only if enabled
294
+ if enable_audio_generation:
295
+ presentation_instructions += """For audio generation tasks:
296
+ - Extract audio paths from the existing answer and resolve them in the shared reference.
297
+ - Gather ALL audio files produced by EVERY agent (ignore non-existent files).
298
+ IMPORTANT: You MUST call the generate_text_with_input_audio tool to obtain transcriptions
299
+ for EACH AND EVERY audio file from ALL agents - no audio should be skipped or overlooked.
300
+ - MUST combine the strengths of all transcriptions into one final detailed transcription that captures the best elements from each.
301
+ - MUST use the convert_text_to_audio tool to convert this final transcription to a new audio file and save it, then output the saved path.
302
+ """
244
303
 
245
- Consider:
246
- 1. Your original response and how it can be refined
247
- 2. Valuable insights from other agents' answers that should be incorporated
248
- 3. Feedback received through the voting process
249
- 4. Ensuring clarity, completeness, and comprehensiveness for the final audience
304
+ # Add irreversible actions reminder if needed
305
+ # TODO: Integrate more general irreversible actions handling in future (i.e., not just for context file delivery)
306
+ if has_irreversible_actions:
307
+ presentation_instructions += (
308
+ "### Write Access to Target Path:\n\n"
309
+ "Reminder: File Delivery Required. You should first place your final answer in your workspace. "
310
+ "However, note your workspace is NOT the final destination. You MUST copy/write files to the Target Path using FULL ABSOLUTE PATHS. "
311
+ "Then, clean up this Target Path by deleting any outdated or unused files. "
312
+ "Then, you must ALWAYS verify that the Target Path contains the correct final files, as no other agents were allowed to write to this path.\n"
313
+ )
250
314
 
251
- Present your final coordinated answer in the most helpful and complete way possible."""
315
+ # Add requirements.txt guidance if command execution is enabled
316
+ if enable_command_execution:
317
+ presentation_instructions += (
318
+ "### Package Dependencies:\n\n"
319
+ "Create a `requirements.txt` file listing all Python packages needed to run your code. "
320
+ "This helps users reproduce your work later. Include only the packages you actually used in your solution.\n"
321
+ )
252
322
 
253
323
  # Combine with original system message if provided
254
324
  if original_system_message:
255
325
  return f"""{original_system_message}
256
326
 
257
- COORDINATION CONTEXT:
258
327
  {presentation_instructions}"""
259
328
  else:
260
329
  return presentation_instructions
@@ -269,17 +338,13 @@ COORDINATION CONTEXT:
269
338
 
270
339
  {self.format_current_answers_empty()}"""
271
340
 
272
- def build_case2_user_message(
273
- self, task: str, agent_summaries: Dict[str, str]
274
- ) -> str:
341
+ def build_case2_user_message(self, task: str, agent_summaries: Dict[str, str]) -> str:
275
342
  """Build Case 2 user message (summaries exist)."""
276
343
  return f"""{self.format_original_message(task)}
277
344
 
278
345
  {self.format_current_answers_with_summaries(agent_summaries)}"""
279
346
 
280
- def build_evaluation_message(
281
- self, task: str, agent_answers: Optional[Dict[str, str]] = None
282
- ) -> str:
347
+ def build_evaluation_message(self, task: str, agent_answers: Optional[Dict[str, str]] = None) -> str:
283
348
  """Build evaluation user message for any case."""
284
349
  if agent_answers:
285
350
  return self.build_case2_user_message(task, agent_answers)
@@ -314,9 +379,7 @@ COORDINATION CONTEXT:
314
379
 
315
380
  # Add agent answers
316
381
  if agent_answers:
317
- context_parts.append(
318
- self.format_current_answers_with_summaries(agent_answers)
319
- )
382
+ context_parts.append(self.format_current_answers_with_summaries(agent_answers))
320
383
  else:
321
384
  context_parts.append(self.format_current_answers_empty())
322
385
 
@@ -331,10 +394,17 @@ COORDINATION CONTEXT:
331
394
  task: str,
332
395
  agent_summaries: Optional[Dict[str, str]] = None,
333
396
  valid_agent_ids: Optional[List[str]] = None,
397
+ base_system_message: Optional[str] = None,
334
398
  ) -> Dict[str, Any]:
335
399
  """Build complete initial conversation for MassGen evaluation."""
400
+ # Append the agent's custom system message to the default evaluation message if provided; otherwise use the default alone
401
+ if base_system_message:
402
+ system_message = f"{self.evaluation_system_message()}\n\n#Special Requirement\n{base_system_message}"
403
+ else:
404
+ system_message = self.evaluation_system_message()
405
+
336
406
  return {
337
- "system_message": self.evaluation_system_message(),
407
+ "system_message": system_message,
338
408
  "user_message": self.build_evaluation_message(task, agent_summaries),
339
409
  "tools": self.get_standard_tools(valid_agent_ids),
340
410
  }
@@ -345,13 +415,18 @@ COORDINATION CONTEXT:
345
415
  conversation_history: Optional[List[Dict[str, str]]] = None,
346
416
  agent_summaries: Optional[Dict[str, str]] = None,
347
417
  valid_agent_ids: Optional[List[str]] = None,
418
+ base_system_message: Optional[str] = None,
348
419
  ) -> Dict[str, Any]:
349
420
  """Build complete conversation with conversation history context for MassGen evaluation."""
421
+ # Prepend the agent's custom system message to the default context-aware message if provided; otherwise use the default alone
422
+ if base_system_message:
423
+ system_message = f"{base_system_message}\n\n{self.system_message_with_context(conversation_history)}"
424
+ else:
425
+ system_message = self.system_message_with_context(conversation_history)
426
+
350
427
  return {
351
- "system_message": self.system_message_with_context(conversation_history),
352
- "user_message": self.build_coordination_context(
353
- current_task, conversation_history, agent_summaries
354
- ),
428
+ "system_message": system_message,
429
+ "user_message": self.build_coordination_context(current_task, conversation_history, agent_summaries),
355
430
  "tools": self.get_standard_tools(valid_agent_ids),
356
431
  }
357
432
 
@@ -378,14 +453,219 @@ VOTING RESULTS:
378
453
 
379
454
  Based on the coordination process above, present your final answer:"""
380
455
 
381
- def add_enforcement_message(
382
- self, conversation_messages: List[Dict[str, str]]
383
- ) -> List[Dict[str, str]]:
456
+ def add_enforcement_message(self, conversation_messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
384
457
  """Add enforcement message to existing conversation (Case 3)."""
385
458
  messages = conversation_messages.copy()
386
459
  messages.append({"role": "user", "content": self.enforcement_message()})
387
460
  return messages
388
461
 
462
+ def command_execution_system_message(self) -> str:
463
+ """Generate concise command execution instructions when command line execution is enabled."""
464
+ parts = ["## Command Execution"]
465
+ parts.append("You can run command line commands using the `execute_command` tool.\n")
466
+ parts.append("If a `.venv` directory exists in your workspace, it will be automatically used.")
467
+
468
+ return "\n".join(parts)
469
+
470
+ def filesystem_system_message(
471
+ self,
472
+ main_workspace: Optional[str] = None,
473
+ temp_workspace: Optional[str] = None,
474
+ context_paths: Optional[List[Dict[str, str]]] = None,
475
+ previous_turns: Optional[List[Dict[str, Any]]] = None,
476
+ workspace_prepopulated: bool = False,
477
+ enable_image_generation: bool = False,
478
+ agent_answers: Optional[Dict[str, str]] = None,
479
+ enable_command_execution: bool = False,
480
+ ) -> str:
481
+ """Generate filesystem access instructions for agents with filesystem support.
482
+
483
+ Args:
484
+ main_workspace: Path to agent's main workspace
485
+ temp_workspace: Path to shared reference workspace
486
+ context_paths: List of context paths with permissions
487
+ previous_turns: List of previous turn metadata
488
+ workspace_prepopulated: Whether workspace is pre-populated
489
+ enable_image_generation: Whether image generation is enabled
490
+ agent_answers: Dict of agent answers (keys are agent IDs) to show workspace structure
491
+ enable_command_execution: Whether command line execution is enabled
492
+ """
493
+ if "filesystem_system_message" in self._template_overrides:
494
+ return str(self._template_overrides["filesystem_system_message"])
495
+
496
+ parts = ["## Filesystem Access"]
497
+
498
+ # Explain workspace behavior
499
+ parts.append(
500
+ "Your working directory is set to your workspace, so all relative paths in your file operations "
501
+ "will be resolved from there. This ensures each agent works in isolation while having access to shared references. "
502
+ "Only include in your workspace files that should be used in your answer.\n",
503
+ )
504
+
505
+ if main_workspace:
506
+ workspace_note = f"**Your Workspace**: `{main_workspace}` - Write actual files here using file tools. All your file operations will be relative to this directory."
507
+ if workspace_prepopulated:
508
+ # Workspace is pre-populated with writable copy of most recent turn
509
+ workspace_note += (
510
+ " **Note**: Your workspace already contains a writable copy of the previous turn's results - "
511
+ "you can modify or build upon these files. The original unmodified version is also available as "
512
+ "a read-only context path if you need to reference what was originally there."
513
+ )
514
+ parts.append(workspace_note)
515
+
516
+ if temp_workspace:
517
+ # Build workspace tree structure
518
+ workspace_tree = f"**Shared Reference**: `{temp_workspace}` - Contains previous answers from all agents (read/execute-only)\n"
519
+
520
+ # Add agent subdirectories in tree format
521
+ # This was added because weaker models would often try many incorrect paths.
522
+ # No point in requiring extra list dir calls if we can just show them the structure.
523
+ if agent_answers:
524
+ # Create anonymous mapping: agent1, agent2, etc.
525
+ agent_mapping = {}
526
+ for i, agent_id in enumerate(sorted(agent_answers.keys()), 1):
527
+ agent_mapping[agent_id] = f"agent{i}"
528
+
529
+ workspace_tree += " Available agent workspaces:\n"
530
+ agent_items = list(agent_mapping.items())
531
+ for idx, (agent_id, anon_id) in enumerate(agent_items):
532
+ is_last = idx == len(agent_items) - 1
533
+ prefix = " └── " if is_last else " ├── "
534
+ workspace_tree += f"{prefix}{temp_workspace}/{anon_id}/\n"
535
+
536
+ workspace_tree += (
537
+ " - To improve upon existing answers: Copy files from Shared Reference to your workspace using `copy_file` or `copy_directory` tools, then modify them\n"
538
+ " - These correspond directly to the answers shown in the CURRENT ANSWERS section\n"
539
+ " - However, not all workspaces may have a matching answer (e.g., if an agent was in the middle of working but restarted before submitting an answer). "
540
+ "So, it is wise to check the actual files in the Shared Reference, not rely solely on the CURRENT ANSWERS section.\n"
541
+ )
542
+ parts.append(workspace_tree)
543
+
544
+ if context_paths:
545
+ has_target = any(p.get("will_be_writable", False) for p in context_paths)
546
+ has_readonly_context = any(not p.get("will_be_writable", False) and p.get("permission") == "read" for p in context_paths)
547
+
548
+ if has_target:
549
+ parts.append(
550
+ "\n**Important Context**: If the user asks about improving, fixing, debugging, or understanding an existing "
551
+ "code/project (e.g., 'Why is this code not working?', 'Fix this bug', 'Add feature X'), they are referring "
552
+ "to the Target Path below. First READ the existing files from that path to understand what's there, then "
553
+ "make your changes based on that codebase. Final deliverables must end up there.\n",
554
+ )
555
+ elif has_readonly_context:
556
+ parts.append(
557
+ "\n**Important Context**: If the user asks about debugging or understanding an existing code/project "
558
+ "(e.g., 'Why is this code not working?', 'Explain this bug'), they are referring to (one of) the Context Path(s) "
559
+ "below. Read then provide analysis/explanation based on that codebase - you cannot modify it directly.\n",
560
+ )
561
+
562
+ for path_config in context_paths:
563
+ path = path_config.get("path", "")
564
+ permission = path_config.get("permission", "read")
565
+ will_be_writable = path_config.get("will_be_writable", False)
566
+ if path:
567
+ if permission == "read" and will_be_writable:
568
+ parts.append(
569
+ f"**Target Path**: `{path}` (read-only now, write access later) - This is where your changes will be delivered. "
570
+ f"Work in your workspace first, then the final presenter will place or update files DIRECTLY into `{path}` using the FULL ABSOLUTE PATH.",
571
+ )
572
+ elif permission == "write":
573
+ parts.append(
574
+ f"**Target Path**: `{path}` (write access) - This is where your changes must be delivered. "
575
+ f"First, ensure you place your answer in your workspace, then copy/write files DIRECTLY into `{path}` using FULL ABSOLUTE PATH (not relative paths). "
576
+ f"Files must go directly into the target path itself (e.g., `{path}/file.txt`), NOT into a `.massgen/` subdirectory within it.",
577
+ )
578
+ else:
579
+ parts.append(f"**Context Path**: `{path}` (read-only) - Use FULL ABSOLUTE PATH when reading.")
580
+
581
+ # Add note connecting conversation history (in user message) to context paths (in system message)
582
+ if previous_turns:
583
+ parts.append(
584
+ "\n**Note**: This is a multi-turn conversation. Each User/Assistant exchange in the conversation "
585
+ "history represents one turn. The workspace from each turn is available as a read-only context path "
586
+ "listed above (e.g., turn 1's workspace is at the path ending in `/turn_1/workspace`).",
587
+ )
588
+
589
+ # Add intelligent task handling guidance with clear priority hierarchy
590
+ parts.append(
591
+ "\n**Task Handling Priority**: When responding to user requests, follow this priority order:\n"
592
+ "1. **Use MCP Tools First**: If you have specialized MCP tools available, call them DIRECTLY to complete the task\n"
593
+ " - Save any outputs/artifacts from MCP tools to your workspace\n"
594
+ "2. **Write Code If Needed**: If MCP tools cannot complete the task, write and execute code\n"
595
+ "3. **Create Other Files**: Create configs, documents, or other deliverables as needed\n"
596
+ "4. **Text Response Otherwise**: If no tools or files are needed, provide a direct text answer\n\n"
597
+ "**Important**: Do NOT ask the user for clarification or additional input. Make reasonable assumptions and proceed with sensible defaults. "
598
+ "You will not receive user feedback, so complete the task autonomously based on the original request.\n",
599
+ )
600
+
601
+ # Add requirement for path explanations in answers
602
+ # if enable_image_generation:
603
+ # # # Enabled for image generation tasks
604
+ # parts.append(
605
+ # "\n**Image Generation Tasks**: When working on image generation tasks, if you find images equivalent and cannot choose between them, "
606
+ # "choose the one with the smallest file size.\n"
607
+ # "\n**New Answer**: When calling `new_answer` tool:"
608
+ # "- For non-image generation tasks, if you created files, list your cwd and file paths (but do NOT paste full file contents)\n"
609
+ # "- For image generation tasks, do not use file write tools. Instead, the images are already generated directly "
610
+ # "with the image_generation tool. Then, providing new answer with 1) briefly describing the contents of the images "
611
+ # "and 2) listing your full cwd and the image paths you created.\n",
612
+ # )
613
+ # else:
614
+ # The image-generation branch above is commented out, so this guidance currently applies to all tasks
615
+ new_answer_guidance = "\n**New Answer**: When calling `new_answer`:\n"
616
+ if enable_command_execution:
617
+ new_answer_guidance += "- If you executed commands (e.g., running tests), explain the results in your answer (what passed, what failed, what the output shows)\n"
618
+ new_answer_guidance += "- If you created files, list your cwd and file paths (but do NOT paste full file contents)\n"
619
+ new_answer_guidance += "- If providing a text response, include your analysis/explanation in the `content` field\n"
620
+ parts.append(new_answer_guidance)
621
+
622
+ # Add workspace cleanup guidance
623
+ parts.append(
624
+ "**Workspace Cleanup**: Before submitting your answer with `new_answer`, " "ensure that your workspace contains only the files relevant to your final answer.\n",
625
+ # use `delete_file` or "
626
+ # "`delete_files_batch` to remove any outdated, temporary, or unused files from your workspace. "
627
+ # "Note: You cannot delete read-only files (e.g., files from other agents' workspaces or read-only context paths). "
628
+ # "This ensures only the relevant final files remain for evaluation. For example, if you created "
629
+ # "`old_index.html` then later created `new_website/index.html`, delete the old version.\n",
630
+ )
631
+
632
+ # Add diff tools guidance
633
+ parts.append(
634
+ "**Comparison Tools**: Use `compare_directories` to see differences between two directories (e.g., comparing "
635
+ "your workspace to another agent's workspace or a previous version), or `compare_files` to see line-by-line diffs "
636
+ "between two files. These read-only tools help you understand what changed, build upon existing work effectively, "
637
+ "or verify solutions before voting.\n",
638
+ )
639
+
640
+ # Add voting guidance
641
+ # if enable_image_generation:
642
+ # # Enabled for image generation tasks
643
+ # parts.append(
644
+ # "**Evaluation**: When evaluating agents' answers, do NOT base your decision solely on the answer text. "
645
+ # "Instead, read and verify the actual files in their workspaces (via Shared Reference) to ensure the work matches their claims."
646
+ # "IMPORTANT: For image tasks, you MUST use ONLY the `mcp__workspace__extract_multimodal_files` tool to view and evaluate images. Do NOT use any other tool for this purpose.\n",
647
+ # )
648
+ # else:
649
+ # The image-generation branch above is commented out, so this guidance currently applies to all tasks
650
+ parts.append(
651
+ "**Evaluation**: When evaluating agents' answers, do NOT base your decision solely on the answer text. "
652
+ "Instead, read and verify the actual files in their workspaces (via Shared Reference) to ensure the work matches their claims.\n",
653
+ )
654
+
655
+ # Add command execution instructions if enabled
656
+ if enable_command_execution:
657
+ command_exec_message = self.command_execution_system_message()
658
+ parts.append(f"\n{command_exec_message}")
659
+
660
+ return "\n".join(parts)
661
+
662
+
663
+ # ### IMPORTANT Evaluation Note:
664
+ # When evaluating other agents' work, focus on the CONTENT and FUNCTIONALITY of their files.
665
+ # Each agent works in their own isolated workspace - this is correct behavior.
666
+ # The paths shown in their answers are normalized so you can access and verify their work.
667
+ # Judge based on code quality, correctness, and completeness, not on which workspace directory was used.
668
+
389
669
 
390
670
  # Global template instance
391
671
  _templates = MessageTemplates()
@@ -414,9 +694,7 @@ def build_case2_conversation(
414
694
  valid_agent_ids: Optional[List[str]] = None,
415
695
  ) -> Dict[str, Any]:
416
696
  """Build Case 2 conversation (summaries exist)."""
417
- return get_templates().build_initial_conversation(
418
- task, agent_summaries, valid_agent_ids
419
- )
697
+ return get_templates().build_initial_conversation(task, agent_summaries, valid_agent_ids)
420
698
 
421
699
 
422
700
  def get_standard_tools(