massgen 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/backend/azure_openai.py +9 -1
  5. massgen/backend/base.py +4 -0
  6. massgen/backend/claude_code.py +9 -1
  7. massgen/backend/gemini.py +35 -6
  8. massgen/backend/gemini_utils.py +30 -0
  9. massgen/chat_agent.py +9 -3
  10. massgen/cli.py +291 -43
  11. massgen/config_builder.py +163 -18
  12. massgen/configs/README.md +52 -6
  13. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  14. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  15. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  16. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  17. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  18. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  19. massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +67 -0
  20. massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +68 -0
  21. massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +98 -0
  22. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  23. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  24. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  25. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  26. massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +54 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  28. massgen/configs/tools/memory/README.md +199 -0
  29. massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +131 -0
  30. massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +133 -0
  31. massgen/configs/tools/memory/test_context_window_management.py +286 -0
  32. massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +97 -0
  33. massgen/docker/README.md +83 -0
  34. massgen/filesystem_manager/_code_execution_server.py +22 -7
  35. massgen/filesystem_manager/_docker_manager.py +21 -1
  36. massgen/filesystem_manager/_filesystem_manager.py +8 -0
  37. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  38. massgen/formatter/_gemini_formatter.py +73 -0
  39. massgen/frontend/coordination_ui.py +175 -257
  40. massgen/frontend/displays/base_display.py +29 -0
  41. massgen/frontend/displays/rich_terminal_display.py +155 -9
  42. massgen/frontend/displays/simple_display.py +21 -0
  43. massgen/frontend/displays/terminal_display.py +22 -2
  44. massgen/logger_config.py +50 -6
  45. massgen/message_templates.py +123 -3
  46. massgen/orchestrator.py +319 -38
  47. massgen/tests/test_code_execution.py +178 -0
  48. massgen/tests/test_orchestration_restart.py +204 -0
  49. massgen/tool/__init__.py +4 -0
  50. massgen/tool/_multimodal_tools/understand_audio.py +193 -0
  51. massgen/tool/_multimodal_tools/understand_file.py +550 -0
  52. massgen/tool/_multimodal_tools/understand_image.py +212 -0
  53. massgen/tool/_multimodal_tools/understand_video.py +313 -0
  54. massgen/tool/docs/multimodal_tools.md +779 -0
  55. massgen/tool/workflow_toolkits/__init__.py +26 -0
  56. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  57. massgen/utils.py +1 -0
  58. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/METADATA +8 -3
  59. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/RECORD +63 -36
  60. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/WHEEL +0 -0
  61. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/entry_points.txt +0 -0
  62. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/licenses/LICENSE +0 -0
  63. {massgen-0.1.2.dist-info → massgen-0.1.3.dist-info}/top_level.txt +0 -0
@@ -193,6 +193,7 @@ class RichTerminalDisplay(TerminalDisplay):
193
193
  self._key_handler = None
194
194
  self._input_thread = None
195
195
  self._stop_input_thread = False
196
+ self._user_quit_requested = False # Flag to signal user wants to quit
196
197
  self._original_settings = None
197
198
  self._agent_selector_active = False # Flag to prevent duplicate agent selector calls
198
199
 
@@ -207,6 +208,15 @@ class RichTerminalDisplay(TerminalDisplay):
207
208
  self._final_presentation_agent = None
208
209
  self._final_presentation_vote_results = None
209
210
 
211
+ # Post-evaluation display state
212
+ self._post_evaluation_active = False
213
+ self._post_evaluation_content = ""
214
+ self._post_evaluation_agent = None
215
+
216
+ # Restart context state (for attempt 2+)
217
+ self._restart_context_reason = None
218
+ self._restart_context_instructions = None
219
+
210
220
  # Code detection patterns
211
221
  self.code_patterns = [
212
222
  r"```(\w+)?\n(.*?)\n```", # Markdown code blocks
@@ -1077,12 +1087,25 @@ class RichTerminalDisplay(TerminalDisplay):
1077
1087
  Layout(footer, name="footer", size=8),
1078
1088
  )
1079
1089
  else:
1080
- # Arrange layout without final presentation
1081
- layout.split_column(
1082
- Layout(header, name="header", size=5),
1083
- Layout(agent_columns, name="main"),
1084
- Layout(footer, name="footer", size=8),
1085
- )
1090
+ # Build layout components
1091
+ layout_components = []
1092
+
1093
+ # Add header
1094
+ layout_components.append(Layout(header, name="header", size=5))
1095
+
1096
+ # Add agent columns
1097
+ layout_components.append(Layout(agent_columns, name="main"))
1098
+
1099
+ # Add post-evaluation panel if active (below agents)
1100
+ post_eval_panel = self._create_post_evaluation_panel()
1101
+ if post_eval_panel:
1102
+ layout_components.append(Layout(post_eval_panel, name="post_eval", size=6))
1103
+
1104
+ # Add footer
1105
+ layout_components.append(Layout(footer, name="footer", size=8))
1106
+
1107
+ # Arrange layout
1108
+ layout.split_column(*layout_components)
1086
1109
 
1087
1110
  return layout
1088
1111
 
@@ -1343,7 +1366,10 @@ class RichTerminalDisplay(TerminalDisplay):
1343
1366
  elif key == "q":
1344
1367
  # Quit the application - restore terminal and stop
1345
1368
  self._stop_input_thread = True
1369
+ self._user_quit_requested = True
1346
1370
  self._restore_terminal_settings()
1371
+ # Print quit message
1372
+ self.console.print("\n[yellow]Exiting coordination...[/yellow]")
1347
1373
 
1348
1374
  def _open_agent_in_default_text_editor(self, agent_id: str) -> None:
1349
1375
  """Open agent's txt file in default text editor."""
@@ -2013,6 +2039,56 @@ class RichTerminalDisplay(TerminalDisplay):
2013
2039
  expand=True, # Full width
2014
2040
  )
2015
2041
 
2042
def _create_post_evaluation_panel(self) -> Optional[Panel]:
    """Build the post-evaluation panel rendered below the agent columns.

    Returns:
        ``None`` while post-evaluation is not active. Otherwise a
        fixed-height panel containing either a placeholder (nothing has
        been streamed yet) or the last five lines of the streamed
        post-evaluation content, with blank lines dropped.
    """
    if not self._post_evaluation_active:
        return None

    body = Text()
    streamed = self._post_evaluation_content

    if streamed:
        # Only the tail of the stream is shown; the panel height is fixed.
        for raw_line in streamed.split("\n")[-5:]:
            if not raw_line.strip():
                continue
            body.append(self._format_content_line(raw_line))
            body.append("\n")
    else:
        body.append("Evaluating answer...", style=self.colors["text"])

    info_style = self.colors["info"]
    heading = f"🔍 Post-Evaluation by {self._post_evaluation_agent}"

    return Panel(
        body,
        title=f"[{info_style}]{heading}[/{info_style}]",
        border_style=info_style,
        box=ROUNDED,
        expand=True,
        height=6,  # Fixed height to not take too much space
    )
2072
+
2073
def _create_restart_context_panel(self) -> Optional[Panel]:
    """Build the yellow "previous attempt feedback" panel.

    Returns:
        ``None`` unless both a restart reason and restart instructions are
        stored on the display; otherwise a panel listing the two.
    """
    if not (self._restart_context_reason and self._restart_context_instructions):
        return None

    # (text, style) pairs appended in order to form the panel body.
    segments = (
        ("Reason: ", "bold bright_yellow"),
        (f"{self._restart_context_reason}\n\n", "bright_yellow"),
        ("Instructions: ", "bold bright_yellow"),
        (f"{self._restart_context_instructions}", "bright_yellow"),
    )

    body = Text()
    for fragment, fragment_style in segments:
        body.append(fragment, style=fragment_style)

    return Panel(
        body,
        title="[bold bright_yellow]⚠️ PREVIOUS ATTEMPT FEEDBACK[/bold bright_yellow]",
        border_style="bright_yellow",
        box=ROUNDED,
        expand=True,
    )
2091
+
2016
2092
  def _format_presentation_content(self, content: str) -> Text:
2017
2093
  """Format presentation content with enhanced styling for orchestrator queries."""
2018
2094
  formatted = Text()
@@ -3174,7 +3250,7 @@ class RichTerminalDisplay(TerminalDisplay):
3174
3250
  title="[bold bright_green]🎯 FINAL COORDINATED ANSWER[/bold bright_green]",
3175
3251
  border_style=self.colors["success"],
3176
3252
  box=DOUBLE,
3177
- expand=False,
3253
+ expand=True,
3178
3254
  )
3179
3255
 
3180
3256
  self.console.print(final_panel)
@@ -3244,10 +3320,80 @@ class RichTerminalDisplay(TerminalDisplay):
3244
3320
  )
3245
3321
  self.console.print(error_text)
3246
3322
 
3247
- # Show interactive options for viewing agent details (only if not in safe mode)
3248
- if self._keyboard_interactive_mode and hasattr(self, "_agent_keys") and not self._safe_keyboard_mode:
3323
+ # Show interactive options for viewing agent details (only if not in safe mode and not restarting)
3324
+ # Don't show inspection menu if orchestration is restarting
3325
+ is_restarting = hasattr(self, "orchestrator") and hasattr(self.orchestrator, "restart_pending") and self.orchestrator.restart_pending
3326
+ if self._keyboard_interactive_mode and hasattr(self, "_agent_keys") and not self._safe_keyboard_mode and not is_restarting:
3249
3327
  self.show_agent_selector()
3250
3328
 
3329
def show_post_evaluation_content(self, content: str, agent_id: str):
    """Record a chunk of streamed post-evaluation output.

    Nothing is rendered here: the chunk is appended to the accumulated
    post-evaluation text and the panel state is flagged as active.

    Args:
        content: Newly streamed text to append.
        agent_id: Agent performing the post-evaluation.
    """
    accumulated = self._post_evaluation_content + content
    self._post_evaluation_content = accumulated
    self._post_evaluation_agent = agent_id
    self._post_evaluation_active = True
    # Panel will be created/updated in _update_display via layout
3335
+
3336
def show_restart_banner(self, reason: str, instructions: str, attempt: int, max_attempts: int):
    """Display restart decision banner prominently (like final presentation).

    Stops the live display if one is running, prints a static banner with
    the restart reason and instructions, pauses so it can be read, and
    then clears the per-attempt display state.

    Args:
        reason: Why orchestration is being restarted.
        instructions: Guidance for the next attempt.
        attempt: Attempt number shown in the banner title.
        max_attempts: Maximum attempt count shown in the banner title.
    """
    # Stop live display temporarily for static banner
    if self.live:
        self.live.stop()
        self.live = None

    # Create restart banner content
    banner_content = Text()
    banner_content.append("\nREASON:\n", style="bold bright_yellow")
    banner_content.append(f"{reason}\n\n", style="bright_yellow")
    banner_content.append("INSTRUCTIONS FOR NEXT ATTEMPT:\n", style="bold bright_yellow")
    banner_content.append(f"{instructions}\n", style="bright_yellow")

    restart_panel = Panel(
        banner_content,
        title=f"[bold bright_yellow]🔄 ORCHESTRATION RESTART (Attempt {attempt}/{max_attempts})[/bold bright_yellow]",
        border_style="bright_yellow",
        box=DOUBLE,
        expand=True,
    )

    self.console.print(restart_panel)
    time.sleep(2.0)  # Allow user to read restart banner

    # Reset state for fresh attempt - clear all agent content and status
    for agent_id in self.agent_ids:
        self.agent_outputs[agent_id] = []
        self.agent_status[agent_id] = "waiting"
        # Clear text buffers
        if hasattr(self, "_text_buffers") and agent_id in self._text_buffers:
            self._text_buffers[agent_id] = ""

    # Clear cached panels and ALL cached state
    self._agent_panels_cache.clear()
    self._footer_cache = None
    self._header_cache = None

    # Clear orchestrator events (from base class)
    self.orchestrator_events = []

    # Clear presentation state
    self._final_presentation_active = False
    self._final_presentation_content = ""
    self._post_evaluation_active = False
    self._post_evaluation_content = ""

    # Clear restart context state (so it doesn't show on next attempt)
    self._restart_context_reason = None
    self._restart_context_instructions = None

    # DON'T restart live display here - let the next coordinate() call handle it
    # The CLI will create a fresh UI instance which will initialize its own display
3390
+
3391
def show_restart_context_panel(self, reason: str, instructions: str):
    """Store the restart feedback to be shown for a follow-up attempt.

    This only records the values on the display; nothing is drawn here.

    Args:
        reason: Why the previous attempt was restarted.
        instructions: Guidance carried over to the new attempt.
    """
    self._restart_context_reason, self._restart_context_instructions = reason, instructions
    # Panel will be displayed in initialize() method before agent columns
3396
+
3251
3397
  def _display_answer_with_flush(self, answer: str) -> None:
3252
3398
  """Display answer with flush output effect - streaming character by character."""
3253
3399
  import sys
@@ -90,6 +90,27 @@ class SimpleDisplay(BaseDisplay):
90
90
  print(f"🗳️ Vote results: {vote_summary}")
91
91
  print("=" * 50)
92
92
 
93
def show_post_evaluation_content(self, content: str, agent_id: str):
    """Print a post-evaluation chunk tagged with the agent id, without a trailing newline."""
    tagged_chunk = f"🔍 [{agent_id}] {content}"
    print(tagged_chunk, end="", flush=True)
96
+
97
def show_restart_banner(self, reason: str, instructions: str, attempt: int, max_attempts: int):
    """Print a plain-text restart banner framed by rows of 🔄 markers.

    Args:
        reason: Why orchestration is being restarted.
        instructions: Guidance for the next attempt.
        attempt: Attempt number shown in the banner.
        max_attempts: Maximum attempt count shown in the banner.
    """
    rule = "🔄" * 40
    banner_lines = (
        "\n" + rule,
        f"ORCHESTRATION RESTART - Attempt {attempt}/{max_attempts}",
        rule,
        f"\n{reason}\n",
        f"Instructions: {instructions}\n",
        rule + "\n",
    )
    for banner_line in banner_lines:
        print(banner_line)
105
+
106
def show_restart_context_panel(self, reason: str, instructions: str):
    """Print the previous attempt's feedback between two rows of warning markers.

    Args:
        reason: Why the previous attempt was restarted.
        instructions: Guidance for the current attempt.
    """
    warning_row = "⚠️ " * 30
    for output_line in (
        "\n" + warning_row,
        "PREVIOUS ATTEMPT FEEDBACK",
        f"Reason: {reason}",
        f"Instructions: {instructions}",
        warning_row + "\n",
    ):
        print(output_line)
113
+
93
114
  def cleanup(self):
94
115
  """Clean up resources."""
95
116
  print(f"\n✅ Coordination completed with {len(self.agent_ids)} agents")
@@ -220,8 +220,6 @@ class TerminalDisplay(BaseDisplay):
220
220
 
221
221
  # Add working indicator if transitioning to working
222
222
  if old_status != "working" and status == "working":
223
- agent_prefix = f"[{agent_id}] " if self.num_agents > 1 else ""
224
- print(f"\n{agent_prefix}⚡ Working...")
225
223
  if not self.agent_outputs[agent_id] or not self.agent_outputs[agent_id][-1].startswith("⚡"):
226
224
  self.agent_outputs[agent_id].append("⚡ Working...")
227
225
 
@@ -245,6 +243,28 @@ class TerminalDisplay(BaseDisplay):
245
243
  print(f"🗳️ Vote results: {vote_summary}")
246
244
  print("=" * 60)
247
245
 
246
def show_post_evaluation_content(self, content: str, agent_id: str):
    """Print a post-evaluation chunk labelled with the agent id, without a trailing newline."""
    labelled_chunk = f"🔍 Post-Evaluation [{agent_id}]: {content}"
    print(labelled_chunk, end="", flush=True)
249
+
250
def show_restart_banner(self, reason: str, instructions: str, attempt: int, max_attempts: int):
    """Print a plain-text restart banner framed by 80-character rules.

    Args:
        reason: Why orchestration is being restarted.
        instructions: Guidance for the next attempt.
        attempt: Attempt number shown in the banner.
        max_attempts: Maximum attempt count shown in the banner.
    """
    rule = "=" * 80
    banner_lines = (
        "\n" + rule,
        f"🔄 ORCHESTRATION RESTART (Attempt {attempt}/{max_attempts})",
        rule,
        f"\nREASON:\n{reason}",
        f"\nINSTRUCTIONS FOR NEXT ATTEMPT:\n{instructions}",
        "\n" + rule + "\n",
    )
    for banner_line in banner_lines:
        print(banner_line)
258
+
259
def show_restart_context_panel(self, reason: str, instructions: str):
    """Print the previous attempt's feedback framed by rows of warning markers.

    Args:
        reason: Why the previous attempt was restarted.
        instructions: Guidance for the current attempt.
    """
    warning_row = "⚠" * 40
    for output_line in (
        "\n" + warning_row,
        "⚠️ PREVIOUS ATTEMPT FEEDBACK",
        warning_row,
        f"\nReason: {reason}",
        f"\nInstructions: {instructions}",
        "\n" + warning_row + "\n",
    ):
        print(output_line)
267
+
248
268
  def cleanup(self):
249
269
  """Clean up display resources."""
250
270
  # No special cleanup needed for terminal display
massgen/logger_config.py CHANGED
@@ -41,6 +41,7 @@ _DEBUG_MODE = False
41
41
  _LOG_SESSION_DIR = None
42
42
  _LOG_BASE_SESSION_DIR = None # Base session dir (without turn subdirectory)
43
43
  _CURRENT_TURN = None
44
+ _CURRENT_ATTEMPT = None # Current attempt number for restart tracking
44
45
 
45
46
  # Console logging suppression (for Rich Live display compatibility)
46
47
  _CONSOLE_HANDLER_ID = None
@@ -48,15 +49,15 @@ _CONSOLE_SUPPRESSED = False
48
49
 
49
50
 
50
51
  def get_log_session_dir(turn: Optional[int] = None) -> Path:
51
- """Get the current log session directory.
52
+ """Get the current log session directory, including attempt subdirectory if set.
52
53
 
53
54
  Args:
54
55
  turn: Optional turn number for multi-turn conversations
55
56
 
56
57
  Returns:
57
- Path to the log directory
58
+ Path to the log directory (includes attempt subdirectory if _CURRENT_ATTEMPT is set)
58
59
  """
59
- global _LOG_SESSION_DIR, _LOG_BASE_SESSION_DIR, _CURRENT_TURN
60
+ global _LOG_SESSION_DIR, _LOG_BASE_SESSION_DIR, _CURRENT_TURN, _CURRENT_ATTEMPT
60
61
 
61
62
  # Initialize base session dir once per session
62
63
  if _LOG_BASE_SESSION_DIR is None:
@@ -88,19 +89,62 @@ def get_log_session_dir(turn: Optional[int] = None) -> Path:
88
89
  _LOG_SESSION_DIR = None # Force recreation
89
90
 
90
91
  if _LOG_SESSION_DIR is None:
91
- # Create directory structure based on turn
92
+ # Build directory structure based on turn and attempt
92
93
  if _CURRENT_TURN and _CURRENT_TURN > 0:
93
94
  # Multi-turn conversation: organize by turn within session
94
- _LOG_SESSION_DIR = _LOG_BASE_SESSION_DIR / f"turn_{_CURRENT_TURN}"
95
+ base_dir = _LOG_BASE_SESSION_DIR / f"turn_{_CURRENT_TURN}"
95
96
  else:
96
97
  # First execution or single execution: use base session dir
97
- _LOG_SESSION_DIR = _LOG_BASE_SESSION_DIR
98
+ base_dir = _LOG_BASE_SESSION_DIR
99
+
100
+ # Add attempt subdirectory if attempt is set
101
+ if _CURRENT_ATTEMPT and _CURRENT_ATTEMPT > 0:
102
+ _LOG_SESSION_DIR = base_dir / f"attempt_{_CURRENT_ATTEMPT}"
103
+ else:
104
+ _LOG_SESSION_DIR = base_dir
98
105
 
99
106
  _LOG_SESSION_DIR.mkdir(parents=True, exist_ok=True)
100
107
 
101
108
  return _LOG_SESSION_DIR
102
109
 
103
110
 
111
def set_log_attempt(attempt: int) -> None:
    """Record the attempt number used for restart tracking.

    The cached session directory is cleared as well, so it is rebuilt
    (with the new attempt subdirectory) the next time it is requested.

    Args:
        attempt: Attempt number (1-indexed)
    """
    global _LOG_SESSION_DIR, _CURRENT_ATTEMPT
    # Drop the cached directory so it is recreated for the new attempt.
    _LOG_SESSION_DIR = None
    _CURRENT_ATTEMPT = attempt
122
+
123
+
124
def get_log_session_dir_base() -> Path:
    """Return the log directory for the session or turn, ignoring the attempt level.

    This is useful for copying final results to the root level after all
    attempts complete.

    Returns:
        The ``turn_<n>`` directory under the base session directory when a
        positive turn number is set, otherwise the base session directory.
    """
    if _LOG_BASE_SESSION_DIR is None:
        # Calling get_log_session_dir initializes the base session directory.
        get_log_session_dir()

    if not (_CURRENT_TURN and _CURRENT_TURN > 0):
        # Single turn: the session root is the base.
        return _LOG_BASE_SESSION_DIR

    # Multi-turn conversation: the turn directory is the base.
    return _LOG_BASE_SESSION_DIR / f"turn_{_CURRENT_TURN}"
146
+
147
+
104
148
  def save_execution_metadata(
105
149
  query: str,
106
150
  config_path: Optional[str] = None,
@@ -378,6 +378,93 @@ Present the best possible coordinated answer by combining the strengths from all
378
378
  else:
379
379
  return presentation_instructions
380
380
 
381
+ def post_evaluation_system_message(
382
+ self,
383
+ original_system_message: Optional[str] = None,
384
+ ) -> str:
385
+ """System message for post-evaluation phase after final presentation.
386
+
387
+ The winning agent evaluates its own answer with a fresh perspective and decides
388
+ whether to submit or restart with specific improvement instructions.
389
+
390
+ Args:
391
+ original_system_message: The agent's original system message to preserve
+
+ Returns:
+ The "post_evaluation_system_message" template override (converted to str) when one
+ is configured; otherwise the built-in evaluation instructions, preceded by
+ original_system_message when it is provided.
392
+ """
+ # A configured template override replaces the built-in instructions entirely.
393
+ if "post_evaluation_system_message" in self._template_overrides:
394
+ return str(self._template_overrides["post_evaluation_system_message"])
395
+
396
+ evaluation_instructions = """## Post-Presentation Evaluation
397
+
398
+ You have just presented a final answer to the user. Now you must evaluate whether your answer fully addresses the original task.
399
+
400
+ **Your Task:**
401
+ Review the final answer that was presented and determine if it completely and accurately addresses the original task requirements.
402
+
403
+ **Available Tools:**
404
+ You have access to the same filesystem and MCP tools that were available during presentation. Use these tools to:
405
+ - Verify that claimed files actually exist in the workspace
406
+ - Check file contents to confirm they match what was described
407
+ - Validate any technical claims or implementations
408
+
409
+ **Decision:**
410
+ You must call ONE of these tools:
411
+
412
+ 1. **submit(confirmed=True)** - Use this when:
413
+ - The answer fully addresses ALL parts of the original task
414
+ - All claims in the answer are accurate and verified
415
+ - The work is complete and ready for the user
416
+
417
+ 2. **restart_orchestration(reason, instructions)** - Use this when:
418
+ - The answer is incomplete (missing required elements)
419
+ - The answer contains errors or inaccuracies
420
+ - Important aspects of the task were not addressed
421
+
422
+ Provide:
423
+ - **reason**: Clear explanation of what's wrong (e.g., "The task required descriptions of two Beatles, but only John Lennon was described")
424
+ - **instructions**: Detailed, actionable guidance for the next attempt (e.g.,
425
+ "Provide two descriptions (John Lennon AND Paul McCartney). Each should include:
426
+ birth year, role in band, notable songs, impact on music. Use 4-6 sentences per person.")
427
+
428
+ **Important Notes:**
429
+ - Be honest and thorough in your evaluation
430
+ - You are evaluating your own work with a fresh perspective
431
+ - If you find problems, restarting with clear instructions will lead to a better result
432
+ - The restart process gives you another opportunity to get it right
433
+ """
434
+
435
+ # Combine with original system message if provided
436
+ if original_system_message:
437
+ return f"""{original_system_message}
438
+
439
+ {evaluation_instructions}"""
440
+ else:
441
+ return evaluation_instructions
442
+
443
def format_restart_context(self, reason: str, instructions: str) -> str:
    """Render the feedback block handed to agents on a restarted attempt.

    This context is added to agent messages (like multi-turn context) on
    restart attempts.

    Args:
        reason: Why the previous attempt was insufficient
        instructions: Detailed guidance for improvement

    Returns:
        The result of the "format_restart_context" template override when
        one is configured (called if callable, otherwise formatted as a
        string template with ``reason`` and ``instructions``); otherwise
        the built-in feedback block.
    """
    overrides = self._template_overrides
    if "format_restart_context" in overrides:
        custom = overrides["format_restart_context"]
        if not callable(custom):
            return str(custom).format(reason=reason, instructions=instructions)
        return custom(reason, instructions)

    feedback_lines = (
        "<PREVIOUS ATTEMPT FEEDBACK>",
        "The previous orchestration attempt was restarted because:",
        f"{reason}",
        "",
        "**Instructions for this attempt:**",
        f"{instructions}",
        "",
        "Please address these specific issues in your coordination and final answer.",
        "<END OF PREVIOUS ATTEMPT FEEDBACK>",
    )
    return "\n".join(feedback_lines)
467
+
381
468
  # =============================================================================
382
469
  # COMPLETE MESSAGE BUILDERS
383
470
  # =============================================================================
@@ -509,10 +596,36 @@ Based on the coordination process above, present your final answer:"""
509
596
  messages.append({"role": "user", "content": self.enforcement_message()})
510
597
  return messages
511
598
 
512
- def command_execution_system_message(self) -> str:
513
- """Generate concise command execution instructions when command line execution is enabled."""
599
def command_execution_system_message(
    self,
    docker_mode: bool = False,
    enable_sudo: bool = False,
) -> str:
    """Build the short command-execution section of the system prompt.

    Args:
        docker_mode: Whether commands execute in Docker containers
        enable_sudo: Whether sudo is available in Docker containers
            (only consulted when ``docker_mode`` is true)

    Returns:
        The instruction lines joined with newlines.
    """
    sections = [
        "## Command Execution",
        "You can run command line commands using the `execute_command` tool.\n",
    ]

    if docker_mode:
        if enable_sudo:
            sudo_notes = [
                "- **Sudo is available**: You can install packages with `sudo apt-get install <package>`",
                "- Example: `sudo apt-get update && sudo apt-get install -y ffmpeg`",
            ]
        else:
            sudo_notes = [
                "- Sudo is NOT available - use pip/npm for user-level packages only",
                "- For system packages, ask the user to rebuild the Docker image with needed packages",
            ]

        sections.extend(
            [
                "**IMPORTANT: Docker Execution Environment**",
                "- You are running in a Linux Docker container (Debian-based)",
                "- Base image: Python 3.11-slim with Node.js 20.x",
                "- Pre-installed: git, curl, build-essential, pytest, requests, numpy, pandas",
                "- Use `apt-get` for system packages (NOT brew, dnf, yum, etc.)",
                *sudo_notes,
                "",  # blank separator after the Docker section
            ],
        )

    sections.append("If a `.venv` directory exists in your workspace, it will be automatically used.")

    return "\n".join(sections)
@@ -527,6 +640,8 @@ Based on the coordination process above, present your final answer:"""
527
640
  enable_image_generation: bool = False,
528
641
  agent_answers: Optional[Dict[str, str]] = None,
529
642
  enable_command_execution: bool = False,
643
+ docker_mode: bool = False,
644
+ enable_sudo: bool = False,
530
645
  ) -> str:
531
646
  """Generate filesystem access instructions for agents with filesystem support.
532
647
 
@@ -539,6 +654,8 @@ Based on the coordination process above, present your final answer:"""
539
654
  enable_image_generation: Whether image generation is enabled
540
655
  agent_answers: Dict of agent answers (keys are agent IDs) to show workspace structure
541
656
  enable_command_execution: Whether command line execution is enabled
657
+ docker_mode: Whether commands execute in Docker containers
658
+ enable_sudo: Whether sudo is available in Docker containers
542
659
  """
543
660
  if "filesystem_system_message" in self._template_overrides:
544
661
  return str(self._template_overrides["filesystem_system_message"])
@@ -704,7 +821,10 @@ Based on the coordination process above, present your final answer:"""
704
821
 
705
822
  # Add command execution instructions if enabled
706
823
  if enable_command_execution:
707
- command_exec_message = self.command_execution_system_message()
824
+ command_exec_message = self.command_execution_system_message(
825
+ docker_mode=docker_mode,
826
+ enable_sudo=enable_sudo,
827
+ )
708
828
  parts.append(f"\n{command_exec_message}")
709
829
 
710
830
  return "\n".join(parts)