massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (82) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,407 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Tests for size and dimension limits in multimodal tools (image, video, audio).
5
+
6
+ This test suite generates fake media files to test:
7
+ - understand_image: 18MB file size + 768px × 2000px dimension limits
8
+ - understand_video: Frame dimension limits (768px × 2000px per frame)
9
+ - understand_audio: 25MB file size limit
10
+
11
+ All test files are created in temporary directories and cleaned up after tests.
12
+ """
13
+
14
+ import tempfile
15
+ from pathlib import Path
16
+
17
+ import pytest
18
+
19
+
20
class TestImageSizeLimits:
    """Test suite for understand_image size and dimension limits.

    The API-backed tests call the real ``understand_image`` tool on generated
    images; ``test_image_dimension_calculation`` only checks the 768px / 2000px
    threshold arithmetic locally.
    """

    @pytest.fixture
    def temp_dir(self):
        """Yield a temporary directory as a ``Path``; it is removed after the test."""
        with tempfile.TemporaryDirectory() as tmpdir:
            yield Path(tmpdir)

    @staticmethod
    def _load_result(result, title: str) -> dict:
        """
        Validate a tool result, print it under a banner and return its parsed JSON.

        Args:
            result: Object returned by the tool; its first output block's
                ``data`` attribute is parsed as JSON.
            title: Text shown after ``TEST: `` in the printed banner.

        Returns:
            The decoded JSON payload of the first output block.
        """
        import json

        assert result.output_blocks is not None
        # Guard the [0] access below so an empty result fails with a clear assertion
        assert len(result.output_blocks) > 0

        result_data = json.loads(result.output_blocks[0].data)

        print("\n" + "=" * 80)
        print(f"TEST: {title}")
        print("=" * 80)
        print(json.dumps(result_data, indent=2))
        print("=" * 80 + "\n")

        return result_data

    def _create_test_image(self, width: int, height: int, output_path: Path, format: str = "PNG"):
        """
        Create a test image with specified dimensions.

        Args:
            width: Image width in pixels
            height: Image height in pixels
            output_path: Path to save the image
            format: Image format (PNG or JPEG)
        """
        import numpy as np
        from PIL import Image

        # Simple gradient image: red ramps top-to-bottom, green ramps left-to-right.
        # The float -> uint8 cast truncates, matching int((i / n) * 255) per row/column.
        red_ramp = (np.arange(height) / height * 255).astype(np.uint8)
        green_ramp = (np.arange(width) / width * 255).astype(np.uint8)

        img_array = np.zeros((height, width, 3), dtype=np.uint8)
        img_array[:, :, 0] = red_ramp[:, np.newaxis]
        img_array[:, :, 1] = green_ramp[np.newaxis, :]

        # An (H, W, 3) uint8 array is interpreted as RGB without an explicit mode
        Image.fromarray(img_array).save(output_path, format=format)

    def _create_large_image(self, output_path: Path, target_size_mb: float = 20):
        """
        Create a large image file exceeding size limits.

        The image is a square of random noise whose raw RGB payload is roughly
        ``target_size_mb``; the size of the saved PNG depends on compression.

        Args:
            output_path: Path to save the image
            target_size_mb: Target size in megabytes
        """
        import numpy as np
        from PIL import Image

        # Rough estimate: width * height * 3 (RGB) bytes should reach the target
        pixels_needed = int((target_size_mb * 1024 * 1024) / 3)
        side = int(pixels_needed**0.5)

        # Random noise doesn't compress well; a fixed seed keeps the fixture reproducible
        rng = np.random.default_rng(0)
        img_array = rng.integers(0, 256, size=(side, side, 3), dtype=np.uint8)
        Image.fromarray(img_array).save(output_path, format="PNG")

    @pytest.mark.asyncio
    async def test_image_within_limits(self, temp_dir):
        """Test that images within size and dimension limits are processed without resizing."""
        from massgen.tool._multimodal_tools.understand_image import understand_image

        # Create a small image within limits (512x512)
        img_path = temp_dir / "small_image.png"
        self._create_test_image(512, 512, img_path, format="PNG")

        # Use real OpenAI API
        result = await understand_image(str(img_path), prompt="Describe this test image in one sentence.")

        result_data = self._load_result(result, "Image Within Limits (512x512)")
        assert result_data["success"] is True

    @pytest.mark.asyncio
    async def test_image_dimension_limit(self, temp_dir):
        """Test that images exceeding dimension limits are resized."""
        from PIL import Image

        from massgen.tool._multimodal_tools.understand_image import understand_image

        # Create an image exceeding dimension limits (3000x4000)
        img_path = temp_dir / "large_dimensions.jpg"
        self._create_test_image(3000, 4000, img_path, format="JPEG")

        # Check original size before handing the file to the tool
        with Image.open(img_path) as img:
            original_width, original_height = img.size
        assert original_width == 3000
        assert original_height == 4000

        # Use real OpenAI API - should resize internally and succeed
        result = await understand_image(str(img_path), prompt="Describe this test image in one sentence.")

        result_data = self._load_result(result, "Image Exceeding Dimension Limits (3000x4000)")
        assert result_data["success"] is True

    def test_image_dimension_calculation(self, temp_dir):
        """Test dimension limit calculation logic directly."""
        # Thresholds restated locally; this test does not call understand_image
        max_short_side = 768
        max_long_side = 2000

        test_cases = [
            # (width, height, needs_resize)
            (512, 512, False),  # Within limits
            (768, 2000, False),  # Exactly at limits
            (2000, 768, False),  # Rotated, exactly at limits
            (800, 1000, True),  # Short side exceeds
            (1000, 2500, True),  # Long side exceeds
            (3000, 4000, True),  # Both exceed
        ]

        for width, height, expected_resize in test_cases:
            short_side, long_side = sorted((width, height))
            needs_resize = short_side > max_short_side or long_side > max_long_side

            assert needs_resize == expected_resize, f"Dimension check failed for {width}x{height}: expected resize={expected_resize}, got {needs_resize}"
159
+
160
+
161
class TestVideoFrameLimits:
    """Test suite for understand_video frame dimension limits.

    Both tests generate a short synthetic video with OpenCV and pass it to the
    real ``understand_video`` tool; they are skipped when OpenCV is unavailable.
    """

    @pytest.fixture
    def temp_dir(self):
        """Yield a temporary directory as a ``Path``; it is removed after the test."""
        with tempfile.TemporaryDirectory() as tmpdir:
            yield Path(tmpdir)

    @staticmethod
    def _require_cv2():
        """Skip the calling test when OpenCV cannot be imported."""
        try:
            import cv2  # noqa: F401
        except ImportError:
            pytest.skip("opencv-python not installed")

    @staticmethod
    def _load_result(result, title: str) -> dict:
        """
        Validate a tool result, print it under a banner and return its parsed JSON.

        Args:
            result: Object returned by the tool; its first output block's
                ``data`` attribute is parsed as JSON.
            title: Text shown after ``TEST: `` in the printed banner.

        Returns:
            The decoded JSON payload of the first output block.
        """
        import json

        assert result.output_blocks is not None
        # Guard the [0] access below so an empty result fails with a clear assertion
        assert len(result.output_blocks) > 0

        result_data = json.loads(result.output_blocks[0].data)

        print("\n" + "=" * 80)
        print(f"TEST: {title}")
        print("=" * 80)
        print(json.dumps(result_data, indent=2))
        print("=" * 80 + "\n")

        return result_data

    def _create_test_video(self, width: int, height: int, output_path: Path, num_frames: int = 30):
        """
        Create a test video with specified dimensions.

        Args:
            width: Video width in pixels
            height: Video height in pixels
            output_path: Path to save the video
            num_frames: Number of frames to generate

        Raises:
            RuntimeError: If the OpenCV writer could not be opened for ``output_path``.
        """
        import cv2
        import numpy as np

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        fps = 10.0
        video = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

        try:
            # Fail here rather than later with a missing/empty file if the writer is unusable
            if not video.isOpened():
                raise RuntimeError(f"Could not open video writer for {output_path}")

            # The half-frame fills never change, so paint them once
            frame = np.zeros((height, width, 3), dtype=np.uint8)
            frame[: height // 2, :, 1] = 128  # Green in top half
            frame[height // 2 :, :, 2] = 128  # Red in bottom half

            for i in range(num_frames):
                frame[:, :, 0] = int((i / num_frames) * 255)  # Blue channel varies by frame
                video.write(frame)
        finally:
            video.release()

    async def _describe_video(self, temp_dir: Path, filename: str, width: int, height: int, title: str) -> dict:
        """
        Generate a 10-frame video, run understand_video on it and return the parsed result.

        Args:
            temp_dir: Directory in which the video file is created.
            filename: Name of the video file inside ``temp_dir``.
            width: Frame width in pixels.
            height: Frame height in pixels.
            title: Banner title used when printing the result.
        """
        self._require_cv2()

        from massgen.tool._multimodal_tools.understand_video import understand_video

        video_path = temp_dir / filename
        self._create_test_video(width, height, video_path, num_frames=10)

        # Use real OpenAI API
        result = await understand_video(
            str(video_path),
            num_frames=3,
            prompt="Describe what you see in this test video in one sentence.",
        )

        return self._load_result(result, title)

    @pytest.mark.asyncio
    async def test_video_with_large_frames(self, temp_dir):
        """Test that video with large frame dimensions processes correctly (frames are resized)."""
        result_data = await self._describe_video(
            temp_dir,
            "large_video.mp4",
            3000,
            4000,
            "Video With Large Frames (3000x4000) - Frames Should Be Resized",
        )

        assert result_data["success"] is True

    @pytest.mark.asyncio
    async def test_video_with_small_frames(self, temp_dir):
        """Test that video with small frame dimensions processes without resizing."""
        result_data = await self._describe_video(
            temp_dir,
            "small_video.mp4",
            640,
            480,
            "Video With Small Frames (640x480) - No Resize Needed",
        )

        assert result_data["success"] is True
270
+
271
+
272
class TestAudioSizeLimits:
    """Test suite for understand_audio file size limits.

    WAV fixtures are synthesised locally; two tests pass them to the real
    ``understand_audio`` tool and one only checks the size arithmetic.
    """

    # Size threshold the tests compare generated files against (25MB)
    MAX_AUDIO_BYTES = 25 * 1024 * 1024

    @pytest.fixture
    def temp_dir(self):
        """Yield a temporary directory as a ``Path``; it is removed after the test."""
        with tempfile.TemporaryDirectory() as tmpdir:
            yield Path(tmpdir)

    @staticmethod
    def _load_result(result, title: str) -> dict:
        """
        Validate a tool result, print it under a banner and return its parsed JSON.

        Args:
            result: Object returned by the tool; its first output block's
                ``data`` attribute is parsed as JSON.
            title: Text shown after ``TEST: `` in the printed banner.

        Returns:
            The decoded JSON payload of the first output block.
        """
        import json

        assert result.output_blocks is not None
        # Guard the [0] access below so an empty result fails with a clear assertion
        assert len(result.output_blocks) > 0

        result_data = json.loads(result.output_blocks[0].data)

        print("\n" + "=" * 80)
        print(f"TEST: {title}")
        print("=" * 80)
        print(json.dumps(result_data, indent=2))
        print("=" * 80 + "\n")

        return result_data

    def _create_test_audio(self, output_path: Path, duration_seconds: float = 1.0, sample_rate: int = 44100):
        """
        Create a test audio file (WAV format).

        The file is a mono, 16-bit PCM, 440 Hz sine wave. Samples are generated
        and written in fixed-size chunks so that long fixtures do not require the
        whole signal to be held in memory as floating-point arrays.

        Args:
            output_path: Path to save the audio file
            duration_seconds: Duration in seconds
            sample_rate: Sample rate in Hz
        """
        import wave

        import numpy as np

        frequency = 440.0  # A4 note
        num_samples = int(sample_rate * duration_seconds)
        chunk_samples = 1_000_000  # samples synthesised per write

        with wave.open(str(output_path), "w") as wav_file:
            wav_file.setnchannels(1)  # Mono
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(sample_rate)

            for start in range(0, num_samples, chunk_samples):
                stop = min(start + chunk_samples, num_samples)
                # Sample index / rate gives the time of each sample in seconds
                t = np.arange(start, stop) / sample_rate
                # Scale the sine wave to the 16-bit PCM range
                pcm = (np.sin(2 * np.pi * frequency * t) * 32767).astype(np.int16)
                wav_file.writeframes(pcm.tobytes())

    def _create_large_audio(self, output_path: Path, target_size_mb: float = 30):
        """
        Create a large audio file exceeding size limits.

        Args:
            output_path: Path to save the audio file
            target_size_mb: Target size in megabytes
        """
        # WAV payload: sample_rate * duration * 2 bytes (16-bit) * 1 channel
        sample_rate = 44100
        bytes_per_second = sample_rate * 2  # 16-bit mono
        duration_seconds = (target_size_mb * 1024 * 1024) / bytes_per_second

        self._create_test_audio(output_path, duration_seconds=duration_seconds, sample_rate=sample_rate)

    @pytest.mark.asyncio
    async def test_audio_within_size_limit(self, temp_dir):
        """Test that audio files within size limit are accepted."""
        from massgen.tool._multimodal_tools.understand_audio import understand_audio

        # Create a small audio file (~1 second, ~88KB)
        audio_path = temp_dir / "small_audio.wav"
        self._create_test_audio(audio_path, duration_seconds=1.0)

        file_size = audio_path.stat().st_size
        assert file_size < self.MAX_AUDIO_BYTES, "Test audio should be under 25MB"

        # Use real OpenAI API
        result = await understand_audio([str(audio_path)])

        result_data = self._load_result(result, f"Audio Within Size Limit (~{file_size/1024/1024:.2f}MB)")
        assert result_data["success"] is True

    @pytest.mark.asyncio
    async def test_audio_exceeds_size_limit(self, temp_dir):
        """Test that audio files exceeding 25MB limit are rejected."""
        from massgen.tool._multimodal_tools.understand_audio import understand_audio

        # Create a large audio file (~30MB)
        audio_path = temp_dir / "large_audio.wav"
        self._create_large_audio(audio_path, target_size_mb=30)

        file_size = audio_path.stat().st_size
        assert file_size > self.MAX_AUDIO_BYTES, f"Test audio should exceed 25MB, got {file_size / 1024 / 1024:.1f}MB"

        # This should fail validation before calling OpenAI
        result = await understand_audio([str(audio_path)])

        result_data = self._load_result(result, f"Audio Exceeds Size Limit ({file_size/1024/1024:.1f}MB > 25MB)")

        # Check that it failed due to size limit
        assert result_data["success"] is False
        assert "too large" in result_data["error"].lower()
        assert "25MB" in result_data["error"]

    def test_audio_size_check(self, temp_dir):
        """Test audio file size checking logic."""
        # Create audio files of different sizes
        test_cases = [
            (1.0, True),  # 1 second (~88KB) - should pass
            (10.0, True),  # 10 seconds (~880KB) - should pass
        ]

        for duration, should_pass in test_cases:
            audio_path = temp_dir / f"audio_{duration}s.wav"
            self._create_test_audio(audio_path, duration_seconds=duration)

            file_size = audio_path.stat().st_size
            passes = file_size <= self.MAX_AUDIO_BYTES

            assert passes == should_pass, f"Size check failed for {duration}s audio ({file_size / 1024 / 1024:.1f}MB): " f"expected pass={should_pass}, got {passes}"
404
+
405
+
406
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly fails when tests fail
    raise SystemExit(pytest.main([__file__, "-v"]))
@@ -0,0 +1,204 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Unit tests for orchestration restart feature.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+
10
+ import pytest
11
+
12
# Ensure project root is on sys.path.
# This file lives in <root>/massgen/tests/, so the directory that makes
# ``import massgen`` resolvable is two levels up, not the package directory itself.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
16
+
17
+
18
def test_coordination_config_restart_params():
    """A default CoordinationConfig exposes max_orchestration_restarts set to 0."""
    from massgen.agent_config import CoordinationConfig

    coordination = CoordinationConfig()

    assert hasattr(coordination, "max_orchestration_restarts")
    default_restarts = coordination.max_orchestration_restarts
    assert default_restarts == 0  # Default
25
+
26
+
27
def test_agent_config_debug_final_answer():
    """A default AgentConfig exposes debug_final_answer set to None."""
    from massgen.agent_config import AgentConfig

    agent_config = AgentConfig()

    assert hasattr(agent_config, "debug_final_answer")
    default_answer = agent_config.debug_final_answer
    assert default_answer is None  # Default
34
+
35
+
36
def test_post_evaluation_toolkit_import():
    """PostEvaluationToolkit is importable from massgen.tool.workflow_toolkits."""
    from massgen.tool.workflow_toolkits import PostEvaluationToolkit as toolkit_cls

    assert toolkit_cls is not None
41
+
42
+
43
def test_post_evaluation_tools_function():
    """get_post_evaluation_tools returns exactly the submit and restart_orchestration tools, in that order."""
    from massgen.tool import get_post_evaluation_tools

    tools = get_post_evaluation_tools()
    assert len(tools) == 2

    submit_tool, restart_tool = tools
    assert submit_tool["function"]["name"] == "submit"
    assert restart_tool["function"]["name"] == "restart_orchestration"
51
+
52
+
53
def test_submit_tool_schema():
    """The first post-evaluation tool is ``submit`` with a ``confirmed`` property whose enum is [True]."""
    from massgen.tool import get_post_evaluation_tools

    submit_function = get_post_evaluation_tools()[0]["function"]
    assert submit_function["name"] == "submit"

    properties = submit_function["parameters"]["properties"]
    assert "confirmed" in properties
    assert properties["confirmed"]["enum"] == [True]
63
+
64
+
65
def test_restart_orchestration_tool_schema():
    """The second post-evaluation tool is ``restart_orchestration`` requiring reason and instructions."""
    from massgen.tool import get_post_evaluation_tools

    restart_function = get_post_evaluation_tools()[1]["function"]
    assert restart_function["name"] == "restart_orchestration"

    properties = restart_function["parameters"]["properties"]
    for expected_param in ("reason", "instructions"):
        assert expected_param in properties

    required = set(restart_function["parameters"]["required"])
    assert required == {"reason", "instructions"}
77
+
78
+
79
def test_message_templates_post_evaluation():
    """MessageTemplates provides the post-evaluation system message and restart-context formatter."""
    from massgen.message_templates import MessageTemplates

    templates = MessageTemplates()
    for method_name in ("post_evaluation_system_message", "format_restart_context"):
        assert hasattr(templates, method_name)

    # Both methods must return strings containing their section headings
    system_message = templates.post_evaluation_system_message()
    assert isinstance(system_message, str)
    assert "Post-Presentation Evaluation" in system_message

    context_text = templates.format_restart_context("test reason", "test instructions")
    assert isinstance(context_text, str)
    assert "PREVIOUS ATTEMPT FEEDBACK" in context_text
95
+
96
+
97
def test_orchestrator_restart_state():
    """A new Orchestrator carries restart-tracking attributes initialised from its config."""
    from massgen.agent_config import AgentConfig, CoordinationConfig
    from massgen.orchestrator import Orchestrator

    agent_config = AgentConfig()
    agent_config.coordination_config = CoordinationConfig(max_orchestration_restarts=2)

    orchestrator = Orchestrator(agents={}, config=agent_config)

    restart_attributes = (
        "current_attempt",
        "max_attempts",
        "restart_pending",
        "restart_reason",
        "restart_instructions",
    )
    for attribute in restart_attributes:
        assert hasattr(orchestrator, attribute)

    assert orchestrator.current_attempt == 0
    assert orchestrator.max_attempts == 3  # 1 + 2 restarts
    assert orchestrator.restart_pending is False
116
+
117
+
118
def test_orchestrator_post_evaluate_method():
    """Orchestrator.post_evaluate_answer exists and accepts selected_agent_id and final_answer."""
    import inspect

    from massgen.orchestrator import Orchestrator

    assert hasattr(Orchestrator, "post_evaluate_answer")

    parameters = inspect.signature(Orchestrator.post_evaluate_answer).parameters
    for expected_name in ("selected_agent_id", "final_answer"):
        assert expected_name in parameters
128
+
129
+
130
def test_orchestrator_handle_restart_method():
    """Orchestrator defines a handle_restart attribute."""
    from massgen.orchestrator import Orchestrator as orchestrator_cls

    assert hasattr(orchestrator_cls, "handle_restart")
135
+
136
+
137
def test_handle_restart_resets_state():
    """handle_restart advances the attempt counter and clears per-attempt state but keeps the feedback."""
    from massgen.agent_config import AgentConfig, CoordinationConfig
    from massgen.orchestrator import Orchestrator

    agent_config = AgentConfig()
    agent_config.coordination_config = CoordinationConfig(max_orchestration_restarts=2)

    # An empty agents dict keeps construction simple for this test
    orchestrator = Orchestrator(agents={}, config=agent_config)

    # Put the orchestrator into the state it would have after a first attempt
    state_after_first_attempt = {
        "current_attempt": 0,
        "restart_reason": "test reason",
        "restart_instructions": "test instructions",
        "workflow_phase": "presenting",
        "_selected_agent": "agent1",
        "_final_presentation_content": "some content",
    }
    for attribute, value in state_after_first_attempt.items():
        setattr(orchestrator, attribute, value)

    orchestrator.handle_restart()

    # Per-attempt state is reset and the attempt counter moves on
    assert orchestrator.current_attempt == 1
    assert orchestrator.workflow_phase == "idle"
    assert orchestrator._selected_agent is None
    assert orchestrator._final_presentation_content is None

    # Restart reason/instructions should be preserved for next attempt
    assert orchestrator.restart_reason == "test reason"
    assert orchestrator.restart_instructions == "test instructions"
167
+
168
+
169
def test_base_display_restart_methods():
    """BaseDisplay declares the three restart-related display hooks as abstract methods."""
    import inspect

    from massgen.frontend.displays.base_display import BaseDisplay

    # Collect the names of all functions on BaseDisplay flagged as abstract
    abstract_methods = set()
    for name, method in inspect.getmembers(BaseDisplay, predicate=inspect.isfunction):
        if getattr(method, "__isabstractmethod__", False):
            abstract_methods.add(name)

    assert "show_post_evaluation_content" in abstract_methods
    assert "show_restart_banner" in abstract_methods
    assert "show_restart_context_panel" in abstract_methods
180
+
181
+
182
def test_post_evaluation_tools_api_formats():
    """Test post-evaluation tools work with different API formats.

    The test awaits nothing, so it is a plain synchronous test and does not
    depend on an asyncio pytest plugin being installed.
    """
    from massgen.tool import get_post_evaluation_tools

    # Test chat_completions format (default)
    tools_chat = get_post_evaluation_tools(api_format="chat_completions")
    assert len(tools_chat) == 2
    assert tools_chat[0]["type"] == "function"

    # Test claude format
    tools_claude = get_post_evaluation_tools(api_format="claude")
    assert len(tools_claude) == 2
    assert "input_schema" in tools_claude[0]

    # Test response format
    tools_response = get_post_evaluation_tools(api_format="response")
    assert len(tools_response) == 2
    assert tools_response[0]["type"] == "function"
201
+
202
+
203
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly fails when tests fail
    raise SystemExit(pytest.main([__file__, "-v"]))
massgen/tool/__init__.py CHANGED
@@ -8,8 +8,10 @@ from ._result import ExecutionResult
8
8
  from .workflow_toolkits import (
9
9
  BaseToolkit,
10
10
  NewAnswerToolkit,
11
+ PostEvaluationToolkit,
11
12
  ToolType,
12
13
  VoteToolkit,
14
+ get_post_evaluation_tools,
13
15
  get_workflow_tools,
14
16
  )
15
17
 
@@ -35,5 +37,7 @@ __all__ = [
35
37
  "ToolType",
36
38
  "NewAnswerToolkit",
37
39
  "VoteToolkit",
40
+ "PostEvaluationToolkit",
38
41
  "get_workflow_tools",
42
+ "get_post_evaluation_tools",
39
43
  ]
massgen/tool/_manager.py CHANGED
@@ -312,9 +312,14 @@ class ToolManager:
312
312
  return
313
313
 
314
314
  tool_entry = self.registered_tools[tool_name]
315
+
316
+ # Merge parameters: model input first, then preset params override
317
+ # This ensures preset_params (like agent_cwd) always take precedence
318
+ # and won't be overridden by null values from model
319
+ model_input = tool_request.get("input", {}) or {}
315
320
  exec_kwargs = {
316
- **tool_entry.preset_params,
317
- **(tool_request.get("input", {}) or {}),
321
+ **model_input,
322
+ **tool_entry.preset_params, # preset_params override model input
318
323
  }
319
324
 
320
325
  # Prepare post-processor if exists