massgen 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +1 -1
- massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
- massgen/api_params_handler/_claude_api_params_handler.py +4 -0
- massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
- massgen/api_params_handler/_response_api_params_handler.py +4 -0
- massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
- massgen/backend/docs/permissions_and_context_files.md +2 -2
- massgen/backend/response.py +2 -0
- massgen/configs/README.md +49 -40
- massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
- massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
- massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +1 -1
- massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +1 -1
- massgen/filesystem_manager/_filesystem_manager.py +1 -0
- massgen/filesystem_manager/_path_permission_manager.py +148 -0
- massgen/message_templates.py +160 -12
- massgen/orchestrator.py +16 -0
- massgen/tests/test_binary_file_blocking.py +274 -0
- massgen/tests/test_case_studies.md +12 -12
- massgen/tests/test_multimodal_size_limits.py +407 -0
- massgen/tool/_manager.py +7 -2
- massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
- massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
- massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
- massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
- massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
- massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
- massgen/tool/_multimodal_tools/understand_audio.py +19 -1
- massgen/tool/_multimodal_tools/understand_file.py +6 -1
- massgen/tool/_multimodal_tools/understand_image.py +112 -8
- massgen/tool/_multimodal_tools/understand_video.py +32 -5
- massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
- massgen/tool/docs/multimodal_tools.md +589 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/METADATA +96 -69
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/RECORD +49 -40
- massgen/configs/tools/custom_tools/crawl4ai_mcp_example.yaml +0 -67
- massgen/configs/tools/custom_tools/crawl4ai_multi_agent_example.yaml +0 -68
- massgen/configs/tools/custom_tools/multimodal_tools/playwright_with_img_understanding.yaml +0 -98
- massgen/configs/tools/custom_tools/multimodal_tools/understand_video_example.yaml +0 -54
- massgen/configs/tools/memory/README.md +0 -199
- massgen/configs/tools/memory/gpt5mini_gemini_context_window_management.yaml +0 -131
- massgen/configs/tools/memory/gpt5mini_gemini_no_persistent_memory.yaml +0 -133
- massgen/configs/tools/memory/test_context_window_management.py +0 -286
- massgen/configs/tools/multimodal/gpt5mini_gpt5nano_documentation_evolution.yaml +0 -97
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.1.3.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Tests for binary file blocking in PathPermissionManager.
|
|
5
|
+
|
|
6
|
+
These tests ensure that text-based read tools (Read, read_text_file, etc.)
|
|
7
|
+
are blocked from reading binary files (images, videos, audio, etc.) to prevent
|
|
8
|
+
context pollution with binary data.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
|
|
15
|
+
from massgen.filesystem_manager._base import Permission
|
|
16
|
+
from massgen.filesystem_manager._path_permission_manager import PathPermissionManager
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
def permission_manager():
    """Return a PathPermissionManager with a single writable workspace path.

    The manager is built with ``context_write_access_enabled=False`` and
    ``enforce_read_before_delete=True``; ``/tmp/test_workspace`` (resolved)
    is then registered with ``Permission.WRITE`` under the "workspace" label.
    The tests in this module all point their tool arguments at that directory.
    """
    workspace_root = Path("/tmp/test_workspace").resolve()

    pm = PathPermissionManager(context_write_access_enabled=False, enforce_read_before_delete=True)
    pm.add_path(workspace_root, Permission.WRITE, "workspace")
    return pm
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TestBinaryFileBlocking:
    """Test suite for binary file blocking functionality.

    Each test awaits ``permission_manager.pre_tool_use_hook(tool_name, tool_args)``
    and inspects the returned ``(allowed, reason)`` pair.

    Multi-case tests are parametrized rather than looping inside one test body,
    so every extension/path is reported as its own test case and one failing
    case no longer hides the status of the cases after it.
    """

    @pytest.mark.asyncio
    async def test_block_read_image_with_read_tool(self, permission_manager):
        """Test that Read tool is blocked from reading image files."""
        tool_name = "Read"
        tool_args = {"file_path": "/tmp/test_workspace/photo.jpg"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, "Read should be blocked from reading .jpg files"
        assert reason is not None
        # The refusal is expected to point at the image tool and name the file.
        assert "understand_image" in reason.lower()
        assert "photo.jpg" in reason

    @pytest.mark.asyncio
    async def test_block_read_text_file_image(self, permission_manager):
        """Test that read_text_file (MCP) is blocked from reading image files."""
        tool_name = "mcp__filesystem__read_text_file"
        # This tool is called with a "path" key, unlike Read's "file_path".
        tool_args = {"path": "/tmp/test_workspace/diagram.png"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, "read_text_file should be blocked from reading .png files"
        assert reason is not None
        assert "understand_image" in reason.lower()

    @pytest.mark.asyncio
    async def test_block_read_video(self, permission_manager):
        """Test that Read tool is blocked from reading video files."""
        tool_name = "Read"
        tool_args = {"file_path": "/tmp/test_workspace/demo.mp4"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, "Read should be blocked from reading .mp4 files"
        assert reason is not None
        assert "understand_video" in reason.lower()

    @pytest.mark.asyncio
    async def test_block_read_audio(self, permission_manager):
        """Test that Read tool is blocked from reading audio files."""
        tool_name = "Read"
        tool_args = {"file_path": "/tmp/test_workspace/recording.mp3"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, "Read should be blocked from reading .mp3 files"
        assert reason is not None
        assert "audio" in reason.lower()

    @pytest.mark.asyncio
    async def test_allow_read_text_file(self, permission_manager):
        """Test that Read tool is allowed to read text files."""
        tool_name = "Read"
        tool_args = {"file_path": "/tmp/test_workspace/document.txt"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert allowed, "Read should be allowed to read .txt files"
        assert reason is None

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "filename",
        [
            "script.py",
            "app.js",
            "component.tsx",
            "main.go",
            "app.rs",
        ],
    )
    async def test_allow_read_code_file(self, permission_manager, filename):
        """Test that Read tool is allowed to read code files."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/{filename}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert allowed, f"Read should be allowed to read {filename}"
        assert reason is None

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "ext",
        [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif"],
    )
    async def test_block_all_image_formats(self, permission_manager, ext):
        """Test that all image formats are blocked."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/image{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {ext} files"
        assert reason is not None

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "ext",
        [".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"],
    )
    async def test_block_all_video_formats(self, permission_manager, ext):
        """Test that all video formats are blocked."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/video{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {ext} files"
        assert reason is not None

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "ext",
        [".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma"],
    )
    async def test_block_all_audio_formats(self, permission_manager, ext):
        """Test that all audio formats are blocked."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/audio{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {ext} files"
        assert reason is not None

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "ext",
        [".zip", ".tar", ".gz", ".bz2", ".7z", ".rar", ".xz"],
    )
    async def test_block_archive_formats(self, permission_manager, ext):
        """Test that archive formats are blocked."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/archive{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {ext} files"
        assert reason is not None

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "ext",
        [".exe", ".bin", ".dll", ".so", ".dylib", ".o", ".a", ".pyc", ".class", ".jar"],
    )
    async def test_block_executable_formats(self, permission_manager, ext):
        """Test that executable/binary formats are blocked."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/binary{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {ext} files"
        assert reason is not None

    @pytest.mark.asyncio
    @pytest.mark.parametrize("ext", [".doc", ".xls", ".ppt"])
    async def test_block_old_office_formats(self, permission_manager, ext):
        """Test that old Office formats are blocked (use understand_file instead)."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/document{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {ext} files"
        assert reason is not None

    @pytest.mark.asyncio
    @pytest.mark.parametrize("ext", [".pdf", ".docx", ".xlsx", ".pptx"])
    async def test_block_office_formats(self, permission_manager, ext):
        """Test that Office document formats are blocked from Read (must use understand_file).

        These are binary formats that should be handled by understand_file tool,
        which can properly extract text from them using specialized libraries.
        """
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/document{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {ext} files (use understand_file)"
        assert reason is not None
        assert "understand_file" in reason.lower()

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        "file_path",
        [
            "/tmp/test_workspace/PHOTO.JPG",
            "/tmp/test_workspace/Video.MP4",
            "/tmp/test_workspace/Audio.MP3",
            "/tmp/test_workspace/Image.PNG",
        ],
    )
    async def test_case_insensitive_extension_check(self, permission_manager, file_path):
        """Test that extension checking is case-insensitive."""
        tool_name = "Read"
        tool_args = {"file_path": file_path}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"Read should be blocked from reading {file_path} (case-insensitive)"
        assert reason is not None

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        ("tool_name", "tool_args"),
        [
            ("Write", {"file_path": "/tmp/test_workspace/image.jpg"}),
            ("Edit", {"file_path": "/tmp/test_workspace/video.mp4"}),
            ("Grep", {"pattern": "test"}),  # No file_path, should pass
        ],
        ids=["Write", "Edit", "Grep"],
    )
    async def test_non_text_read_tools_not_affected(self, permission_manager, tool_name, tool_args):
        """Test that non-text-read tools are not affected by binary file blocking."""
        # Tools like Write, Edit, Delete should not be affected
        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        # These tools have their own validation, but shouldn't be blocked by binary check
        # (they may be blocked for other reasons like permissions)
        # The key is that _validate_binary_file_access is not called for these
        # NOTE(review): this only proves the hook returns a bool without raising; it
        # does not verify that a refusal (if any) came from something other than the
        # binary check. Consider asserting on `reason` once the exact wording of the
        # binary-block message is confirmed.
        assert isinstance(allowed, bool)  # Should complete without binary file error

    @pytest.mark.asyncio
    @pytest.mark.parametrize(
        ("ext", "expected_suggestion"),
        [
            (".jpg", "understand_image"),
            (".mp4", "understand_video"),
            (".mp3", "audio"),
            (".pdf", "understand_file"),
            (".docx", "understand_file"),
        ],
    )
    async def test_helpful_error_messages(self, permission_manager, ext, expected_suggestion):
        """Test that error messages provide helpful suggestions for blocked binary files."""
        tool_name = "Read"
        tool_args = {"file_path": f"/tmp/test_workspace/file{ext}"}

        allowed, reason = await permission_manager.pre_tool_use_hook(tool_name, tool_args)

        assert not allowed, f"File with {ext} extension should be blocked"
        assert reason is not None
        assert expected_suggestion.lower() in reason.lower(), f"Error message should suggest {expected_suggestion} for {ext} files"
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# MassGen Case Study Test Commands
|
|
2
2
|
|
|
3
|
-
This document contains commands to test all the case studies from `docs/case_studies/` using the three agents default configuration.
|
|
3
|
+
This document contains commands to test all the case studies from `docs/source/examples/case_studies/` using the three agents default configuration.
|
|
4
4
|
|
|
5
5
|
## Quick Commands
|
|
6
6
|
|
|
7
7
|
All tests use the `three_agents_default.yaml` configuration with:
|
|
8
8
|
- **Gemini 2.5 Flash** (web search enabled)
|
|
9
|
-
- **GPT-4o-mini** (web search + code interpreter)
|
|
9
|
+
- **GPT-4o-mini** (web search + code interpreter)
|
|
10
10
|
- **Grok 3 mini** (web search with citations)
|
|
11
11
|
|
|
12
12
|
### 1. Collaborative Creative Writing
|
|
@@ -17,8 +17,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "Write
|
|
|
17
17
|
# From tests directory:
|
|
18
18
|
python ../cli.py --config ../configs/three_agents_default.yaml "Write a short story about a robot who discovers music."
|
|
19
19
|
```
|
|
20
|
-
**Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
|
|
21
|
-
**Current:** gemini2.5flash, 4omini, grok3mini with builtin tools
|
|
20
|
+
**Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
|
|
21
|
+
**Current:** gemini2.5flash, 4omini, grok3mini with builtin tools
|
|
22
22
|
|
|
23
23
|
### 2. AI News Synthesis
|
|
24
24
|
```bash
|
|
@@ -28,8 +28,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "find b
|
|
|
28
28
|
# From tests directory:
|
|
29
29
|
python ../cli.py --config ../configs/three_agents_default.yaml "find big AI news this week"
|
|
30
30
|
```
|
|
31
|
-
**Original:** gpt-4.1, gemini-2.5-flash, grok-3-mini
|
|
32
|
-
**Current:** gemini2.5flash, 4omini, grok3mini with web search
|
|
31
|
+
**Original:** gpt-4.1, gemini-2.5-flash, grok-3-mini
|
|
32
|
+
**Current:** gemini2.5flash, 4omini, grok3mini with web search
|
|
33
33
|
|
|
34
34
|
### 3. Grok HLE Cost Estimation
|
|
35
35
|
```bash
|
|
@@ -39,8 +39,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "How mu
|
|
|
39
39
|
# From tests directory:
|
|
40
40
|
python ../cli.py --config ../configs/three_agents_default.yaml "How much does it cost to run HLE benchmark with Grok-4"
|
|
41
41
|
```
|
|
42
|
-
**Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
|
|
43
|
-
**Current:** gemini2.5flash, 4omini, grok3mini with web search
|
|
42
|
+
**Original:** gpt-4o, gemini-2.5-flash, grok-3-mini
|
|
43
|
+
**Current:** gemini2.5flash, 4omini, grok3mini with web search
|
|
44
44
|
|
|
45
45
|
### 4. IMO 2025 Winner
|
|
46
46
|
```bash
|
|
@@ -50,8 +50,8 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "Which
|
|
|
50
50
|
# From tests directory:
|
|
51
51
|
python ../cli.py --config ../configs/three_agents_default.yaml "Which AI won IMO 2025?"
|
|
52
52
|
```
|
|
53
|
-
**Original:** gemini-2.5-flash, gpt-4.1 (2 agents)
|
|
54
|
-
**Current:** gemini2.5flash, 4omini, grok3mini (3 agents with web search)
|
|
53
|
+
**Original:** gemini-2.5-flash, gpt-4.1 (2 agents)
|
|
54
|
+
**Current:** gemini2.5flash, 4omini, grok3mini (3 agents with web search)
|
|
55
55
|
|
|
56
56
|
### 5. Stockholm Travel Guide
|
|
57
57
|
```bash
|
|
@@ -61,7 +61,7 @@ python massgen/cli.py --config massgen/configs/three_agents_default.yaml "what's
|
|
|
61
61
|
# From tests directory:
|
|
62
62
|
python ../cli.py --config ../configs/three_agents_default.yaml "what's best to do in Stockholm in October 2025"
|
|
63
63
|
```
|
|
64
|
-
**Original:** gemini-2.5-flash, gpt-4o (2 agents)
|
|
64
|
+
**Original:** gemini-2.5-flash, gpt-4o (2 agents)
|
|
65
65
|
**Current:** gemini2.5flash, 4omini, grok3mini with web search for current info
|
|
66
66
|
|
|
67
67
|
## Configuration Details
|
|
@@ -70,7 +70,7 @@ The `three_agents_default.yaml` configuration provides:
|
|
|
70
70
|
|
|
71
71
|
### Agent Capabilities
|
|
72
72
|
- **gemini2.5flash**: Gemini 2.5 Flash with web search
|
|
73
|
-
- **4omini**: GPT-4o-mini with web search + code interpreter
|
|
73
|
+
- **4omini**: GPT-4o-mini with web search + code interpreter
|
|
74
74
|
- **grok3mini**: Grok 3 mini with web search and citations
|
|
75
75
|
|
|
76
76
|
### UI Features
|