massgen 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of massgen might be problematic. Click here for more details.

Files changed (82) hide show
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +33 -7
  3. massgen/api_params_handler/_api_params_handler_base.py +3 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +4 -0
  5. massgen/api_params_handler/_claude_api_params_handler.py +4 -0
  6. massgen/api_params_handler/_gemini_api_params_handler.py +4 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +4 -0
  8. massgen/backend/azure_openai.py +9 -1
  9. massgen/backend/base.py +4 -0
  10. massgen/backend/base_with_custom_tool_and_mcp.py +25 -5
  11. massgen/backend/claude_code.py +9 -1
  12. massgen/backend/docs/permissions_and_context_files.md +2 -2
  13. massgen/backend/gemini.py +35 -6
  14. massgen/backend/gemini_utils.py +30 -0
  15. massgen/backend/response.py +2 -0
  16. massgen/chat_agent.py +9 -3
  17. massgen/cli.py +291 -43
  18. massgen/config_builder.py +163 -18
  19. massgen/configs/README.md +69 -14
  20. massgen/configs/debug/restart_test_controlled.yaml +60 -0
  21. massgen/configs/debug/restart_test_controlled_filesystem.yaml +73 -0
  22. massgen/configs/tools/code-execution/docker_with_sudo.yaml +35 -0
  23. massgen/configs/tools/custom_tools/computer_use_browser_example.yaml +56 -0
  24. massgen/configs/tools/custom_tools/computer_use_docker_example.yaml +65 -0
  25. massgen/configs/tools/custom_tools/computer_use_example.yaml +50 -0
  26. massgen/configs/tools/custom_tools/crawl4ai_example.yaml +55 -0
  27. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_multi.yaml +61 -0
  28. massgen/configs/tools/custom_tools/multimodal_tools/text_to_file_generation_single.yaml +29 -0
  29. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_multi.yaml +51 -0
  30. massgen/configs/tools/custom_tools/multimodal_tools/text_to_image_generation_single.yaml +33 -0
  31. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_multi.yaml +55 -0
  32. massgen/configs/tools/custom_tools/multimodal_tools/text_to_speech_generation_single.yaml +33 -0
  33. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_multi.yaml +47 -0
  34. massgen/configs/tools/custom_tools/multimodal_tools/text_to_video_generation_single.yaml +29 -0
  35. massgen/configs/tools/custom_tools/multimodal_tools/understand_audio.yaml +33 -0
  36. massgen/configs/tools/custom_tools/multimodal_tools/understand_file.yaml +34 -0
  37. massgen/configs/tools/custom_tools/multimodal_tools/understand_image.yaml +33 -0
  38. massgen/configs/tools/custom_tools/multimodal_tools/understand_video.yaml +34 -0
  39. massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml +59 -0
  40. massgen/docker/README.md +83 -0
  41. massgen/filesystem_manager/_code_execution_server.py +22 -7
  42. massgen/filesystem_manager/_docker_manager.py +21 -1
  43. massgen/filesystem_manager/_filesystem_manager.py +9 -0
  44. massgen/filesystem_manager/_path_permission_manager.py +148 -0
  45. massgen/filesystem_manager/_workspace_tools_server.py +0 -997
  46. massgen/formatter/_gemini_formatter.py +73 -0
  47. massgen/frontend/coordination_ui.py +175 -257
  48. massgen/frontend/displays/base_display.py +29 -0
  49. massgen/frontend/displays/rich_terminal_display.py +155 -9
  50. massgen/frontend/displays/simple_display.py +21 -0
  51. massgen/frontend/displays/terminal_display.py +22 -2
  52. massgen/logger_config.py +50 -6
  53. massgen/message_templates.py +283 -15
  54. massgen/orchestrator.py +335 -38
  55. massgen/tests/test_binary_file_blocking.py +274 -0
  56. massgen/tests/test_case_studies.md +12 -12
  57. massgen/tests/test_code_execution.py +178 -0
  58. massgen/tests/test_multimodal_size_limits.py +407 -0
  59. massgen/tests/test_orchestration_restart.py +204 -0
  60. massgen/tool/__init__.py +4 -0
  61. massgen/tool/_manager.py +7 -2
  62. massgen/tool/_multimodal_tools/image_to_image_generation.py +293 -0
  63. massgen/tool/_multimodal_tools/text_to_file_generation.py +455 -0
  64. massgen/tool/_multimodal_tools/text_to_image_generation.py +222 -0
  65. massgen/tool/_multimodal_tools/text_to_speech_continue_generation.py +226 -0
  66. massgen/tool/_multimodal_tools/text_to_speech_transcription_generation.py +217 -0
  67. massgen/tool/_multimodal_tools/text_to_video_generation.py +223 -0
  68. massgen/tool/_multimodal_tools/understand_audio.py +211 -0
  69. massgen/tool/_multimodal_tools/understand_file.py +555 -0
  70. massgen/tool/_multimodal_tools/understand_image.py +316 -0
  71. massgen/tool/_multimodal_tools/understand_video.py +340 -0
  72. massgen/tool/_web_tools/crawl4ai_tool.py +718 -0
  73. massgen/tool/docs/multimodal_tools.md +1368 -0
  74. massgen/tool/workflow_toolkits/__init__.py +26 -0
  75. massgen/tool/workflow_toolkits/post_evaluation.py +216 -0
  76. massgen/utils.py +1 -0
  77. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/METADATA +101 -69
  78. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/RECORD +82 -46
  79. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/WHEEL +0 -0
  80. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/entry_points.txt +0 -0
  81. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/licenses/LICENSE +0 -0
  82. {massgen-0.1.2.dist-info → massgen-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,59 @@
1
+ # MassGen Configuration: YouTube Video Analysis with Multimodal Understanding
2
+ #
3
+ # Use Case: Download and analyze YouTube videos from MassGen case studies
4
+ #
5
+ # This demonstrates MassGen's self-evolution capabilities by having agents:
6
+ # 1. Read local case study documentation to discover video URLs
7
+ # 2. Download YouTube videos using yt-dlp via command-line execution
8
+ # 3. Analyze video content using the understand_video multimodal tool
9
+ # 4. Extract insights that could inform future feature development
10
+ #
11
+ # Run with:
12
+ # uv run massgen --config massgen/configs/tools/custom_tools/multimodal_tools/youtube_video_analysis.yaml "Download recent MassGen case study videos listed in the case study md files, analyze them, find out how to improve them and automate their creation."
13
+
14
+ agents:
15
+ - id: "agent_a"
16
+ backend:
17
+ type: "openai"
18
+ model: "gpt-5-mini"
19
+ text:
20
+ verbosity: "medium"
21
+ reasoning:
22
+ effort: "medium"
23
+ summary: "auto"
24
+ custom_tools:
25
+ - name: ["understand_video"]
26
+ category: "multimodal"
27
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
28
+ function: ["understand_video"]
29
+ enable_mcp_command_line: true
30
+ command_line_execution_mode: docker
31
+ command_line_docker_enable_sudo: true
32
+ command_line_docker_network_mode: "bridge"
33
+ cwd: "workspace1"
34
+
35
+ - id: "agent_b"
36
+ backend:
37
+ type: "claude_code"
38
+ model: "claude-sonnet-4-5-20250929"
39
+ custom_tools:
40
+ - name: ["understand_video"]
41
+ category: "multimodal"
42
+ path: "massgen/tool/_multimodal_tools/understand_video.py"
43
+ function: ["understand_video"]
44
+ enable_mcp_command_line: true
45
+ command_line_execution_mode: docker
46
+ command_line_docker_enable_sudo: true
47
+ command_line_docker_network_mode: "bridge"
48
+ cwd: "workspace2"
49
+
50
+ orchestrator:
51
+ snapshot_storage: "snapshots"
52
+ agent_temporary_workspace: "temp_workspaces"
53
+ context_paths:
54
+ - path: "docs/source/examples/case_studies"
55
+ permission: "read"
56
+
57
+ ui:
58
+ display_type: "rich_terminal"
59
+ logging_enabled: true
massgen/docker/README.md CHANGED
@@ -115,6 +115,7 @@ agents:
115
115
  | `command_line_docker_memory_limit` | None | Memory limit (e.g., `"2g"`, `"512m"`) |
116
116
  | `command_line_docker_cpu_limit` | None | CPU cores limit (e.g., `2.0`) |
117
117
  | `command_line_docker_network_mode` | `"none"` | `"none"`, `"bridge"`, or `"host"` |
118
+ | `command_line_docker_enable_sudo` | `false` | Enable sudo in containers (isolated from host) |
118
119
 
119
120
  ## How It Works
120
121
 
@@ -204,6 +205,88 @@ docker build -t my-custom-runtime:latest -f Dockerfile.custom .
204
205
  command_line_docker_image: "my-custom-runtime:latest"
205
206
  ```
206
207
 
208
+ ### Sudo Variant (Runtime Package Installation)
209
+
210
+ The sudo variant allows agents to install system packages at runtime inside their Docker container.
211
+
212
+ **IMPORTANT: Build the image before first use:**
213
+ ```bash
214
+ bash massgen/docker/build.sh --sudo
215
+ ```
216
+
217
+ This builds the `massgen/mcp-runtime-sudo:latest` image locally, with sudo available inside the container. (This image is not published on Docker Hub; you must build it yourself.)
218
+
219
+ **Enable in config:**
220
+ ```yaml
221
+ agent:
222
+ backend:
223
+ cwd: "workspace"
224
+ enable_mcp_command_line: true
225
+ command_line_execution_mode: "docker"
226
+ command_line_docker_enable_sudo: true # Automatically uses sudo image
227
+ ```
228
+
229
+ **What agents can do with sudo:**
230
+ ```bash
231
+ # Install system packages at runtime
232
+ sudo apt-get update && sudo apt-get install -y ffmpeg
233
+
234
+ # Install additional Python packages
235
+ sudo pip install tensorflow
236
+
237
+ # Install client tools the task needs (e.g., the PostgreSQL client)
238
+ sudo apt-get install -y postgresql-client
239
+ ```
240
+
241
+ **Security model - Is this safe?**
242
+
243
+ **YES, it's still safe** because Docker container isolation is the primary security boundary:
244
+
245
+ ✅ **Container is fully isolated from your host:**
246
+ - Sudo inside container ≠ sudo on your computer
247
+ - Agent can only access mounted volumes (workspace, context paths)
248
+ - Cannot access your host filesystem outside mounts
249
+ - Cannot affect host processes or system configuration
250
+ - Docker namespaces/cgroups provide strong isolation
251
+
252
+ ✅ **What sudo can and cannot do:**
253
+ - ✅ Can: Install packages inside the container (apt, pip, npm)
254
+ - ✅ Can: Modify container system configuration
255
+ - ✅ Can: Read/write mounted workspace (same as without sudo)
256
+ - ❌ Cannot: Access your host filesystem outside mounts
257
+ - ❌ Cannot: Affect your host system
258
+ - ❌ Cannot: Break out of the container (unless a Docker or kernel vulnerability exists)
259
+
260
+ ℹ️ **Note:**
261
+ - Container escape vulnerabilities (CVEs in Docker/kernel) are extremely rare and quickly patched
262
+ - Standard Docker security practices apply
263
+
264
+ ❌ **Don't do this (makes it unsafe):**
265
+ - Enabling privileged mode (not exposed in MassGen, would need code changes)
266
+ - Mounting sensitive host paths like `/`, `/etc`, `/usr`
267
+ - Disabling security features like AppArmor/SELinux
268
+
269
+ **When to use sudo variant vs custom images:**
270
+
271
+ | Approach | Use When | Performance | Security |
272
+ |----------|----------|-------------|----------|
273
+ | **Sudo variant** | Need flexibility, required packages not known upfront, prototyping | Slower (runtime install) | Good (container isolated) |
274
+ | **Custom image** | Know packages needed, production use, performance matters | Fast (pre-installed) | Best (minimal attack surface) |
275
+
276
+ **Custom image example (recommended for production):**
277
+ ```dockerfile
278
+ FROM massgen/mcp-runtime:latest
279
+ USER root
280
+ RUN apt-get update && apt-get install -y ffmpeg postgresql-client
281
+ USER massgen
282
+ ```
283
+
284
+ Build: `docker build -t my-runtime:latest .`
285
+
286
+ Use: `command_line_docker_image: "my-runtime:latest"`
287
+
288
+ **Bottom line:** The sudo variant is safe for most use cases because Docker container isolation is strong. Custom images are preferred for production because they're faster and have a smaller attack surface, but sudo is fine for development and prototyping.
289
+
207
290
  ## Security Features
208
291
 
209
292
  ### Filesystem Isolation
@@ -62,7 +62,7 @@ def _validate_path_access(path: Path, allowed_paths: List[Path]) -> None:
62
62
  raise ValueError(f"Path not in allowed directories: {path}")
63
63
 
64
64
 
65
- def _sanitize_command(command: str) -> None:
65
+ def _sanitize_command(command: str, enable_sudo: bool = False) -> None:
66
66
  """
67
67
  Sanitize the command to prevent dangerous operations.
68
68
 
@@ -71,6 +71,7 @@ def _sanitize_command(command: str) -> None:
71
71
 
72
72
  Args:
73
73
  command: The command to sanitize
74
+ enable_sudo: Whether sudo is enabled (in Docker mode with sudo variant)
74
75
 
75
76
  Raises:
76
77
  ValueError: If dangerous command is detected
@@ -82,13 +83,20 @@ def _sanitize_command(command: str) -> None:
82
83
  (r"\bdd\b", "Use of 'dd' command is not allowed"),
83
84
  (r">\s*/dev/sd[a-z][1-9]?", "Overwriting disk blocks directly is not allowed"),
84
85
  (r":\(\)\{\s*:\|\:&\s*\};:", "Fork bombs are not allowed"),
85
- # Additional safety patterns
86
- (r"\bsudo\b", "Use of 'sudo' is not allowed"),
87
- (r"\bsu\b", "Use of 'su' is not allowed"),
88
- (r"\bchown\b", "Use of 'chown' is not allowed"),
89
- (r"\bchmod\b", "Use of 'chmod' is not allowed"),
90
86
  ]
91
87
 
88
+ # Only check these patterns if sudo is NOT enabled
89
+ # When sudo is enabled (Docker mode with sudo variant), these are safe
90
+ if not enable_sudo:
91
+ dangerous_patterns.extend(
92
+ [
93
+ (r"\bsudo\b", "Use of 'sudo' is not allowed"),
94
+ (r"\bsu\b", "Use of 'su' is not allowed"),
95
+ (r"\bchown\b", "Use of 'chown' is not allowed"),
96
+ (r"\bchmod\b", "Use of 'chmod' is not allowed"),
97
+ ],
98
+ )
99
+
92
100
  for pattern, message in dangerous_patterns:
93
101
  if re.search(pattern, command):
94
102
  raise ValueError(f"Potentially dangerous command detected: {message}")
@@ -202,6 +210,12 @@ async def create_server() -> fastmcp.FastMCP:
202
210
  default=None,
203
211
  help="Agent ID (required for Docker mode to identify container)",
204
212
  )
213
+ parser.add_argument(
214
+ "--enable-sudo",
215
+ action="store_true",
216
+ default=False,
217
+ help="Enable sudo in Docker containers (disables sudo command sanitization checks)",
218
+ )
205
219
  args = parser.parse_args()
206
220
 
207
221
  # Create the FastMCP server
@@ -215,6 +229,7 @@ async def create_server() -> fastmcp.FastMCP:
215
229
  mcp.blocked_commands = args.blocked_commands # Blacklist patterns
216
230
  mcp.execution_mode = args.execution_mode
217
231
  mcp.agent_id = args.agent_id
232
+ mcp.enable_sudo = args.enable_sudo
218
233
 
219
234
  # Initialize Docker client if Docker mode
220
235
  mcp.docker_client = None
@@ -294,7 +309,7 @@ async def create_server() -> fastmcp.FastMCP:
294
309
  try:
295
310
  # Basic command sanitization (dangerous patterns)
296
311
  try:
297
- _sanitize_command(command)
312
+ _sanitize_command(command, enable_sudo=mcp.enable_sudo)
298
313
  except ValueError as e:
299
314
  return {
300
315
  "success": False,
@@ -45,6 +45,7 @@ class DockerManager:
45
45
  network_mode: str = "none",
46
46
  memory_limit: Optional[str] = None,
47
47
  cpu_limit: Optional[float] = None,
48
+ enable_sudo: bool = False,
48
49
  ):
49
50
  """
50
51
  Initialize Docker manager.
@@ -54,6 +55,7 @@ class DockerManager:
54
55
  network_mode: Network mode (none/bridge/host)
55
56
  memory_limit: Memory limit (e.g., "2g", "512m")
56
57
  cpu_limit: CPU limit (e.g., 2.0 for 2 CPUs)
58
+ enable_sudo: Enable sudo access in containers (isolated from host system)
57
59
 
58
60
  Raises:
59
61
  RuntimeError: If Docker is not available or cannot connect
@@ -61,7 +63,20 @@ class DockerManager:
61
63
  if not DOCKER_AVAILABLE:
62
64
  raise RuntimeError("Docker Python library not available. Install with: pip install docker")
63
65
 
64
- self.image = image
66
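+ # If sudo is enabled with the default image, switch to the sudo variant; a custom image is kept as-is. NOTE(review): the custom-image branch below only logs and does not appear to assign self.image - confirm it is set before the image is pulled.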
67
+ self.enable_sudo = enable_sudo
68
+ if enable_sudo and image == "massgen/mcp-runtime:latest":
69
+ self.image = "massgen/mcp-runtime-sudo:latest"
70
+ logger.info(
71
+ "ℹ️ [Docker] Sudo access enabled in container (isolated from host) - using 'massgen/mcp-runtime-sudo:latest' image.",
72
+ )
73
+ elif enable_sudo:
74
+ logger.info(
75
+ "ℹ️ [Docker] Sudo access enabled in container (isolated from host) with custom image.",
76
+ )
77
+ else:
78
+ self.image = image
79
+
65
80
  self.network_mode = network_mode
66
81
  self.memory_limit = memory_limit
67
82
  self.cpu_limit = cpu_limit
@@ -103,6 +118,11 @@ class DockerManager:
103
118
  self.client.images.pull(self.image)
104
119
  logger.info(f"✅ [Docker] Successfully pulled image '{self.image}'")
105
120
  except DockerException as e:
121
+ # Special handling for sudo image - it's built locally, not pulled
122
+ if "mcp-runtime-sudo" in self.image:
123
+ raise RuntimeError(
124
+ f"Failed to pull Docker image '{self.image}': {e}\n" f"The sudo image must be built locally. Run:\n" f" bash massgen/docker/build.sh --sudo",
125
+ )
106
126
  raise RuntimeError(f"Failed to pull Docker image '{self.image}': {e}")
107
127
 
108
128
  def create_container(
@@ -55,7 +55,9 @@ class FilesystemManager:
55
55
  command_line_docker_memory_limit: Optional[str] = None,
56
56
  command_line_docker_cpu_limit: Optional[float] = None,
57
57
  command_line_docker_network_mode: str = "none",
58
+ command_line_docker_enable_sudo: bool = False,
58
59
  enable_audio_generation: bool = False,
60
+ enable_file_generation: bool = False,
59
61
  ):
60
62
  """
61
63
  Initialize FilesystemManager.
@@ -75,6 +77,7 @@ class FilesystemManager:
75
77
  command_line_docker_memory_limit: Memory limit for Docker containers (e.g., "2g")
76
78
  command_line_docker_cpu_limit: CPU limit for Docker containers (e.g., 2.0 for 2 CPUs)
77
79
  command_line_docker_network_mode: Network mode for Docker containers (none/bridge/host)
80
+ command_line_docker_enable_sudo: Enable sudo access in Docker containers (isolated from host system)
78
81
  """
79
82
  self.agent_id = None # Will be set by orchestrator via setup_orchestration_paths
80
83
  self.enable_image_generation = enable_image_generation
@@ -86,6 +89,7 @@ class FilesystemManager:
86
89
  self.command_line_docker_memory_limit = command_line_docker_memory_limit
87
90
  self.command_line_docker_cpu_limit = command_line_docker_cpu_limit
88
91
  self.command_line_docker_network_mode = command_line_docker_network_mode
92
+ self.command_line_docker_enable_sudo = command_line_docker_enable_sudo
89
93
 
90
94
  # Initialize Docker manager if Docker mode enabled
91
95
  self.docker_manager = None
@@ -97,6 +101,7 @@ class FilesystemManager:
97
101
  network_mode=command_line_docker_network_mode,
98
102
  memory_limit=command_line_docker_memory_limit,
99
103
  cpu_limit=command_line_docker_cpu_limit,
104
+ enable_sudo=command_line_docker_enable_sudo,
100
105
  )
101
106
  self.enable_audio_generation = enable_audio_generation
102
107
 
@@ -360,6 +365,10 @@ class FilesystemManager:
360
365
  if self.command_line_execution_mode == "docker" and self.agent_id:
361
366
  config["args"].extend(["--agent-id", self.agent_id])
362
367
 
368
+ # Add sudo flag for Docker mode
369
+ if self.command_line_execution_mode == "docker" and self.command_line_docker_enable_sudo:
370
+ config["args"].append("--enable-sudo")
371
+
363
372
  # Add command filters if specified
364
373
  if self.command_line_allowed_commands:
365
374
  config["args"].extend(["--allowed-commands"] + self.command_line_allowed_commands)
@@ -90,6 +90,68 @@ class PathPermissionManager:
90
90
  "massgen_logs",
91
91
  ]
92
92
 
93
+ # Binary file extensions that should not be read by text-based tools
94
+ # These files should be handled by specialized tools (understand_image, understand_video, etc.)
95
+ BINARY_FILE_EXTENSIONS = {
96
+ # Images
97
+ ".jpg",
98
+ ".jpeg",
99
+ ".png",
100
+ ".gif",
101
+ ".bmp",
102
+ ".ico",
103
+ ".svg",
104
+ ".webp",
105
+ ".tiff",
106
+ ".tif",
107
+ # Videos
108
+ ".mp4",
109
+ ".avi",
110
+ ".mov",
111
+ ".mkv",
112
+ ".flv",
113
+ ".wmv",
114
+ ".webm",
115
+ ".m4v",
116
+ ".mpg",
117
+ ".mpeg",
118
+ # Audio
119
+ ".mp3",
120
+ ".wav",
121
+ ".ogg",
122
+ ".flac",
123
+ ".aac",
124
+ ".m4a",
125
+ ".wma",
126
+ # Archives
127
+ ".zip",
128
+ ".tar",
129
+ ".gz",
130
+ ".bz2",
131
+ ".7z",
132
+ ".rar",
133
+ ".xz",
134
+ # Executables and binaries
135
+ ".exe",
136
+ ".bin",
137
+ ".dll",
138
+ ".so",
139
+ ".dylib",
140
+ ".o",
141
+ ".a",
142
+ ".pyc",
143
+ ".class",
144
+ ".jar",
145
+ # Documents (binary formats - use the understand_file tool where supported; see per-extension notes)
146
+ ".doc", # Old Word (not supported by understand_file)
147
+ ".xls", # Old Excel (not supported by understand_file)
148
+ ".ppt", # Old PowerPoint (not supported by understand_file)
149
+ ".pdf", # PDF (supported by understand_file with PyPDF2)
150
+ ".docx", # Word (supported by understand_file with python-docx)
151
+ ".xlsx", # Excel (supported by understand_file with openpyxl)
152
+ ".pptx", # PowerPoint (supported by understand_file with python-pptx)
153
+ }
154
+
93
155
  def __init__(
94
156
  self,
95
157
  context_write_access_enabled: bool = False,
@@ -440,6 +502,12 @@ class PathPermissionManager:
440
502
  - allowed: Whether the tool call should proceed
441
503
  - reason: Explanation if blocked (None if allowed)
442
504
  """
505
+ # Check if read tool is trying to read binary files (images, videos, etc.)
506
+ if self._is_text_read_tool(tool_name):
507
+ binary_check_result = self._validate_binary_file_access(tool_name, tool_args)
508
+ if not binary_check_result[0]:
509
+ return binary_check_result
510
+
443
511
  # Track read operations for read-before-delete enforcement
444
512
  if self._is_read_tool(tool_name):
445
513
  self._track_read_operation(tool_name, tool_args)
@@ -495,6 +563,33 @@ class PathPermissionManager:
495
563
 
496
564
  return False
497
565
 
566
+ def _is_text_read_tool(self, tool_name: str) -> bool:
567
+ """
568
+ Check if a tool is a text-based read operation that should not access binary files.
569
+
570
+ These tools are designed for reading text files and should be blocked from
571
+ reading binary files (images, videos, audio, etc.) to prevent context pollution.
572
+
573
+ Tools that read text file contents:
574
+ - Read: Claude Code read tool
575
+ - read_text_file: MCP filesystem read tool
576
+ - read_file: Generic read operations
577
+ """
578
+ # Use lowercase for case-insensitive matching
579
+ tool_lower = tool_name.lower()
580
+
581
+ # Check if tool name contains any text read operation keywords
582
+ text_read_keywords = [
583
+ "read_text_file", # MCP filesystem: read_text_file
584
+ "read_file", # Generic read operations
585
+ ]
586
+
587
+ # Also check for exact "Read" match (Claude Code tool)
588
+ if tool_name == "Read":
589
+ return True
590
+
591
+ return any(keyword in tool_lower for keyword in text_read_keywords)
592
+
498
593
  def _is_read_tool(self, tool_name: str) -> bool:
499
594
  """
500
595
  Check if a tool is a read operation that should be tracked.
@@ -518,6 +613,59 @@ class PathPermissionManager:
518
613
 
519
614
  return any(keyword in tool_lower for keyword in read_keywords)
520
615
 
616
def _validate_binary_file_access(self, tool_name: str, tool_args: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
    """
    Validate that text-based read tools are not trying to read binary files.

    The decision is made purely on the file extension of the resolved path:
    if the lowercased suffix is in ``BINARY_FILE_EXTENSIONS`` the call is
    blocked and the returned reason points at a specialized tool for that
    file type.

    Args:
        tool_name: Name of the tool being called (used in the messages only)
        tool_args: Arguments passed to the tool

    Returns:
        Tuple of (allowed: bool, reason: Optional[str])
        - allowed: False if trying to read binary file, True otherwise
        - reason: Explanation if blocked (None if allowed)
    """
    file_path = self._extract_file_path(tool_args)
    if not file_path:
        # Can't determine path - allow (tool may not access files)
        return (True, None)

    try:
        path = Path(self._resolve_path_against_workspace(file_path))
    except Exception:
        # Best effort: if path resolution fails, allow (will fail elsewhere if invalid)
        return (True, None)

    file_extension = path.suffix.lower()
    if file_extension not in self.BINARY_FILE_EXTENSIONS:
        return (True, None)

    # First matching group wins; extensions without a dedicated tool
    # (archives, executables, legacy Office formats) get the generic hint.
    suggestion_groups = (
        (
            {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif"},
            "For images, use understand_image tool",
        ),
        (
            {".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".webm", ".m4v", ".mpg", ".mpeg"},
            "For videos, use understand_video tool",
        ),
        (
            # Point at understand_audio, in line with the other understand_* tools suggested here.
            {".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma"},
            "For audio files, use understand_audio tool",
        ),
        ({".pdf"}, "For PDF files, use understand_file tool"),
        ({".docx", ".xlsx", ".pptx"}, "For Office documents, use understand_file tool"),
    )
    suggestion = next(
        (hint for extensions, hint in suggestion_groups if file_extension in extensions),
        "Use appropriate specialized tool for this file type",
    )

    reason = f"Cannot read binary file '{path.name}' with {tool_name}. {suggestion}."
    logger.warning(f"[PathPermissionManager] Blocked {tool_name} from reading binary file: {path}")
    return (False, reason)
668
+
521
669
  def _is_delete_tool(self, tool_name: str) -> bool:
522
670
  """
523
671
  Check if a tool is a delete operation.