massgen 0.0.3__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +142 -8
- massgen/adapters/__init__.py +29 -0
- massgen/adapters/ag2_adapter.py +483 -0
- massgen/adapters/base.py +183 -0
- massgen/adapters/tests/__init__.py +0 -0
- massgen/adapters/tests/test_ag2_adapter.py +439 -0
- massgen/adapters/tests/test_agent_adapter.py +128 -0
- massgen/adapters/utils/__init__.py +2 -0
- massgen/adapters/utils/ag2_utils.py +236 -0
- massgen/adapters/utils/tests/__init__.py +0 -0
- massgen/adapters/utils/tests/test_ag2_utils.py +138 -0
- massgen/agent_config.py +329 -55
- massgen/api_params_handler/__init__.py +10 -0
- massgen/api_params_handler/_api_params_handler_base.py +99 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +176 -0
- massgen/api_params_handler/_claude_api_params_handler.py +113 -0
- massgen/api_params_handler/_response_api_params_handler.py +130 -0
- massgen/backend/__init__.py +39 -4
- massgen/backend/azure_openai.py +385 -0
- massgen/backend/base.py +341 -69
- massgen/backend/base_with_mcp.py +1102 -0
- massgen/backend/capabilities.py +386 -0
- massgen/backend/chat_completions.py +577 -130
- massgen/backend/claude.py +1033 -537
- massgen/backend/claude_code.py +1203 -0
- massgen/backend/cli_base.py +209 -0
- massgen/backend/docs/BACKEND_ARCHITECTURE.md +126 -0
- massgen/backend/{CLAUDE_API_RESEARCH.md → docs/CLAUDE_API_RESEARCH.md} +18 -18
- massgen/backend/{GEMINI_API_DOCUMENTATION.md → docs/GEMINI_API_DOCUMENTATION.md} +9 -9
- massgen/backend/docs/Gemini MCP Integration Analysis.md +1050 -0
- massgen/backend/docs/MCP_IMPLEMENTATION_CLAUDE_BACKEND.md +177 -0
- massgen/backend/docs/MCP_INTEGRATION_RESPONSE_BACKEND.md +352 -0
- massgen/backend/docs/OPENAI_GPT5_MODELS.md +211 -0
- massgen/backend/{OPENAI_RESPONSES_API_FORMAT.md → docs/OPENAI_RESPONSE_API_TOOL_CALLS.md} +3 -3
- massgen/backend/docs/OPENAI_response_streaming.md +20654 -0
- massgen/backend/docs/inference_backend.md +257 -0
- massgen/backend/docs/permissions_and_context_files.md +1085 -0
- massgen/backend/external.py +126 -0
- massgen/backend/gemini.py +1850 -241
- massgen/backend/grok.py +40 -156
- massgen/backend/inference.py +156 -0
- massgen/backend/lmstudio.py +171 -0
- massgen/backend/response.py +1095 -322
- massgen/chat_agent.py +131 -113
- massgen/cli.py +1560 -275
- massgen/config_builder.py +2396 -0
- massgen/configs/BACKEND_CONFIGURATION.md +458 -0
- massgen/configs/README.md +559 -216
- massgen/configs/ag2/ag2_case_study.yaml +27 -0
- massgen/configs/ag2/ag2_coder.yaml +34 -0
- massgen/configs/ag2/ag2_coder_case_study.yaml +36 -0
- massgen/configs/ag2/ag2_gemini.yaml +27 -0
- massgen/configs/ag2/ag2_groupchat.yaml +108 -0
- massgen/configs/ag2/ag2_groupchat_gpt.yaml +118 -0
- massgen/configs/ag2/ag2_single_agent.yaml +21 -0
- massgen/configs/basic/multi/fast_timeout_example.yaml +37 -0
- massgen/configs/basic/multi/gemini_4o_claude.yaml +31 -0
- massgen/configs/basic/multi/gemini_gpt5nano_claude.yaml +36 -0
- massgen/configs/{gemini_4o_claude.yaml → basic/multi/geminicode_4o_claude.yaml} +3 -3
- massgen/configs/basic/multi/geminicode_gpt5nano_claude.yaml +36 -0
- massgen/configs/basic/multi/glm_gemini_claude.yaml +25 -0
- massgen/configs/basic/multi/gpt4o_audio_generation.yaml +30 -0
- massgen/configs/basic/multi/gpt4o_image_generation.yaml +31 -0
- massgen/configs/basic/multi/gpt5nano_glm_qwen.yaml +26 -0
- massgen/configs/basic/multi/gpt5nano_image_understanding.yaml +26 -0
- massgen/configs/{three_agents_default.yaml → basic/multi/three_agents_default.yaml} +8 -4
- massgen/configs/basic/multi/three_agents_opensource.yaml +27 -0
- massgen/configs/basic/multi/three_agents_vllm.yaml +20 -0
- massgen/configs/basic/multi/two_agents_gemini.yaml +19 -0
- massgen/configs/{two_agents.yaml → basic/multi/two_agents_gpt5.yaml} +14 -6
- massgen/configs/basic/multi/two_agents_opensource_lmstudio.yaml +31 -0
- massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml +28 -0
- massgen/configs/{single_agent.yaml → basic/single/single_agent.yaml} +1 -1
- massgen/configs/{single_flash2.5.yaml → basic/single/single_flash2.5.yaml} +1 -2
- massgen/configs/basic/single/single_gemini2.5pro.yaml +16 -0
- massgen/configs/basic/single/single_gpt4o_audio_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_image_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_video_generation.yaml +24 -0
- massgen/configs/basic/single/single_gpt5nano.yaml +20 -0
- massgen/configs/basic/single/single_gpt5nano_file_search.yaml +18 -0
- massgen/configs/basic/single/single_gpt5nano_image_understanding.yaml +17 -0
- massgen/configs/basic/single/single_gptoss120b.yaml +15 -0
- massgen/configs/basic/single/single_openrouter_audio_understanding.yaml +15 -0
- massgen/configs/basic/single/single_qwen_video_understanding.yaml +15 -0
- massgen/configs/debug/code_execution/command_filtering_blacklist.yaml +29 -0
- massgen/configs/debug/code_execution/command_filtering_whitelist.yaml +28 -0
- massgen/configs/debug/code_execution/docker_verification.yaml +29 -0
- massgen/configs/debug/skip_coordination_test.yaml +27 -0
- massgen/configs/debug/test_sdk_migration.yaml +17 -0
- massgen/configs/docs/DISCORD_MCP_SETUP.md +208 -0
- massgen/configs/docs/TWITTER_MCP_ENESCINAR_SETUP.md +82 -0
- massgen/configs/providers/azure/azure_openai_multi.yaml +21 -0
- massgen/configs/providers/azure/azure_openai_single.yaml +19 -0
- massgen/configs/providers/claude/claude.yaml +14 -0
- massgen/configs/providers/gemini/gemini_gpt5nano.yaml +28 -0
- massgen/configs/providers/local/lmstudio.yaml +11 -0
- massgen/configs/providers/openai/gpt5.yaml +46 -0
- massgen/configs/providers/openai/gpt5_nano.yaml +46 -0
- massgen/configs/providers/others/grok_single_agent.yaml +19 -0
- massgen/configs/providers/others/zai_coding_team.yaml +108 -0
- massgen/configs/providers/others/zai_glm45.yaml +12 -0
- massgen/configs/{creative_team.yaml → teams/creative/creative_team.yaml} +16 -6
- massgen/configs/{travel_planning.yaml → teams/creative/travel_planning.yaml} +16 -6
- massgen/configs/{news_analysis.yaml → teams/research/news_analysis.yaml} +16 -6
- massgen/configs/{research_team.yaml → teams/research/research_team.yaml} +15 -7
- massgen/configs/{technical_analysis.yaml → teams/research/technical_analysis.yaml} +16 -6
- massgen/configs/tools/code-execution/basic_command_execution.yaml +25 -0
- massgen/configs/tools/code-execution/code_execution_use_case_simple.yaml +41 -0
- massgen/configs/tools/code-execution/docker_claude_code.yaml +32 -0
- massgen/configs/tools/code-execution/docker_multi_agent.yaml +32 -0
- massgen/configs/tools/code-execution/docker_simple.yaml +29 -0
- massgen/configs/tools/code-execution/docker_with_resource_limits.yaml +32 -0
- massgen/configs/tools/code-execution/multi_agent_playwright_automation.yaml +57 -0
- massgen/configs/tools/filesystem/cc_gpt5_gemini_filesystem.yaml +34 -0
- massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +68 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5.yaml +43 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5_gptoss.yaml +49 -0
- massgen/configs/tools/filesystem/claude_code_gpt5nano.yaml +31 -0
- massgen/configs/tools/filesystem/claude_code_single.yaml +40 -0
- massgen/configs/tools/filesystem/fs_permissions_test.yaml +87 -0
- massgen/configs/tools/filesystem/gemini_gemini_workspace_cleanup.yaml +54 -0
- massgen/configs/tools/filesystem/gemini_gpt5_filesystem_casestudy.yaml +30 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_file_context_path.yaml +43 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_protected_paths.yaml +45 -0
- massgen/configs/tools/filesystem/gpt5mini_cc_fs_context_path.yaml +31 -0
- massgen/configs/tools/filesystem/grok4_gpt5_gemini_filesystem.yaml +32 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_claude_code_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_gemini_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/two_claude_code_filesystem_multiturn.yaml +47 -0
- massgen/configs/tools/filesystem/multiturn/two_gemini_flash_filesystem_multiturn.yaml +48 -0
- massgen/configs/tools/mcp/claude_code_discord_mcp_example.yaml +27 -0
- massgen/configs/tools/mcp/claude_code_simple_mcp.yaml +35 -0
- massgen/configs/tools/mcp/claude_code_twitter_mcp_example.yaml +32 -0
- massgen/configs/tools/mcp/claude_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/claude_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/five_agents_travel_mcp_test.yaml +157 -0
- massgen/configs/tools/mcp/five_agents_weather_mcp_test.yaml +103 -0
- massgen/configs/tools/mcp/gemini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_sharing.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_single_agent.yaml +17 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_with_claude_code.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gemini_notion_mcp.yaml +52 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gpt5mini_claude_code_discord_mcp_example.yaml +38 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/multimcp_gemini.yaml +111 -0
- massgen/configs/tools/mcp/qwen_api_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/qwen_api_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/qwen_local_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/qwen_local_mcp_test.yaml +27 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +140 -0
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +155 -0
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +73 -0
- massgen/configs/tools/web-search/claude_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gemini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt5_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt_oss_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/grok3_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/qwen_api_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/qwen_local_streamable_http_test.yaml +43 -0
- massgen/coordination_tracker.py +708 -0
- massgen/docker/README.md +462 -0
- massgen/filesystem_manager/__init__.py +21 -0
- massgen/filesystem_manager/_base.py +9 -0
- massgen/filesystem_manager/_code_execution_server.py +545 -0
- massgen/filesystem_manager/_docker_manager.py +477 -0
- massgen/filesystem_manager/_file_operation_tracker.py +248 -0
- massgen/filesystem_manager/_filesystem_manager.py +813 -0
- massgen/filesystem_manager/_path_permission_manager.py +1261 -0
- massgen/filesystem_manager/_workspace_tools_server.py +1815 -0
- massgen/formatter/__init__.py +10 -0
- massgen/formatter/_chat_completions_formatter.py +284 -0
- massgen/formatter/_claude_formatter.py +235 -0
- massgen/formatter/_formatter_base.py +156 -0
- massgen/formatter/_response_formatter.py +263 -0
- massgen/frontend/__init__.py +1 -2
- massgen/frontend/coordination_ui.py +471 -286
- massgen/frontend/displays/base_display.py +56 -11
- massgen/frontend/displays/create_coordination_table.py +1956 -0
- massgen/frontend/displays/rich_terminal_display.py +1259 -619
- massgen/frontend/displays/simple_display.py +9 -4
- massgen/frontend/displays/terminal_display.py +27 -68
- massgen/logger_config.py +681 -0
- massgen/mcp_tools/README.md +232 -0
- massgen/mcp_tools/__init__.py +105 -0
- massgen/mcp_tools/backend_utils.py +1035 -0
- massgen/mcp_tools/circuit_breaker.py +195 -0
- massgen/mcp_tools/client.py +894 -0
- massgen/mcp_tools/config_validator.py +138 -0
- massgen/mcp_tools/docs/circuit_breaker.md +646 -0
- massgen/mcp_tools/docs/client.md +950 -0
- massgen/mcp_tools/docs/config_validator.md +478 -0
- massgen/mcp_tools/docs/exceptions.md +1165 -0
- massgen/mcp_tools/docs/security.md +854 -0
- massgen/mcp_tools/exceptions.py +338 -0
- massgen/mcp_tools/hooks.py +212 -0
- massgen/mcp_tools/security.py +780 -0
- massgen/message_templates.py +342 -64
- massgen/orchestrator.py +1515 -241
- massgen/stream_chunk/__init__.py +35 -0
- massgen/stream_chunk/base.py +92 -0
- massgen/stream_chunk/multimodal.py +237 -0
- massgen/stream_chunk/text.py +162 -0
- massgen/tests/mcp_test_server.py +150 -0
- massgen/tests/multi_turn_conversation_design.md +0 -8
- massgen/tests/test_azure_openai_backend.py +156 -0
- massgen/tests/test_backend_capabilities.py +262 -0
- massgen/tests/test_backend_event_loop_all.py +179 -0
- massgen/tests/test_chat_completions_refactor.py +142 -0
- massgen/tests/test_claude_backend.py +15 -28
- massgen/tests/test_claude_code.py +268 -0
- massgen/tests/test_claude_code_context_sharing.py +233 -0
- massgen/tests/test_claude_code_orchestrator.py +175 -0
- massgen/tests/test_cli_backends.py +180 -0
- massgen/tests/test_code_execution.py +679 -0
- massgen/tests/test_external_agent_backend.py +134 -0
- massgen/tests/test_final_presentation_fallback.py +237 -0
- massgen/tests/test_gemini_planning_mode.py +351 -0
- massgen/tests/test_grok_backend.py +7 -10
- massgen/tests/test_http_mcp_server.py +42 -0
- massgen/tests/test_integration_simple.py +198 -0
- massgen/tests/test_mcp_blocking.py +125 -0
- massgen/tests/test_message_context_building.py +29 -47
- massgen/tests/test_orchestrator_final_presentation.py +48 -0
- massgen/tests/test_path_permission_manager.py +2087 -0
- massgen/tests/test_rich_terminal_display.py +14 -13
- massgen/tests/test_timeout.py +133 -0
- massgen/tests/test_v3_3agents.py +11 -12
- massgen/tests/test_v3_simple.py +8 -13
- massgen/tests/test_v3_three_agents.py +11 -18
- massgen/tests/test_v3_two_agents.py +8 -13
- massgen/token_manager/__init__.py +7 -0
- massgen/token_manager/token_manager.py +400 -0
- massgen/utils.py +52 -16
- massgen/v1/agent.py +45 -91
- massgen/v1/agents.py +18 -53
- massgen/v1/backends/gemini.py +50 -153
- massgen/v1/backends/grok.py +21 -54
- massgen/v1/backends/oai.py +39 -111
- massgen/v1/cli.py +36 -93
- massgen/v1/config.py +8 -12
- massgen/v1/logging.py +43 -127
- massgen/v1/main.py +18 -32
- massgen/v1/orchestrator.py +68 -209
- massgen/v1/streaming_display.py +62 -163
- massgen/v1/tools.py +8 -12
- massgen/v1/types.py +9 -23
- massgen/v1/utils.py +5 -23
- massgen-0.1.0.dist-info/METADATA +1245 -0
- massgen-0.1.0.dist-info/RECORD +273 -0
- massgen-0.1.0.dist-info/entry_points.txt +2 -0
- massgen/frontend/logging/__init__.py +0 -9
- massgen/frontend/logging/realtime_logger.py +0 -197
- massgen-0.0.3.dist-info/METADATA +0 -568
- massgen-0.0.3.dist-info/RECORD +0 -76
- massgen-0.0.3.dist-info/entry_points.txt +0 -2
- /massgen/backend/{Function calling openai responses.md → docs/Function calling openai responses.md} +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/WHEEL +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,780 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Security utilities for MCP command validation and sanitization. These functions provide comprehensive security checks and validation for MCP servers and tools.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import ipaddress
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import shlex
|
|
10
|
+
import socket
|
|
11
|
+
import urllib.parse
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional, Set, Union
|
|
14
|
+
|
|
15
|
+
# Security validation constants (upper bounds used by the validators in this module)

# Command strings, their arguments and process settings
MAX_COMMAND_LENGTH = 1000
MAX_ARG_LENGTH = 500
MAX_ARGS_COUNT = 50
MAX_CWD_LENGTH = 500
MAX_TIMEOUT_SECONDS = 300

# Server names and URLs
MAX_SERVER_NAME_LENGTH = 100
MAX_URL_LENGTH = 2048

# Environment variables and headers (key / value lengths)
MAX_ENV_KEY_LENGTH = 100
MAX_ENV_VALUE_LENGTH = 1000
MAX_HEADER_KEY_LENGTH = 100
MAX_HEADER_VALUE_LENGTH = 1000

# Tool names
MAX_TOOL_NAME_LENGTH = 100
MAX_SERVER_NAME_FOR_TOOL_LENGTH = 50
MAX_FINAL_TOOL_NAME_LENGTH = 200

# Generic container and tool-argument limits
MAX_DICT_KEYS = 100
MAX_LIST_ITEMS = 1000
MAX_STRING_LENGTH = 10000
MAX_TOOL_ARG_DEPTH = 5
MAX_TOOL_ARG_SIZE = 10000
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _normalize_security_level(level: str) -> str:
|
|
38
|
+
"""
|
|
39
|
+
Normalize security level to a valid value.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
level: Security level string
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Normalized security level, defaults to "strict" for unknown values
|
|
46
|
+
"""
|
|
47
|
+
return level if level in {"strict", "moderate", "permissive"} else "strict"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _validate_non_empty_string(value: Any, field_name: str) -> None:
|
|
51
|
+
"""Validate that value is a non-empty string."""
|
|
52
|
+
if not isinstance(value, str) or not value.strip():
|
|
53
|
+
raise ValueError(f"{field_name} must be a non-empty string")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _validate_string_length(value: str, max_length: int, field_name: str) -> None:
|
|
57
|
+
"""Validate string length."""
|
|
58
|
+
if len(value) > max_length:
|
|
59
|
+
raise ValueError(f"{field_name} too long: {len(value)} > {max_length} characters")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _get_set_from_config(config: dict, key: str, default: Optional[List] = None) -> Optional[Set[str]]:
|
|
63
|
+
"""Extract a set from config, handling empty lists and None."""
|
|
64
|
+
value = config.get(key, default or [])
|
|
65
|
+
if not value:
|
|
66
|
+
return None
|
|
67
|
+
return set(value) if isinstance(value, (list, set, tuple)) else None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _get_dict_from_config(config: dict, key: str, default: Optional[dict] = None) -> dict:
|
|
71
|
+
"""Safely extract dict from config with type checking."""
|
|
72
|
+
value = config.get(key, default or {})
|
|
73
|
+
return value if isinstance(value, dict) else {}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def substitute_env_variables(text: str) -> str:
    """Expand ``${VAR_NAME}`` placeholders in ``text`` from the process environment.

    Only names made of upper-case letters, digits and underscores (not starting
    with a digit) are recognised; any other ``${...}`` sequence is left untouched.
    Non-string input, and strings without ``${``, are returned unchanged.

    Raises:
        ValueError: If a referenced environment variable is unset or contains
            only whitespace
    """
    if not (isinstance(text, str) and "${" in text):
        return text

    def _lookup(match):
        name = match.group(1)
        resolved = os.environ.get(name)
        if resolved is not None and resolved.strip() != "":
            return resolved
        raise ValueError(f"Required environment variable '{name}' is not set")

    placeholder = re.compile(r"\$\{([A-Z_][A-Z0-9_]*)\}")
    return placeholder.sub(_lookup, text)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _get_default_allowed_executables(level: str) -> Set[str]:
|
|
96
|
+
"""Get default allowed executables based on security level.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
level: Security level string
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
Set of allowed executable names (lowercase)
|
|
103
|
+
"""
|
|
104
|
+
base_strict: Set[str] = {
|
|
105
|
+
# Python interpreters
|
|
106
|
+
"python",
|
|
107
|
+
"python3",
|
|
108
|
+
"python3.8",
|
|
109
|
+
"python3.9",
|
|
110
|
+
"python3.10",
|
|
111
|
+
"python3.11",
|
|
112
|
+
"python3.12",
|
|
113
|
+
"python3.13",
|
|
114
|
+
"python3.14",
|
|
115
|
+
"py",
|
|
116
|
+
# Python package managers
|
|
117
|
+
"uv",
|
|
118
|
+
"uvx",
|
|
119
|
+
"pipx",
|
|
120
|
+
"pip",
|
|
121
|
+
"pip3",
|
|
122
|
+
# Node.js ecosystem
|
|
123
|
+
"node",
|
|
124
|
+
"npm",
|
|
125
|
+
"npx",
|
|
126
|
+
"yarn",
|
|
127
|
+
"pnpm",
|
|
128
|
+
"bun",
|
|
129
|
+
# Other runtimes
|
|
130
|
+
"deno",
|
|
131
|
+
"java",
|
|
132
|
+
"ruby",
|
|
133
|
+
"go",
|
|
134
|
+
"rust",
|
|
135
|
+
"cargo",
|
|
136
|
+
"fastmcp",
|
|
137
|
+
# System utilities (limited set)
|
|
138
|
+
"sh",
|
|
139
|
+
"bash",
|
|
140
|
+
"zsh",
|
|
141
|
+
"fish",
|
|
142
|
+
"powershell",
|
|
143
|
+
"pwsh",
|
|
144
|
+
"cmd",
|
|
145
|
+
}
|
|
146
|
+
if level == "strict":
|
|
147
|
+
return base_strict
|
|
148
|
+
if level == "moderate":
|
|
149
|
+
# Extend with common tooling used legitimately
|
|
150
|
+
return base_strict | {"git", "nodejs"}
|
|
151
|
+
if level == "permissive":
|
|
152
|
+
# Still curated; not unbounded
|
|
153
|
+
return base_strict | {"git", "curl", "wget", "nodejs"}
|
|
154
|
+
# Unknown levels fall back to strict
|
|
155
|
+
return base_strict
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def prepare_command(
    command: str,
    max_length: int = MAX_COMMAND_LENGTH,
    *,
    security_level: str = "strict",
    allowed_executables: Optional[Set[str]] = None,
) -> List[str]:
    """
    Sanitize a command and split it into parts before using it to run an MCP server.

    The raw string is checked for length, shell metacharacters and traversal
    sequences before it is tokenised with ``shlex``; the tokens are then checked
    for count and length, and the executable (first token) is matched against an
    allowlist.

    Args:
        command: Command line to validate
        max_length: Maximum accepted length of ``command`` in characters
        security_level: Level used to pick the default allowlist; unknown values
            are treated as "strict"
        allowed_executables: Explicit allowlist of executable names
            (case-insensitive). When None or empty, the defaults for
            ``security_level`` are used.

    Returns:
        List of command parts

    Raises:
        ValueError: If command contains dangerous characters or uses disallowed executables
    """
    if not command or not command.strip():
        raise ValueError("MCP command cannot be empty")

    # Check command length to prevent resource exhaustion
    if len(command) > max_length:
        raise ValueError(f"MCP command too long: {len(command)} > {max_length} characters")

    # Block dangerous characters that could enable shell injection
    dangerous_chars = ["&", "|", ";", "`", "$", "(", ")", "<", ">"]
    for char in dangerous_chars:
        if char in command:
            raise ValueError(f"MCP command cannot contain shell metacharacters: {char}")

    # Block dangerous patterns. The first three can only match strings that the
    # character check above already rejects; they are kept as defence in depth.
    dangerous_patterns = [
        r"\$\{.*\}",  # Variable expansion
        r"\$\(.*\)",  # Command substitution
        r"`.*`",  # Backtick command substitution
        r"\.\./",  # Directory traversal
        r"\.\.\\",  # Windows directory traversal ("..\")
        r"\\\.\\",  # Backslash-dot-backslash; rejected by earlier versions, kept for compatibility
    ]

    for pattern in dangerous_patterns:
        if re.search(pattern, command):
            raise ValueError(f"MCP command contains dangerous pattern: {pattern}")

    # Parse command using shlex for proper shell-like parsing
    try:
        parts = shlex.split(command)
    except ValueError as e:
        raise ValueError(f"Invalid command syntax: {e}") from e

    if not parts:
        raise ValueError("MCP command cannot be empty after parsing")

    # Validate number of arguments
    if len(parts) > MAX_ARGS_COUNT:
        raise ValueError(f"Too many command arguments: {len(parts)} > {MAX_ARGS_COUNT}")

    # Validate individual argument lengths
    for i, part in enumerate(parts):
        if len(part) > MAX_ARG_LENGTH:
            raise ValueError(f"Command argument {i} too long: {len(part)} > {MAX_ARG_LENGTH} characters")

    # Normalize security level for consistency
    normalized_level = _normalize_security_level(security_level)
    allowed = {name.lower() for name in (allowed_executables or _get_default_allowed_executables(normalized_level))}

    # Extract executable path and name robustly
    executable_path = Path(parts[0])
    # Basic traversal check (works for both relative and absolute)
    # Note: This is intentionally strict to prevent directory traversal attacks
    # Legitimate paths like /usr/bin/../bin/python should use /usr/bin/python instead
    if any(part == ".." for part in executable_path.parts):
        raise ValueError("MCP command path cannot contain parent directory components ('..')")

    # Derive base executable name (strip one common Windows extension, if present)
    base_name = executable_path.name
    lower_name = base_name.lower()
    for ext in (".exe", ".bat", ".cmd", ".ps1"):
        if lower_name.endswith(ext):
            base_name = base_name[: -len(ext)]
            lower_name = lower_name[: -len(ext)]
            break

    if lower_name not in allowed:
        raise ValueError(f"MCP command executable '{base_name}' is not allowed (level={security_level}). " f"Allowed executables: {sorted(allowed)}")

    return parts
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def validate_url(
    url: str,
    *,
    resolve_dns: bool = False,
    allow_private_ips: bool = False,
    allow_localhost: bool = False,
    allowed_hostnames: Optional[Set[str]] = None,
) -> bool:
    """
    Validate URL for security and correctness.

    Args:
        url: URL to validate
        resolve_dns: If True, resolve hostnames and validate the resulting IPs
        allow_private_ips: If True, do not block private/link-local/reserved ranges
        allow_localhost: If True, allow localhost/loopback addresses
        allowed_hostnames: Optional explicit allowlist for hostnames
            (case-insensitive). A listed hostname skips the localhost, IP and
            DNS checks; scheme and port are still validated.

    Returns:
        True if URL is valid and safe

    Raises:
        ValueError: If URL is invalid or potentially dangerous
    """
    if not url or not isinstance(url, str):
        raise ValueError("URL must be a non-empty string")

    if len(url) > MAX_URL_LENGTH:
        raise ValueError(f"URL too long: {len(url)} > {MAX_URL_LENGTH} characters")

    try:
        parsed = urllib.parse.urlparse(url)
    except Exception as e:
        raise ValueError(f"Invalid URL format: {e}") from e

    # Validate scheme
    if parsed.scheme not in ("http", "https"):
        raise ValueError(f"Unsupported URL scheme: {parsed.scheme}. Only http and https are allowed.")

    # Validate hostname
    if not parsed.hostname:
        raise ValueError("URL must include a hostname")

    hostname = parsed.hostname.lower()

    def _is_forbidden_ip(ip: Union[ipaddress.IPv4Address, ipaddress.IPv6Address]) -> bool:
        # allow_private_ips disables every range check, including loopback.
        if allow_private_ips:
            return False
        return ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved or ip.is_multicast or ip.is_unspecified

    def _is_rejected(ip: Union[ipaddress.IPv4Address, ipaddress.IPv6Address]) -> bool:
        # Loopback addresses are exempt from rejection when allow_localhost is set.
        return _is_forbidden_ip(ip) and not (allow_localhost and ip.is_loopback)

    # Explicit allowlist for hostnames overrides most checks (still validate scheme/port)
    # WARNING: Ensure allowed_hostnames contains only trusted hostnames as this bypasses IP validation
    hostname_is_allowlisted = bool(allowed_hostnames) and hostname in {h.lower() for h in allowed_hostnames}

    if not hostname_is_allowlisted:
        # Fast-path string checks for common loopback names
        if not allow_localhost and hostname in {"localhost", "ip6-localhost"}:
            raise ValueError(f"Hostname not allowed for security reasons: {hostname}")

        # Try to interpret hostname as an IP address (IPv4/IPv6)
        ip_obj: Optional[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]]
        try:
            ip_obj = ipaddress.ip_address(hostname)
        except ValueError:
            ip_obj = None

        if ip_obj is not None:
            # Hostname is a literal IP
            if _is_rejected(ip_obj):
                raise ValueError(f"IP address not allowed for security reasons: {hostname}")
        elif resolve_dns:
            # Resolve and validate all resolved addresses
            try:
                port_for_resolution = parsed.port if parsed.port is not None else (443 if parsed.scheme == "https" else 80)
                addrinfos = socket.getaddrinfo(hostname, port_for_resolution, proto=socket.IPPROTO_TCP)
            except socket.gaierror as e:
                raise ValueError(f"Failed to resolve hostname '{hostname}': {e}") from e

            for ai in addrinfos:
                ip_literal = ai[4][0]
                # Only the parse is guarded: the rejection below is itself a
                # ValueError and must not be swallowed with unparseable entries.
                try:
                    resolved_ip = ipaddress.ip_address(ip_literal)
                except ValueError:
                    # Skip unparseable entries
                    continue
                if _is_rejected(resolved_ip):
                    raise ValueError(f"Resolved IP not allowed for security reasons: {hostname} -> {resolved_ip}")

    # Validate port if specified
    if parsed.port is not None:
        if not (1 <= parsed.port <= 65535):
            raise ValueError(f"Invalid port number: {parsed.port}")

        # Block dangerous ports
        dangerous_ports = {
            22,
            23,
            25,
            53,
            135,
            139,
            445,
            1433,
            1521,
            3306,
            3389,
            5432,
            6379,
        }
        if parsed.port in dangerous_ports:
            raise ValueError(f"Port {parsed.port} is not allowed for security reasons")

    return True
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def validate_environment_variables(
    env: Dict[str, str],
    *,
    level: str = "strict",
    mode: str = "denylist",
    allowed_vars: Optional[Set[str]] = None,
    denied_vars: Optional[Set[str]] = None,
    max_key_length: int = MAX_ENV_KEY_LENGTH,
    max_value_length: int = MAX_ENV_VALUE_LENGTH,
) -> Dict[str, str]:
    """
    Validate environment variables for security.

    Every entry must be a ``str -> str`` pair within the length limits, must
    pass the active allow/deny policy (names are compared case-insensitively),
    and its value must not contain shell metacharacters or command
    substitution. A value may contain ``${NAME}`` only in the simple form
    accepted by the reference pattern below.

    Args:
        env: Environment variables dictionary
        level: Security level {"strict", "moderate", "permissive"}; it only
            affects the built-in default denylist ("strict" additionally
            blocks PATH, HOME, USER, USERNAME and SHELL)
        mode: Validation mode {"denylist", "allowlist"}
        allowed_vars: Optional explicit allowlist (case-insensitive) when mode is allowlist
        denied_vars: Optional explicit denylist (case-insensitive) when mode is denylist.
            When not None it replaces the built-in defaults entirely, so an
            empty set disables name-based denial.
        max_key_length: Maximum allowed environment variable name length
        max_value_length: Maximum allowed environment variable value length

    Returns:
        A new dictionary containing the validated environment variables

    Raises:
        ValueError: If ``env`` is not a dictionary, ``mode`` is not one of the
            supported modes, or any entry violates the checks above
    """
    if not isinstance(env, dict):
        raise ValueError("Environment variables must be a dictionary")

    # Normalize security level for consistency
    normalized_level = _normalize_security_level(level)

    # Fail closed on an unrecognized mode: previously a typo such as
    # "allow-list" silently fell through to denylist handling.
    if mode not in ("denylist", "allowlist"):
        raise ValueError(f"Invalid environment validation mode: {mode!r} (expected 'denylist' or 'allowlist')")

    # Variables that are denied at every security level
    default_deny: Set[str] = {
        "LD_LIBRARY_PATH",
        "DYLD_LIBRARY_PATH",
        "PYTHONPATH",
        "PWD",
        "OLDPWD",
    }
    # In strict mode, also block these commonly sensitive variables.
    # "moderate" and "permissive" add nothing to the base set.
    if normalized_level == "strict":
        default_deny |= {"PATH", "HOME", "USER", "USERNAME", "SHELL"}

    # An explicitly supplied denied_vars (even an empty set) overrides the defaults.
    denylist_active = {v.upper() for v in (denied_vars if denied_vars is not None else default_deny)}
    allowlist_active = {v.upper() for v in (allowed_vars or set())}

    # Loop invariants: shell metacharacters / command substitution markers, and
    # the only accepted shape for a value that contains "${".
    dangerous_patterns = ("$(", "`", "&", ";", "|")
    simple_reference = re.compile(r"^[^$]*\$\{[A-Z_][A-Z0-9_]*\}[^$]*$")

    validated_env: Dict[str, str] = {}

    for key, value in env.items():
        if not isinstance(key, str) or not isinstance(value, str):
            # Report the key and the value's type only; echoing the raw value
            # could leak a secret into logs or tracebacks.
            raise ValueError(f"Environment variable key and value must be strings: {key!r} (value type: {type(value).__name__})")

        if len(key) > max_key_length:
            raise ValueError(f"Environment variable name too long: {len(key)} > {max_key_length}")

        if len(value) > max_value_length:
            raise ValueError(f"Environment variable value too long: {len(value)} > {max_value_length}")

        upper_key = key.upper()

        # Apply allow/deny policies
        if mode == "allowlist":
            # NOTE(review): an empty allowlist imposes no restriction at all
            # (and the denylist is not consulted in this mode) - confirm this
            # fail-open behavior is intended.
            if allowlist_active and upper_key not in allowlist_active:
                raise ValueError(f"Environment variable '{key}' is not permitted by allowlist policy")
        elif upper_key in denylist_active:
            raise ValueError(f"Environment variable '{key}' is not allowed for security reasons")

        # Check for dangerous patterns in values
        for pattern in dangerous_patterns:
            if pattern in value:
                raise ValueError(f"Environment variable '{key}' contains dangerous pattern: {pattern}")

        # Special check for ${...} - allow only simple environment variable references
        if "${" in value and not simple_reference.match(value):
            raise ValueError(f"Environment variable '{key}' contains dangerous pattern: ${{")

        validated_env[key] = value

    return validated_env
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def validate_server_security(config: dict) -> dict:
    """
    Validate and sanitize MCP server configuration with comprehensive security checks.

    The configuration must carry a ``name`` and, depending on ``type``
    (default ``"stdio"``), is checked as follows:

    * ``stdio``: requires ``command`` or ``args``; ``command`` (string or list
      of strings) is passed through ``prepare_command`` and replaced by its
      result; ``args``, ``env`` and ``cwd`` are validated when present.
    * ``streamable-http``: requires ``url`` (checked with ``validate_url``);
      ``headers``, ``timeout`` and ``http_read_timeout`` are validated when
      present.

    An optional ``security`` mapping in the configuration tunes the checks
    (``level``, ``allowed_executables``, ``env``, ``allowed_hostnames``,
    ``resolve_dns``, ``allow_private_ips``, ``allow_localhost``).

    Args:
        config: Server configuration dictionary

    Returns:
        Validated configuration dictionary (a shallow copy of ``config`` with
        ``command`` and ``env`` replaced by their validated forms)

    Raises:
        ValueError: If configuration is invalid or insecure
    """
    if not isinstance(config, dict):
        raise ValueError("Server configuration must be a dictionary")

    # Create a copy to avoid modifying the original
    validated_config = config.copy()

    # Required fields
    if "name" not in validated_config:
        raise ValueError("Server configuration must include 'name'")

    # Validate server name
    server_name = validated_config["name"]
    _validate_non_empty_string(server_name, "Server name")
    _validate_string_length(server_name, MAX_SERVER_NAME_LENGTH, "Server name")

    # fullmatch instead of match + "$": "$" also matches just before a trailing
    # newline, which would let a name such as "server\n" through.
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", server_name):
        raise ValueError("Server name can only contain alphanumeric characters, underscores, and hyphens")

    transport_type = validated_config.get("type", "stdio")

    # Optional security policy configuration
    security_cfg = _get_dict_from_config(validated_config, "security")
    security_level = security_cfg.get("level", "strict")

    if transport_type == "stdio":
        # Validate stdio configuration
        if "command" not in validated_config and "args" not in validated_config:
            raise ValueError("Stdio server configuration must include 'command' or 'args'")

        # Sanitize command if present
        if "command" in validated_config:
            command = validated_config["command"]
            if isinstance(command, str):
                command_str = command
            elif isinstance(command, list):
                if not command:
                    raise ValueError("Command list cannot be empty")
                # shlex.quote raises TypeError on non-strings; report the
                # problem as the documented ValueError instead.
                for i, part in enumerate(command):
                    if not isinstance(part, str):
                        raise ValueError(f"Command element {i} must be a string")
                # Validate the command list by joining and re-parsing
                command_str = " ".join(shlex.quote(part) for part in command)
            else:
                raise ValueError("Command must be a string or list")

            validated_config["command"] = prepare_command(
                command_str,
                security_level=security_level,
                allowed_executables=_get_set_from_config(security_cfg, "allowed_executables"),
            )

        # Validate arguments if present
        if "args" in validated_config:
            args = validated_config["args"]
            if not isinstance(args, list):
                raise ValueError("Arguments must be a list")

            for i, arg in enumerate(args):
                if not isinstance(arg, str):
                    raise ValueError(f"Argument {i} must be a string")
                if len(arg) > MAX_ARG_LENGTH:
                    raise ValueError(f"Argument {i} too long: {len(arg)} > {MAX_ARG_LENGTH} characters")

        # Validate environment variables if present
        if "env" in validated_config:
            env_policy = _get_dict_from_config(security_cfg, "env")
            validated_config["env"] = validate_environment_variables(
                validated_config["env"],
                level=env_policy.get("level", security_level),
                mode=env_policy.get("mode", "denylist"),
                allowed_vars=_get_set_from_config(env_policy, "allowed_vars") or set(),
                denied_vars=_get_set_from_config(env_policy, "denied_vars"),
            )

        # Validate working directory if present
        if "cwd" in validated_config:
            cwd = validated_config["cwd"]
            if not isinstance(cwd, str):
                raise ValueError("Working directory must be a string")
            _validate_string_length(cwd, MAX_CWD_LENGTH, "Working directory path")
            # Allow absolute or relative paths, but forbid parent traversal
            if ".." in Path(cwd).parts:
                raise ValueError("Working directory cannot contain parent directory components ('..')")

    elif transport_type == "streamable-http":
        # Validate streamable HTTP configuration
        if "url" not in validated_config:
            raise ValueError(f"{transport_type} server configuration must include 'url'")

        # Prepare optional allowlist for hostnames if provided
        allowed_hostnames_cfg = security_cfg.get("allowed_hostnames")
        allowed_hostnames = None
        if isinstance(allowed_hostnames_cfg, (list, set, tuple)):
            # Keep only string-like entries. Bytes are decoded rather than
            # passed to str(), which would yield the repr "b'host'" and never
            # match a real hostname.
            allowed_hostnames = {
                h.decode("utf-8", errors="replace") if isinstance(h, bytes) else h
                for h in allowed_hostnames_cfg
                if isinstance(h, (str, bytes))
            }

        # Use enhanced URL validation
        validate_url(
            validated_config["url"],
            resolve_dns=bool(security_cfg.get("resolve_dns", False)),
            allow_private_ips=bool(security_cfg.get("allow_private_ips", False)),
            allow_localhost=bool(security_cfg.get("allow_localhost", False)),
            allowed_hostnames=allowed_hostnames,
        )

        # Validate headers if present
        if "headers" in validated_config:
            headers = validated_config["headers"]
            if not isinstance(headers, dict):
                raise ValueError("Headers must be a dictionary")

            for key, value in headers.items():
                if not isinstance(key, str) or not isinstance(value, str):
                    raise ValueError("Header keys and values must be strings")
                _validate_string_length(key, MAX_HEADER_KEY_LENGTH, "Header name")
                _validate_string_length(value, MAX_HEADER_VALUE_LENGTH, "Header value")

        # Validate timeout and http_read_timeout if present (same rules for both)
        for field, label in (("timeout", "Timeout"), ("http_read_timeout", "http_read_timeout")):
            if field not in validated_config:
                continue
            seconds = validated_config[field]
            # bool is a subclass of int, so exclude it explicitly; "not > 0"
            # (rather than "<= 0") also rejects NaN.
            if isinstance(seconds, bool) or not isinstance(seconds, (int, float)) or not seconds > 0:
                raise ValueError(f"{label} must be a positive number")
            if seconds > MAX_TIMEOUT_SECONDS:
                raise ValueError(f"{label} too large: {seconds} > {MAX_TIMEOUT_SECONDS} seconds")

    else:
        # List supported transport types for better error messages
        supported_types = ["stdio", "streamable-http"]
        raise ValueError(
            f"Unsupported transport type: {transport_type}. "
            f"Supported types: {supported_types}. "
            "Note: 'sse' transport was deprecated in MCP v2025-03-26, use 'streamable-http' instead.",
        )

    return validated_config
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def sanitize_tool_name(tool_name: str, server_name: str) -> str:
    """
    Create a sanitized tool name with server prefix and comprehensive validation.

    An existing ``mcp__<server>__`` prefix on ``tool_name`` is removed first so
    the result is never double-prefixed. Leading/trailing ``_``, ``-`` (and
    ``.`` for tool names) are stripped before the final name is assembled as
    ``mcp__<server>__<tool>``.

    Args:
        tool_name: Original tool name
        server_name: Server name for prefixing

    Returns:
        Sanitized tool name with prefix

    Raises:
        ValueError: If tool name or server name is invalid, reserved, contains
            disallowed characters, becomes empty after stripping, or the final
            name exceeds the length limit
    """
    _validate_non_empty_string(tool_name, "Tool name")
    _validate_non_empty_string(server_name, "Server name")

    # Length limits
    _validate_string_length(tool_name, MAX_TOOL_NAME_LENGTH, "Tool name")
    _validate_string_length(server_name, MAX_SERVER_NAME_FOR_TOOL_LENGTH, "Server name")

    # Remove any existing mcp__ prefix to avoid double-prefixing
    if tool_name.startswith("mcp__"):
        tool_name = tool_name[5:]
        # Drop the "<server>__" part of an already-prefixed name; when "__" is
        # present a single split always yields exactly two parts.
        if "__" in tool_name:
            tool_name = tool_name.split("__", 1)[1]

    # Reserved tool names that shouldn't be used
    reserved_names = {
        "connect",
        "disconnect",
        "list",
        "help",
        "version",
        "status",
        "health",
        "ping",
        "debug",
        "admin",
        "system",
        "config",
        "settings",
        "auth",
        "login",
        "logout",
        "exit",
        "quit",
    }

    # Compare the stripped form: the final name is built from the stripped
    # tool name, so "_help" or "help." must not slip past this check and end
    # up as the reserved "help".
    if tool_name.strip("_.-").lower() in reserved_names:
        raise ValueError(f"Tool name '{tool_name}' is reserved and cannot be used")

    # Validate characters - allow alphanumeric, underscore, hyphen, and dot.
    # fullmatch is used because "$" in re.match also matches before a trailing
    # newline, which would accept names such as "tool\n".
    if not re.fullmatch(r"[a-zA-Z0-9_.-]+", tool_name):
        raise ValueError(f"Tool name '{tool_name}' contains invalid characters. Only alphanumeric, underscore, hyphen, and dot are allowed.")

    if not re.fullmatch(r"[a-zA-Z0-9_-]+", server_name):
        raise ValueError(f"Server name '{server_name}' contains invalid characters. Only alphanumeric, underscore, and hyphen are allowed.")

    # Ensure names don't start or end with special characters
    safe_server_name = server_name.strip("_-")
    safe_tool_name = tool_name.strip("_.-")

    if not safe_server_name:
        raise ValueError(f"Server name '{server_name}' becomes empty after sanitization")

    if not safe_tool_name:
        raise ValueError(f"Tool name '{tool_name}' becomes empty after sanitization")

    # Create final tool name
    final_name = f"mcp__{safe_server_name}__{safe_tool_name}"

    # Final length check
    _validate_string_length(final_name, MAX_FINAL_TOOL_NAME_LENGTH, "Final tool name")

    return final_name
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def validate_tool_arguments(arguments: Dict[str, Any], max_depth: int = MAX_TOOL_ARG_DEPTH, max_size: int = MAX_TOOL_ARG_SIZE) -> Dict[str, Any]:
    """
    Check tool arguments against nesting, collection-size and payload-size limits.

    The argument tree is walked recursively and rebuilt: dictionaries and
    lists are copied, non-string dictionary keys are converted with ``str()``,
    strings, numbers, booleans and ``None`` are kept as-is, and any other
    value is replaced by its ``str()`` form. While walking, an approximate
    JSON-like size is accumulated and compared against ``max_size``.

    Args:
        arguments: Tool arguments dictionary
        max_depth: Maximum nesting depth allowed
        max_size: Maximum total size of arguments (rough estimate)

    Returns:
        Validated arguments dictionary

    Raises:
        ValueError: If arguments are not a dictionary, are nested too deeply,
            or exceed any of the size limits
    """
    if not isinstance(arguments, dict):
        raise ValueError("Tool arguments must be a dictionary")

    running_total = 0

    def _charge(cost: int) -> None:
        # Add to the running size estimate and abort as soon as the budget is exceeded.
        nonlocal running_total
        running_total += cost
        if running_total > max_size:
            raise ValueError(f"Tool arguments too large: ~{running_total} > {max_size} bytes")

    def _scalar_cost(item: Any) -> int:
        # Approximate serialized length of a leaf value; this is an estimate
        # only and ignores escaping and other JSON encoding overhead.
        if item is None:
            return 4  # null
        if isinstance(item, bool):  # must precede the numeric check: bool is an int
            return 4 if item else 5  # true/false
        if isinstance(item, str):
            return len(item) + 2  # text plus two quote characters
        if isinstance(item, (int, float)):
            return len(str(item))
        return len(str(item)) + 2

    def _walk(node: Any, level: int = 0) -> Any:
        if level > max_depth:
            raise ValueError(f"Tool arguments nested too deeply: {level} > {max_depth}")

        if isinstance(node, dict):
            if len(node) > MAX_DICT_KEYS:
                raise ValueError(f"Dictionary too large: {len(node)} > {MAX_DICT_KEYS} keys")
            _charge(2)  # surrounding braces
            rebuilt: Dict[str, Any] = {}
            for position, (raw_key, child) in enumerate(node.items()):
                name = raw_key if isinstance(raw_key, str) else str(raw_key)
                if position:
                    _charge(1)  # separator between entries
                _charge(_scalar_cost(name) + 1)  # key plus the colon
                rebuilt[name] = _walk(child, level + 1)
            return rebuilt

        if isinstance(node, list):
            if len(node) > MAX_LIST_ITEMS:
                raise ValueError(f"List too large: {len(node)} > {MAX_LIST_ITEMS} items")
            _charge(2)  # surrounding brackets
            rebuilt_items = []
            for position, element in enumerate(node):
                if position:
                    _charge(1)  # separator between items
                rebuilt_items.append(_walk(element, level + 1))
            return rebuilt_items

        if isinstance(node, str):
            if len(node) > MAX_STRING_LENGTH:
                raise ValueError(f"String too long: {len(node)} > {MAX_STRING_LENGTH} characters")
            _charge(_scalar_cost(node))
            return node

        if node is None or isinstance(node, (int, float, bool)):
            _charge(_scalar_cost(node))
            return node

        # Anything else is coerced to its string form and limited like a string.
        rendered = str(node)
        if len(rendered) > MAX_STRING_LENGTH:
            raise ValueError(f"Value too large when converted to string: {len(rendered)} > {MAX_STRING_LENGTH}")
        _charge(_scalar_cost(rendered))
        return rendered

    return _walk(arguments)
|