amazon-ads-mcp 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amazon_ads_mcp/__init__.py +11 -0
- amazon_ads_mcp/auth/__init__.py +33 -0
- amazon_ads_mcp/auth/base.py +211 -0
- amazon_ads_mcp/auth/hooks.py +172 -0
- amazon_ads_mcp/auth/manager.py +791 -0
- amazon_ads_mcp/auth/oauth_state_store.py +277 -0
- amazon_ads_mcp/auth/providers/__init__.py +14 -0
- amazon_ads_mcp/auth/providers/direct.py +393 -0
- amazon_ads_mcp/auth/providers/example_auth0.py.example +216 -0
- amazon_ads_mcp/auth/providers/openbridge.py +512 -0
- amazon_ads_mcp/auth/registry.py +146 -0
- amazon_ads_mcp/auth/secure_token_store.py +297 -0
- amazon_ads_mcp/auth/token_store.py +723 -0
- amazon_ads_mcp/config/__init__.py +5 -0
- amazon_ads_mcp/config/sampling.py +111 -0
- amazon_ads_mcp/config/settings.py +366 -0
- amazon_ads_mcp/exceptions.py +314 -0
- amazon_ads_mcp/middleware/__init__.py +11 -0
- amazon_ads_mcp/middleware/authentication.py +1474 -0
- amazon_ads_mcp/middleware/caching.py +177 -0
- amazon_ads_mcp/middleware/oauth.py +175 -0
- amazon_ads_mcp/middleware/sampling.py +112 -0
- amazon_ads_mcp/models/__init__.py +320 -0
- amazon_ads_mcp/models/amc_models.py +837 -0
- amazon_ads_mcp/models/api_responses.py +847 -0
- amazon_ads_mcp/models/base_models.py +215 -0
- amazon_ads_mcp/models/builtin_responses.py +496 -0
- amazon_ads_mcp/models/dsp_models.py +556 -0
- amazon_ads_mcp/models/stores_brands.py +610 -0
- amazon_ads_mcp/server/__init__.py +6 -0
- amazon_ads_mcp/server/__main__.py +6 -0
- amazon_ads_mcp/server/builtin_prompts.py +269 -0
- amazon_ads_mcp/server/builtin_tools.py +962 -0
- amazon_ads_mcp/server/file_routes.py +547 -0
- amazon_ads_mcp/server/html_templates.py +149 -0
- amazon_ads_mcp/server/mcp_server.py +327 -0
- amazon_ads_mcp/server/openapi_utils.py +158 -0
- amazon_ads_mcp/server/sampling_handler.py +251 -0
- amazon_ads_mcp/server/server_builder.py +751 -0
- amazon_ads_mcp/server/sidecar_loader.py +178 -0
- amazon_ads_mcp/server/transform_executor.py +827 -0
- amazon_ads_mcp/tools/__init__.py +22 -0
- amazon_ads_mcp/tools/cache_management.py +105 -0
- amazon_ads_mcp/tools/download_tools.py +267 -0
- amazon_ads_mcp/tools/identity.py +236 -0
- amazon_ads_mcp/tools/oauth.py +598 -0
- amazon_ads_mcp/tools/profile.py +150 -0
- amazon_ads_mcp/tools/profile_listing.py +285 -0
- amazon_ads_mcp/tools/region.py +320 -0
- amazon_ads_mcp/tools/region_identity.py +175 -0
- amazon_ads_mcp/utils/__init__.py +6 -0
- amazon_ads_mcp/utils/async_compat.py +215 -0
- amazon_ads_mcp/utils/errors.py +452 -0
- amazon_ads_mcp/utils/export_content_type_resolver.py +249 -0
- amazon_ads_mcp/utils/export_download_handler.py +579 -0
- amazon_ads_mcp/utils/header_resolver.py +81 -0
- amazon_ads_mcp/utils/http/__init__.py +56 -0
- amazon_ads_mcp/utils/http/circuit_breaker.py +127 -0
- amazon_ads_mcp/utils/http/client_manager.py +329 -0
- amazon_ads_mcp/utils/http/request.py +207 -0
- amazon_ads_mcp/utils/http/resilience.py +512 -0
- amazon_ads_mcp/utils/http/resilient_client.py +195 -0
- amazon_ads_mcp/utils/http/retry.py +76 -0
- amazon_ads_mcp/utils/http_client.py +873 -0
- amazon_ads_mcp/utils/media/__init__.py +21 -0
- amazon_ads_mcp/utils/media/negotiator.py +243 -0
- amazon_ads_mcp/utils/media/types.py +199 -0
- amazon_ads_mcp/utils/openapi/__init__.py +16 -0
- amazon_ads_mcp/utils/openapi/json.py +55 -0
- amazon_ads_mcp/utils/openapi/loader.py +263 -0
- amazon_ads_mcp/utils/openapi/refs.py +46 -0
- amazon_ads_mcp/utils/region_config.py +200 -0
- amazon_ads_mcp/utils/response_wrapper.py +171 -0
- amazon_ads_mcp/utils/sampling_helpers.py +156 -0
- amazon_ads_mcp/utils/sampling_wrapper.py +173 -0
- amazon_ads_mcp/utils/security.py +630 -0
- amazon_ads_mcp/utils/tool_naming.py +137 -0
- amazon_ads_mcp-0.2.7.dist-info/METADATA +664 -0
- amazon_ads_mcp-0.2.7.dist-info/RECORD +82 -0
- amazon_ads_mcp-0.2.7.dist-info/WHEEL +4 -0
- amazon_ads_mcp-0.2.7.dist-info/entry_points.txt +3 -0
- amazon_ads_mcp-0.2.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
"""Security utilities for sanitization, validation, and secure logging.
|
|
2
|
+
|
|
3
|
+
This module consolidates all security-related functionality including:
|
|
4
|
+
- Log sanitization and secure logging setup
|
|
5
|
+
- Input validation and sanitization
|
|
6
|
+
- Pattern matching for sensitive data
|
|
7
|
+
- SQL injection prevention
|
|
8
|
+
- XSS protection
|
|
9
|
+
- Secure logging with automatic redaction
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import copy
|
|
13
|
+
import html
|
|
14
|
+
import logging
|
|
15
|
+
import re
|
|
16
|
+
import sys
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Import ValidationError - using a different approach to avoid circular imports
|
|
21
|
+
def _import_validation_error():
    """Resolve the project's ``ValidationError`` class on demand.

    The import statement lives inside the function body, so it only runs
    when the function is called rather than when the definition is read.

    :return: The ``ValidationError`` class exported by
        ``amazon_ads_mcp.utils.errors``
    :rtype: Type[Exception]
    :raises ImportError: If ``amazon_ads_mcp.utils.errors`` cannot be imported
    """
    from amazon_ads_mcp.utils.errors import ValidationError as error_cls

    return error_cls
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Get ValidationError class at module load time
|
|
33
|
+
try:
    # Preferred path: reuse the exception type defined by the errors module
    # so that every part of the package raises the same class.
    ValidationError = _import_validation_error()
except ImportError:
    # The errors module could not be imported (for example because of a
    # circular import), so define a stand-in with the same constructor
    # shape used by this module: a message plus an optional field name.
    class ValidationError(Exception):
        """Input validation error (local fallback definition).

        Carries an optional field name alongside the message so callers
        can report which input failed validation.

        :param message: Error message describing the validation failure
        :type message: str
        :param field: Optional field name that failed validation
        :type field: Optional[str]
        """

        def __init__(self, message: str, field: Optional[str] = None):
            super().__init__(message)
            # Name of the offending input, or None when not tied to a field.
            self.field = field
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Patterns for sensitive data detection
|
|
56
|
+
# Compiled patterns used to detect sensitive values inside free text.
# Keys are used as labels in the redaction placeholder.
SENSITIVE_PATTERNS = {
    # Three dot-separated base64url-style segments, the first starting "eyJ".
    "jwt_token": re.compile(r"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
    # "Bearer" followed by whitespace and a token (case-insensitive).
    "bearer_token": re.compile(r"Bearer\s+[A-Za-z0-9_-]+", re.IGNORECASE),
    # Any run of 32 or more ASCII letters/digits. Deliberately broad, so it
    # also matches long hashes and identifiers that are not secrets.
    "api_key": re.compile(r"[A-Za-z0-9]{32,}"),
    # "Basic" followed by whitespace and base64 characters (case-insensitive).
    "basic_auth": re.compile(r"Basic\s+[A-Za-z0-9+/=]+", re.IGNORECASE),
}

# Headers that should never be logged.
# Entries are lower-case; compare against a lower-cased header name.
SENSITIVE_HEADERS = {
    "authorization",
    "x-api-key",
    "x-auth-token",
    "cookie",
    "set-cookie",
    "x-csrf-token",
    "x-access-token",
    "x-refresh-token",
}

# SQL injection patterns (uncompiled regex source strings; the flags are
# chosen by whichever function applies them).
SQL_INJECTION_PATTERNS = [
    r"(\b(INSERT|UPDATE|DELETE|DROP|UNION|CREATE|ALTER|EXEC|EXECUTE)\b)",
    r"(--|\#|\/\*|\*\/)",  # SQL comments
    r"(\bOR\b.*=.*)",  # OR conditions
    r"(;.*\b(SELECT|INSERT|UPDATE|DELETE)\b)",  # Multiple statements
]

# XSS patterns (uncompiled regex source strings; the flags are chosen by
# whichever function applies them).
XSS_PATTERNS = [
    r"<script[^>]*>.*?</script>",
    r"javascript:",
    r"on\w+\s*=",  # Event handlers
    r"<iframe[^>]*>",
]
|
|
90
|
+
|
|
91
|
+
# =============================================================================
|
|
92
|
+
# String and Log Sanitization
|
|
93
|
+
# =============================================================================
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def sanitize_string(value: str, partial: bool = False) -> str:
    """Replace a string with a placeholder if it contains sensitive data.

    The patterns in ``SENSITIVE_PATTERNS`` are tried in their definition
    order. On the first match the *entire* input is replaced by a
    placeholder naming that pattern; the matched fragment is not cut out
    of the surrounding text.

    :param value: String to sanitize
    :type value: str
    :param partial: If True and the input is longer than 10 characters,
        the placeholder reports the input length instead of ``REDACTED``
    :type partial: bool
    :return: The input unchanged when nothing matches (or it is empty),
        otherwise a ``<pattern_name:...>`` placeholder
    :rtype: str
    """
    if not value:
        return value

    # Name of the first pattern that hits, or None when the text is clean.
    hit = next(
        (label for label, regex in SENSITIVE_PATTERNS.items() if regex.search(value)),
        None,
    )
    if hit is None:
        return value

    if partial and len(value) > 10:
        return f"<{hit}:length={len(value)}>"
    return f"<{hit}:REDACTED>"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def sanitize_headers(headers: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of HTTP headers that is safe to log.

    Headers whose lower-cased name is listed in ``SENSITIVE_HEADERS`` are
    replaced by a placeholder. Every other string value is passed through
    :func:`sanitize_string`. Non-string values of non-sensitive headers are
    kept as deep copies.

    :param headers: Dictionary of HTTP headers
    :type headers: Dict[str, Any]
    :return: Sanitized deep copy of the headers; a falsy input is returned
        as-is
    :rtype: Dict[str, Any]
    """
    if not headers:
        return headers

    result = copy.deepcopy(headers)
    for name, raw in headers.items():
        is_text = isinstance(raw, str)
        if name.lower() in SENSITIVE_HEADERS:
            # Report the length only for non-empty strings.
            if is_text and raw:
                result[name] = f"<REDACTED:length={len(raw)}>"
            else:
                result[name] = "<REDACTED>"
        elif is_text:
            result[name] = sanitize_string(raw)
    return result
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def sanitize_url(url: str) -> str:
    """Redact credential-like values embedded in a URL.

    For each sensitive parameter name, the value following ``name=``,
    ``name/`` or ``name:`` is replaced with ``<REDACTED>``. Matching is
    case-insensitive and unanchored, so a name also matches when it is the
    tail of a longer word.

    :param url: URL to sanitize
    :type url: str
    :return: URL with sensitive values redacted; a falsy input is returned
        as-is
    :rtype: str
    """
    if not url:
        return url

    secret_names = (
        "token",
        "key",
        "secret",
        "password",
        "auth",
        "access_token",
        "api_key",
        "client_secret",
    )
    # (separator after the name, characters that make up the value)
    value_shapes = (
        ("=", r"[^&\s]+"),  # query parameter: runs up to the next "&"
        ("/", r"[^/\s]+"),  # path segment: runs up to the next "/"
        (":", r"[^/\s]+"),  # "name:value" form: runs up to the next "/"
    )

    redacted = url
    for name in secret_names:
        for separator, value_chars in value_shapes:
            redacted = re.sub(
                f"({name}{separator}){value_chars}",
                r"\1<REDACTED>",
                redacted,
                flags=re.IGNORECASE,
            )
    return redacted
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def safe_log_dict(
    data: Dict[str, Any], sanitize_keys: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Create a safe version of a dictionary for logging.

    Works on a deep copy, so the caller's data is never modified. Values
    under a key whose lower-cased name contains a sensitive marker are
    replaced by ``"<REDACTED>"``. Every other string, including strings
    nested inside lists, is passed through :func:`sanitize_string`. Nested
    dicts and lists are processed recursively.

    :param data: Dictionary to sanitize
    :type data: Dict[str, Any]
    :param sanitize_keys: Additional key markers to redact beyond the
        defaults; matched case-insensitively as substrings of the key
    :type sanitize_keys: Optional[List[str]]
    :return: Sanitized copy safe for logging; a falsy input is returned
        as-is
    :rtype: Dict[str, Any]
    """
    if not data:
        return data

    markers = {"password", "token", "secret", "key", "auth"}
    if sanitize_keys:
        # Keys are lower-cased before comparison, so the markers must be too;
        # otherwise a caller-supplied "ApiKey" could never match.
        markers.update(str(extra).lower() for extra in sanitize_keys)

    def _is_sensitive(key: Any) -> bool:
        # str() keeps non-string keys (ints, tuples) from crashing .lower().
        lowered = str(key).lower()
        return any(marker in lowered for marker in markers)

    def _scrub(obj: Any) -> Any:
        if isinstance(obj, dict):
            # Reassigning existing keys does not resize the dict, so it is
            # safe to do while iterating.
            for key, value in obj.items():
                obj[key] = "<REDACTED>" if _is_sensitive(key) else _scrub(value)
            return obj
        if isinstance(obj, list):
            return [_scrub(item) for item in obj]
        if isinstance(obj, str):
            # Also covers strings that sit directly inside a list.
            return sanitize_string(obj)
        return obj

    return _scrub(copy.deepcopy(data))
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# =============================================================================
|
|
222
|
+
# Secure Logging Setup
|
|
223
|
+
# =============================================================================
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class SanitizingFormatter(logging.Formatter):
    """Formatter that sanitizes the log message before formatting it.

    The message is first merged with its arguments, then passed through
    :func:`sanitize_string`. The record is modified in place (``msg`` is
    replaced, ``args`` is cleared), so handlers that process the same
    record afterwards see the sanitized message.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format a log record with automatic sanitization.

        :param record: Log record to format
        :type record: logging.LogRecord
        :return: Formatted log line built from the sanitized message
        :rtype: str
        """
        try:
            if getattr(record, "args", None):
                try:
                    # getMessage() applies str(msg) % args as logging does.
                    merged = record.getMessage()
                except (TypeError, ValueError, KeyError):
                    # The arguments do not fit the format string. Emit the
                    # message followed by the arguments, each sanitized on
                    # its own, so the record can still be rendered.
                    extras = record.args
                    if isinstance(extras, dict):
                        extras = tuple(extras.values())
                    pieces = [sanitize_string(str(record.msg))]
                    pieces.extend(sanitize_string(str(item)) for item in extras)
                    rendered = " ".join(pieces)
                else:
                    rendered = sanitize_string(merged)
                record.msg = rendered
                # Must be cleared: the base class would otherwise apply the
                # arguments a second time and fail on the same mismatch.
                record.args = None
            else:
                # No args, just sanitize the message
                record.msg = sanitize_string(str(record.msg))
        except Exception as e:
            # Sanitization must never take the logging system down; report
            # the problem on stderr and format the record as it stands.
            print(f"Warning: Failed to sanitize log record: {e}", file=sys.stderr)

        return super().format(record)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# Global flag to track if logging has been set up
_LOGGING_CONFIGURED = False


def setup_secure_logging(level: str = "INFO") -> None:
    """Set up logging with automatic sanitization.

    Installs a single stream handler with :class:`SanitizingFormatter` on
    the root logger. Selected named loggers have their own handlers removed
    and propagate to the root, so each record is emitted exactly once.
    Only the first call has any effect; later calls return immediately.

    :param level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL),
        case-insensitive; an unrecognised value falls back to INFO
    :type level: str
    :return: None
    :rtype: None
    """
    global _LOGGING_CONFIGURED

    # Skip if already configured (singleton pattern)
    if _LOGGING_CONFIGURED:
        logging.getLogger(__name__).debug(
            "Logging already configured, skipping duplicate setup"
        )
        return

    # Resolve the level name defensively: an unknown name must not abort
    # start-up with AttributeError.
    numeric_level = getattr(logging, str(level).upper(), None)
    level_recognised = isinstance(numeric_level, int) and not isinstance(
        numeric_level, bool
    )
    if not level_recognised:
        numeric_level = logging.INFO

    # Create and configure our handler
    formatter = SanitizingFormatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    # NOTE(review): this writes log lines to stdout. If the server is ever
    # run over a stdio-based transport, stdout is presumably the protocol
    # channel and logging should go to stderr instead. Confirm before
    # changing; the stream is left as-is here.
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # force=True removes (and closes) any handlers already attached to the
    # root logger, so no manual clearing is needed beforehand.
    logging.basicConfig(
        level=numeric_level,
        handlers=[handler],
        force=True,  # Override any existing configuration
    )

    # Suppress duplicate logging from uvicorn/httpx if running in HTTP mode
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error"]:
        uv_logger = logging.getLogger(logger_name)
        uv_logger.handlers.clear()  # Remove default handlers
        uv_logger.propagate = True  # Let root logger handle it

    # Also configure specific loggers. They must NOT get the handler
    # attached directly: their records already propagate to the root
    # logger, which owns the same handler, so attaching it here would print
    # every record twice.
    for logger_name in ["mcp_query_engine", "sql_tools", "auth_tools"]:
        named_logger = logging.getLogger(logger_name)
        named_logger.handlers.clear()
        named_logger.propagate = True
        named_logger.setLevel(numeric_level)

    _LOGGING_CONFIGURED = True

    module_logger = logging.getLogger(__name__)
    if not level_recognised:
        module_logger.warning("Unknown log level %r; falling back to INFO", level)

    # Log handler counts for diagnostics
    if numeric_level == logging.DEBUG:
        root_logger = logging.getLogger()
        module_logger.debug(f"Root logger has {len(root_logger.handlers)} handler(s)")
        for key_logger in ["httpx", "mcp.server.lowlevel.server"]:
            test_logger = logging.getLogger(key_logger)
            module_logger.debug(
                f"Logger '{key_logger}' has {len(test_logger.handlers)} handler(s)"
            )
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# =============================================================================
|
|
354
|
+
# Input Validation and Sanitization
|
|
355
|
+
# =============================================================================
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def sanitize_sql_input(value: str, allow_wildcards: bool = False) -> str:
    """Sanitize input that will be used in SQL queries.

    Rejects input matching any entry of ``SQL_INJECTION_PATTERNS``
    (case-insensitive), then doubles single quotes. Unless wildcards are
    allowed, ``%`` and ``_`` are escaped with a backslash, and existing
    backslashes are doubled first so they cannot cancel those escapes.

    This is a heuristic safety net. It does not replace parameterized
    queries, which should be preferred wherever the database driver
    supports them.

    :param value: Input value to sanitize
    :type value: str
    :param allow_wildcards: Whether to allow SQL wildcards (% and _)
    :type allow_wildcards: bool
    :return: Escaped value; a falsy input is returned as-is
    :rtype: str
    :raises ValidationError: If input contains dangerous patterns
    """
    if not value:
        return value

    # Check for SQL injection patterns
    for pattern in SQL_INJECTION_PATTERNS:
        if re.search(pattern, value, re.IGNORECASE):
            raise ValidationError(
                "Invalid input: potential SQL injection detected",
                field="query",
            )

    # Escape SQL special characters
    value = value.replace("'", "''")  # Escape single quotes

    if not allow_wildcards:
        # Backslash is the escape character used below, so literal
        # backslashes must be doubled first. Otherwise an input of "\%"
        # would become "\\%": an escaped backslash followed by a live
        # wildcard.
        value = value.replace("\\", "\\\\")
        value = value.replace("%", "\\%")  # Escape wildcards
        value = value.replace("_", "\\_")

    return value
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def sanitize_html_input(value: str, allowed_tags: Optional[List[str]] = None) -> str:
    """Sanitize HTML input to prevent XSS.

    Strips every fragment matching ``XSS_PATTERNS``, repeating until no
    pattern matches any more, then escapes the remaining text with
    :func:`html.escape`. All remaining markup is escaped.

    :param value: HTML input to sanitize
    :type value: str
    :param allowed_tags: Accepted for interface compatibility but currently
        not consulted: every tag is escaped
    :type allowed_tags: Optional[List[str]]
    :return: Escaped text safe for display; a falsy input is returned as-is
    :rtype: str
    """
    if not value:
        return value

    # Simple HTML sanitization (without bleach dependency).
    # Removal is repeated until the text stops changing, because deleting
    # one fragment can splice a new match together:
    # "javajavascript:script:" becomes "javascript:" after a single pass.
    # Each change only removes characters, so the loop always terminates.
    # DOTALL lets the <script> pattern span line breaks.
    previous = None
    while previous != value:
        previous = value
        for pattern in XSS_PATTERNS:
            value = re.sub(pattern, "", value, flags=re.IGNORECASE | re.DOTALL)

    # Escape HTML entities
    return html.escape(value)
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def sanitize_filename(filename: str) -> str:
    """Sanitize filename to prevent directory traversal.

    Removes path separators, null bytes and ``..`` sequences, then limits
    the result to 255 characters while keeping the extension where
    possible.

    :param filename: Filename to sanitize
    :type filename: str
    :return: Sanitized filename (possibly empty); a falsy input is returned
        as-is
    :rtype: str
    """
    if not filename:
        return filename

    # Strip separators and null bytes BEFORE looking for "..". In the
    # opposite order an input such as "./." collapses into ".." once the
    # slash is removed.
    for unsafe in ("/", "\\", "\x00"):
        filename = filename.replace(unsafe, "")

    # Remove any parent-directory references.
    while ".." in filename:
        filename = filename.replace("..", "")

    # Limit length
    max_length = 255
    if len(filename) > max_length:
        stem, _dot, ext = filename.rpartition(".")
        if stem and ext and len(ext) < max_length - 1:
            # Keep the extension and shorten the stem. Trailing dots are
            # trimmed so that truncation cannot create a ".." before the
            # extension.
            stem = stem[: max_length - len(ext) - 1].rstrip(".")
            filename = f"{stem}.{ext}"
        else:
            # No usable extension (none, empty, or itself too long).
            filename = filename[:max_length]

    return filename
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def validate_url(url: str, allowed_schemes: Optional[List[str]] = None) -> str:
    """Validate URL to prevent various attacks.

    Checks that the scheme (the text before the first ``:``) is in the
    allow-list, comparing case-insensitively, and rejects ``javascript:``,
    ``data:`` and ``vbscript:`` URLs even if the allow-list names them.
    Nothing beyond the scheme is validated.

    :param url: URL to validate
    :type url: str
    :param allowed_schemes: Allowed URL schemes (defaults to http and https)
    :type allowed_schemes: Optional[List[str]]
    :return: The URL with surrounding whitespace stripped; a falsy input is
        returned as-is
    :rtype: str
    :raises ValidationError: If URL is invalid or dangerous
    """
    if not url:
        return url

    if allowed_schemes is None:
        allowed_schemes = ["http", "https"]

    # Basic URL validation
    url = url.strip()

    # Check scheme. Both sides are lower-cased so that an allow-list such
    # as ["HTTPS"] works.
    prefix, colon, _rest = url.partition(":")
    scheme = prefix.lower() if colon else ""
    permitted = {candidate.lower() for candidate in allowed_schemes}
    if scheme not in permitted:
        raise ValidationError(
            f"Invalid URL scheme. Allowed: {', '.join(allowed_schemes)}",
            field="url",
        )

    # Prevent javascript: and data: URLs. This only triggers when a caller
    # explicitly put one of these schemes on the allow-list.
    if url.lower().startswith(("javascript:", "data:", "vbscript:")):
        raise ValidationError("Invalid URL: potentially dangerous scheme", field="url")

    return url
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def sanitize_dict(
    data: Dict[str, Any], rules: Dict[str, callable], strict: bool = False
) -> Dict[str, Any]:
    """Build a new dictionary by applying per-field sanitization rules.

    Each value whose key has an entry in ``rules`` is replaced by the
    result of calling that rule with the value. Keys without a rule are
    copied through unchanged, or rejected when ``strict`` is set. The
    input dictionary is not modified.

    :param data: Dictionary to sanitize
    :type data: Dict[str, Any]
    :param rules: Dictionary of field -> sanitization function
    :type rules: Dict[str, callable]
    :param strict: If True, reject unknown fields
    :type strict: bool
    :return: Sanitized dictionary
    :rtype: Dict[str, Any]
    :raises ValidationError: If a rule raises, or if strict mode is on and
        a field has no rule
    """
    cleaned = {}

    for field_name, raw in data.items():
        if field_name not in rules:
            if strict:
                raise ValidationError(f"Unknown field: {field_name}", field=field_name)
            # No rule registered: pass the value through untouched.
            cleaned[field_name] = raw
            continue

        try:
            cleaned[field_name] = rules[field_name](raw)
        except Exception as e:
            # Any failure inside a rule is reported as a validation error
            # for that field.
            raise ValidationError(f"Invalid {field_name}: {e}", field=field_name)

    return cleaned
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
# =============================================================================
|
|
532
|
+
# Validation Helpers
|
|
533
|
+
# =============================================================================
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def validate_email(email: str) -> str:
    """Normalize an email address and check its basic shape.

    Surrounding whitespace is stripped and the address is lower-cased
    before being matched against a simple ``local@domain.tld`` pattern.

    :param email: Email address to validate
    :type email: str
    :return: Normalized (stripped, lower-cased) email address
    :rtype: str
    :raises ValidationError: If email format is invalid
    """
    normalized = email.strip().lower()

    # Basic email regex
    shape = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
    if shape.match(normalized) is None:
        raise ValidationError("Invalid email address", field="email")

    return normalized
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def validate_storage_key(key: str) -> str:
    """Validate storage key format.

    Surrounding whitespace is stripped first, then the remaining text must
    consist solely of letters, digits, underscores and hyphens.

    :param key: Storage key to validate
    :type key: str
    :return: The stripped storage key
    :rtype: str
    :raises ValidationError: If the key is missing, blank, or contains
        other characters
    """
    if not key or not key.strip():
        raise ValidationError("Storage key is required", field="storage_key")

    # Validate the same text that is returned. fullmatch() is used rather
    # than "^...$" because "$" also matches just before a trailing newline.
    candidate = key.strip()

    # Check format (alphanumeric with underscores/hyphens)
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", candidate):
        raise ValidationError(
            "Invalid storage key format. Use only letters, numbers, underscores, and hyphens.",
            field="storage_key",
        )

    return candidate
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
# =============================================================================
|
|
583
|
+
# Logging Convenience Functions
|
|
584
|
+
# =============================================================================
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def log_headers(headers: Dict[str, Any], logger, level: str = "debug") -> None:
    """Log headers safely with sanitization.

    The headers are passed through :func:`sanitize_headers` and written
    with the logger method named by ``level``.

    :param headers: HTTP headers to log
    :type headers: Dict[str, Any]
    :param logger: Logger instance to use
    :type logger: logging.Logger
    :param level: Log level name (debug, info, warning, error, critical),
        case-insensitive; anything else is logged at debug level
    :type level: str
    :return: None
    :rtype: None
    """
    safe_headers = sanitize_headers(headers)

    # Only dispatch to real logging methods. This accepts "INFO" as well as
    # "info" and keeps an arbitrary attribute name from being called on the
    # logger.
    method_name = str(level).lower()
    known_methods = {
        "debug",
        "info",
        "warning",
        "warn",
        "error",
        "exception",
        "critical",
        "fatal",
    }
    if method_name not in known_methods:
        method_name = "debug"

    getattr(logger, method_name)(f"Headers: {safe_headers}")
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def log_request(url: str, headers: Dict[str, Any], body: Any, logger) -> None:
    """Log request details safely with sanitization.

    Emits three debug records: the sanitized URL, the sanitized headers,
    and the sanitized body. Dict bodies go through :func:`safe_log_dict`.
    Any other body is converted to text, passed through
    :func:`sanitize_string`, and shortened to 100 characters.

    :param url: Request URL
    :type url: str
    :param headers: Request headers
    :type headers: Dict[str, Any]
    :param body: Request body
    :type body: Any
    :param logger: Logger instance to use
    :type logger: logging.Logger
    :return: None
    :rtype: None
    """
    safe_url = sanitize_url(url)
    safe_headers = sanitize_headers(headers)

    if isinstance(body, dict):
        safe_body = safe_log_dict(body)
    else:
        # Sanitize BEFORE truncating: a secret cut in half by the
        # 100-character limit might no longer match any pattern and would
        # then be logged in part.
        body_text = sanitize_string(str(body))
        # Mark the text as shortened only when something was cut off.
        safe_body = body_text if len(body_text) <= 100 else body_text[:100] + "..."

    logger.debug(f"Request to: {safe_url}")
    logger.debug(f"Headers: {safe_headers}")
    logger.debug(f"Body: {safe_body}")
|