cloudflare-images-migrator 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/security.py ADDED
@@ -0,0 +1,528 @@
1
+ """
2
+ Enterprise-grade security module for Cloudflare Images Migration Tool
3
+ """
4
+
5
import hashlib
import hmac
import mimetypes
import re
import secrets
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple

import magic
import requests
from PIL import Image, ImageFile
from PIL.ExifTags import TAGS

from .utils import get_file_hash, get_file_size_mb
18
+
19
+
20
class SecurityValidator:
    """Enterprise-grade security validator for image uploads.

    Validates local image files (size, extension, MIME type, magic bytes,
    embedded script patterns, image dimensions, EXIF) and remote image URLs
    (format, scheme, domain heuristics, Content-Type, rate limit), and can
    aggregate individual validation results into a report.
    """

    # Expected leading magic bytes (lower-case hex) per file extension.
    # JPEG only fixes the first three bytes (SOI marker + 0xFF); the fourth
    # byte is the next segment marker and varies (APPn, DQT, COM, ...).
    _FILE_SIGNATURES = {
        '.png': ('89504e47',),
        '.jpg': ('ffd8ff',),
        '.jpeg': ('ffd8ff',),
        '.gif': ('47494638',),
        '.webp': ('52494646',),
        '.bmp': ('424d',),
        '.ico': ('00000100',),
    }

    # Hex of the ASCII form-type tag "WEBP" stored at bytes 8-11 of a RIFF file.
    _WEBP_FORM_TYPE = '57454250'

    # Compiled once; accepts http(s) URLs whose host is a domain name,
    # "localhost" or a dotted-quad IPv4 address, with optional port and path.
    _URL_PATTERN = re.compile(
        r'^https?://'  # http:// or https://
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+'  # domain labels...
        r'(?:[A-Z]{2,63}|XN--[A-Z0-9-]{1,59})\.?|'  # ...TLD (incl. long/punycode)
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)

    # Bytes carried over between chunks in the deep scan so that a pattern
    # split across a chunk boundary is still seen in one piece.
    _SCAN_OVERLAP = 64

    def __init__(self, config, logger=None):
        """Store configuration and set up the validation policy.

        Args:
            config: Configuration object; ``max_file_size_mb`` is read from it
                when present (default 10).
            logger: Optional logger exposing ``info`` and ``warning``.
        """
        self.config = config
        self.logger = logger

        # Security configurations
        self.max_file_size_mb = getattr(config, 'max_file_size_mb', 10)
        self.allowed_mime_types = {
            'image/png', 'image/jpeg', 'image/gif', 'image/webp',
            'image/svg+xml', 'image/bmp', 'image/x-icon',
            # Alias names reported by some libmagic versions for ICO / BMP.
            'image/vnd.microsoft.icon', 'image/x-ms-bmp',
        }
        self.allowed_extensions = {
            '.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg', '.bmp', '.ico'
        }

        # Advanced security settings
        self.max_image_dimensions = (12000, 12000)  # Cloudflare limit
        self.max_image_area = 100_000_000  # 100 megapixels
        self.min_image_dimensions = (1, 1)

        # Malicious pattern detection. Each entry is a bytes *regex*, so
        # regex metacharacters that are meant literally must be escaped
        # (an unescaped "<?php" would match any occurrence of "php").
        self.suspicious_patterns = [
            rb'<script',
            rb'javascript:',
            rb'data:text/html',
            rb'<\?php',
            rb'<%',
            rb'eval\s*\(',
            rb'document\.cookie',
            rb'window\.location'
        ]

        # Rate limiting
        self.upload_timestamps = []
        self.max_uploads_per_minute = 60

        # Content validation
        self.enable_deep_scan = True
        self.quarantine_suspicious = True

    def validate_file_security(self, file_path: Path) -> Dict[str, Any]:
        """Run the full security validation pipeline on an image file.

        Args:
            file_path: Path of the file to validate.

        Returns:
            Dict with keys ``is_safe``, ``security_level`` (``HIGH``,
            ``MEDIUM``, ``LOW``, ``CRITICAL`` or ``UNKNOWN``), ``issues``,
            ``metadata``, ``content_hash``, ``file_signature`` and
            ``recommendations``. Unexpected errors are reported in the result
            (``is_safe`` False, level ``UNKNOWN``) rather than raised.
        """
        result = {
            'is_safe': True,
            'security_level': 'HIGH',
            'issues': [],
            'metadata': {},
            'content_hash': '',
            'file_signature': '',
            'recommendations': []
        }

        try:
            # 1. File existence and basic checks
            if not file_path.exists():
                result['is_safe'] = False
                result['issues'].append('File does not exist')
                return result

            # 2. File size validation
            file_size_mb = get_file_size_mb(file_path)
            if file_size_mb > self.max_file_size_mb:
                result['is_safe'] = False
                result['issues'].append(f'File too large: {file_size_mb:.2f}MB > {self.max_file_size_mb}MB')

            # 3. File extension validation
            if file_path.suffix.lower() not in self.allowed_extensions:
                result['is_safe'] = False
                result['issues'].append(f'Invalid file extension: {file_path.suffix}')

            # 4. MIME type validation using python-magic
            try:
                file_mime = magic.from_file(str(file_path), mime=True)
                result['metadata']['detected_mime'] = file_mime

                if file_mime not in self.allowed_mime_types:
                    result['is_safe'] = False
                    result['issues'].append(f'Invalid MIME type: {file_mime}')
            except Exception as e:
                # Detection failure is recorded but does not by itself
                # mark the file unsafe.
                result['issues'].append(f'MIME detection failed: {str(e)}')

            # 5. File signature validation
            file_signature = self._get_file_signature(file_path)
            result['file_signature'] = file_signature

            if not self._validate_file_signature(file_path, file_signature):
                result['is_safe'] = False
                result['issues'].append('File signature mismatch')

            # 6. Content hash for integrity
            result['content_hash'] = get_file_hash(file_path)

            # 7. Deep content scanning
            if self.enable_deep_scan:
                scan_results = self._deep_content_scan(file_path)
                if not scan_results['is_clean']:
                    result['is_safe'] = False
                    result['security_level'] = 'CRITICAL'
                    result['issues'].extend(scan_results['threats'])

            # 8. Image-specific validation
            if file_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp']:
                img_validation = self._validate_image_content(file_path)
                result['metadata'].update(img_validation['metadata'])

                if not img_validation['is_valid']:
                    result['is_safe'] = False
                    result['issues'].extend(img_validation['issues'])

            # 9. Final security level assessment. Done before generating
            # recommendations, because those depend on the final level.
            result['security_level'] = self._assess_security_level(result)

            # 10. Security recommendations
            result['recommendations'] = self._generate_security_recommendations(result)

            self._log_security_event(file_path, result)

        except Exception as e:
            result['is_safe'] = False
            result['security_level'] = 'UNKNOWN'
            result['issues'].append(f'Security validation error: {str(e)}')

        return result

    @staticmethod
    def _assess_security_level(result: Dict[str, Any]) -> str:
        """Derive the final security level from a file validation result.

        A ``CRITICAL`` level set earlier (malicious content found) is kept
        instead of being downgraded by the issue-count heuristic.
        """
        if not result['issues']:
            return 'HIGH'
        if result['security_level'] == 'CRITICAL':
            return 'CRITICAL'
        if len(result['issues']) <= 2 and result['is_safe']:
            return 'MEDIUM'
        return 'LOW'

    def validate_url_security(self, url: str) -> Dict[str, Any]:
        """Validate the security of an external image URL.

        Args:
            url: The URL to check.

        Returns:
            Dict with keys ``is_safe``, ``security_level``, ``issues``,
            ``metadata`` and ``recommendations``. Unexpected errors are
            reported in the result rather than raised.
        """
        result = {
            'is_safe': True,
            'security_level': 'HIGH',
            'issues': [],
            'metadata': {},
            'recommendations': []
        }

        try:
            # 1. URL format validation
            if not self._is_valid_url_format(url):
                result['is_safe'] = False
                result['security_level'] = 'LOW'
                result['issues'].append('Invalid URL format')

            # 2. Protocol validation (must be HTTPS for security)
            if not url.lower().startswith('https://'):
                # Only downgrade; never raise a level that is already LOW.
                if result['security_level'] == 'HIGH':
                    result['security_level'] = 'MEDIUM'
                result['issues'].append('Non-HTTPS URL - security risk')
                result['recommendations'].append('Use HTTPS URLs for better security')

            # 3. Domain validation
            domain_check = self._validate_domain_security(url)
            if not domain_check['is_safe']:
                result['is_safe'] = False
                result['security_level'] = 'LOW'
                result['issues'].extend(domain_check['issues'])

            # 4. Content-Type validation via HEAD request. Skipped when the
            # URL has already been rejected, so no request is sent to a
            # malformed or flagged address.
            if result['is_safe']:
                try:
                    head_response = requests.head(url, timeout=10, allow_redirects=True)
                    content_type = head_response.headers.get('content-type', '').lower()

                    result['metadata']['content_type'] = content_type
                    result['metadata']['status_code'] = head_response.status_code

                    if not any(mime in content_type for mime in self.allowed_mime_types):
                        result['is_safe'] = False
                        result['security_level'] = 'LOW'
                        result['issues'].append(f'Invalid content type: {content_type}')

                except Exception as e:
                    result['issues'].append(f'URL validation failed: {str(e)}')

            # 5. Rate limiting check
            if not self._check_rate_limit():
                result['is_safe'] = False
                result['security_level'] = 'LOW'
                result['issues'].append('Rate limit exceeded')

        except Exception as e:
            result['is_safe'] = False
            result['security_level'] = 'UNKNOWN'
            result['issues'].append(f'URL security validation error: {str(e)}')

        return result

    def _get_file_signature(self, file_path: Path) -> str:
        """Return the first 16 bytes of the file as hex, or "" on read error."""
        try:
            with open(file_path, 'rb') as f:
                return f.read(16).hex()
        except Exception:
            return ""

    def _validate_file_signature(self, file_path: Path, signature: str) -> bool:
        """Check that the magic bytes match what the file extension implies.

        Args:
            file_path: Path whose suffix selects the expected signature.
            signature: Hex string of the file's leading bytes.

        Returns:
            True when the signature matches, or when the extension has no
            known signature (e.g. ``.svg``); False otherwise.
        """
        ext = file_path.suffix.lower()
        expected = self._FILE_SIGNATURES.get(ext)
        if expected is None:
            return True  # Allow unknown extensions for now

        sig = signature.lower()
        if not sig.startswith(expected):
            return False

        if ext == '.webp':
            # "RIFF" alone also matches WAV/AVI; the form type stored at
            # bytes 8-11 (hex chars 16-23) must be "WEBP".
            return sig[16:24] == self._WEBP_FORM_TYPE

        return True

    def _deep_content_scan(self, file_path: Path) -> Dict[str, Any]:
        """Scan raw file bytes for embedded script/markup patterns.

        The file is read in chunks; a short tail of each chunk is prepended
        to the next one so that patterns spanning a chunk boundary are
        detected. Each pattern is reported at most once. For ``.svg`` files
        the SVG-specific scan is run as well and its findings are merged.

        Returns:
            Dict with ``is_clean`` (bool), ``threats`` (list of str) and
            ``scan_details`` (dict). A scan error is appended to ``threats``
            without changing ``is_clean``.
        """
        result = {
            'is_clean': True,
            'threats': [],
            'scan_details': {}
        }

        try:
            compiled = [
                (pattern, re.compile(pattern, re.IGNORECASE))
                for pattern in self.suspicious_patterns
            ]
            reported = set()

            # Read file in chunks to avoid memory issues
            chunk_size = 8192
            tail = b''
            with open(file_path, 'rb') as f:
                while chunk := f.read(chunk_size):
                    window = tail + chunk
                    for pattern, regex in compiled:
                        if pattern in reported:
                            continue
                        if regex.search(window):
                            reported.add(pattern)
                            result['is_clean'] = False
                            result['threats'].append(f'Suspicious pattern detected: {pattern.decode("utf-8", errors="ignore")}')
                    tail = window[-self._SCAN_OVERLAP:]

            # Additional checks for specific file types
            if file_path.suffix.lower() == '.svg':
                svg_result = self._scan_svg_content(file_path)
                # Merge instead of overwrite so that findings from the
                # generic scan above are not discarded.
                result['is_clean'] = result['is_clean'] and svg_result['is_clean']
                result['threats'].extend(svg_result['threats'])

        except Exception as e:
            result['threats'].append(f'Content scan error: {str(e)}')

        return result

    def _scan_svg_content(self, file_path: Path) -> Dict[str, Any]:
        """Scan an SVG file (read as UTF-8 text) for script/XSS constructs.

        Returns:
            Dict with ``is_clean`` (bool) and ``threats`` (list of str). A
            read or decode error is appended to ``threats`` without changing
            ``is_clean``.
        """
        result = {'is_clean': True, 'threats': []}

        try:
            content = file_path.read_text(encoding='utf-8')

            # SVG-specific threats
            svg_threats = [
                r'<script[^>]*>',
                r'javascript:',
                r'onload\s*=',
                r'onerror\s*=',
                r'onclick\s*=',
                r'onmouseover\s*=',
                r'<iframe',
                r'<object',
                r'<embed'
            ]

            for threat in svg_threats:
                if re.search(threat, content, re.IGNORECASE):
                    result['is_clean'] = False
                    result['threats'].append(f'SVG security threat: {threat}')

        except Exception as e:
            result['threats'].append(f'SVG scan error: {str(e)}')

        return result

    def _validate_image_content(self, file_path: Path) -> Dict[str, Any]:
        """Open the image with Pillow, validate its size and collect metadata.

        Returns:
            Dict with ``is_valid`` (bool), ``issues`` (list of str) and
            ``metadata`` (format, mode, size, width, height, exif). Any
            error while opening or inspecting the image marks it invalid.
        """
        result = {
            'is_valid': True,
            'issues': [],
            'metadata': {}
        }

        try:
            # NOTE(review): both assignments below change process-wide Pillow
            # settings. LOAD_TRUNCATED_IMAGES makes Pillow tolerate truncated
            # files (it does not protect against decompression bombs);
            # confirm this global side effect is intended.
            ImageFile.LOAD_TRUNCATED_IMAGES = True
            # Pixel-count threshold Pillow uses for its decompression-bomb check.
            Image.MAX_IMAGE_PIXELS = self.max_image_area

            with Image.open(file_path) as img:
                # Basic image properties
                result['metadata']['format'] = img.format
                result['metadata']['mode'] = img.mode
                result['metadata']['size'] = img.size
                result['metadata']['width'] = img.width
                result['metadata']['height'] = img.height

                # Dimension validation
                if img.width > self.max_image_dimensions[0] or img.height > self.max_image_dimensions[1]:
                    result['is_valid'] = False
                    result['issues'].append(f'Image dimensions too large: {img.size}')

                if img.width < self.min_image_dimensions[0] or img.height < self.min_image_dimensions[1]:
                    result['is_valid'] = False
                    result['issues'].append(f'Image dimensions too small: {img.size}')

                # Area validation
                area = img.width * img.height
                if area > self.max_image_area:
                    result['is_valid'] = False
                    result['issues'].append(f'Image area too large: {area} pixels')

                # EXIF data extraction and sanitization
                exif_data = self._extract_safe_exif(img)
                result['metadata']['exif'] = exif_data

                # Check for suspicious EXIF data (reported, but does not
                # invalidate the image on its own)
                if self._has_suspicious_exif(exif_data):
                    result['issues'].append('Suspicious EXIF data detected')

        except Exception as e:
            result['is_valid'] = False
            result['issues'].append(f'Image validation error: {str(e)}')

        return result

    def _extract_safe_exif(self, img: "Image.Image") -> Dict:
        """Return a whitelisted subset of the image's EXIF tags.

        Only tags named in the whitelist whose value is a str, int or float
        are kept. Returns an empty dict when the image has no EXIF data or
        extraction fails.
        """
        safe_exif = {}

        try:
            exif = img._getexif()
            if exif:
                # Only extract safe, useful metadata
                safe_tags = {
                    'DateTime', 'DateTimeOriginal', 'ColorSpace', 'ExifImageWidth',
                    'ExifImageHeight', 'Orientation', 'Software'
                }

                for tag_id, value in exif.items():
                    tag = TAGS.get(tag_id, tag_id)
                    if tag in safe_tags and isinstance(value, (str, int, float)):
                        safe_exif[tag] = value

        except Exception:
            pass  # EXIF extraction is optional (deliberate best-effort)

        return safe_exif

    def _has_suspicious_exif(self, exif_data: Dict) -> bool:
        """Return True when the EXIF ``Software`` tag contains a flagged keyword."""
        # Check for suspicious software signatures
        suspicious_software = ['steganography', 'hidden', 'inject', 'payload']

        # The tag value may be numeric, so coerce before lower-casing.
        software = str(exif_data.get('Software', '')).lower()
        return any(sus in software for sus in suspicious_software)

    def _is_valid_url_format(self, url: str) -> bool:
        """Return True when ``url`` is a syntactically valid http(s) URL."""
        return self._URL_PATTERN.match(url) is not None

    def _validate_domain_security(self, url: str) -> Dict[str, Any]:
        """Apply simple keyword and IP heuristics to the URL's host.

        Returns:
            Dict with ``is_safe`` (bool) and ``issues`` (list of str). A
            direct-IP host only adds an issue; a keyword hit marks the URL
            unsafe.
        """
        result = {'is_safe': True, 'issues': []}

        try:
            from urllib.parse import urlparse
            parsed = urlparse(url)
            # Use the bare host: netloc would also contain any port or
            # user-info, which breaks the IP check below.
            domain = (parsed.hostname or '').lower()

            # Check against known malicious domains (simplified)
            malicious_patterns = [
                'malware', 'phishing', 'suspicious', 'temp', 'throwaway'
            ]

            if any(pattern in domain for pattern in malicious_patterns):
                result['is_safe'] = False
                result['issues'].append(f'Suspicious domain: {domain}')

            # IP address check (generally less trusted)
            if re.match(r'^\d+\.\d+\.\d+\.\d+$', domain):
                result['issues'].append('Direct IP address used - lower trust level')

        except Exception as e:
            result['issues'].append(f'Domain validation error: {str(e)}')

        return result

    def _check_rate_limit(self) -> bool:
        """Record one request and report whether it is within the limit.

        Uses a sliding 60-second window over ``self.upload_timestamps``.

        Returns:
            False when ``max_uploads_per_minute`` requests were already
            recorded in the window (the new one is then not recorded);
            True otherwise.
        """
        current_time = time.time()

        # Remove timestamps older than 1 minute
        self.upload_timestamps = [
            ts for ts in self.upload_timestamps
            if current_time - ts < 60
        ]

        # Check if we're under the limit
        if len(self.upload_timestamps) >= self.max_uploads_per_minute:
            return False

        # Add current timestamp
        self.upload_timestamps.append(current_time)
        return True

    def _generate_security_recommendations(self, validation_result: Dict) -> List[str]:
        """Build recommendation strings from a validation result.

        Reads ``security_level`` and ``issues`` from ``validation_result``;
        two generic recommendations are always appended.
        """
        recommendations = []

        if validation_result['security_level'] != 'HIGH':
            recommendations.append('Consider additional security scanning')

        if any('Non-HTTPS' in issue for issue in validation_result['issues']):
            recommendations.append('Use HTTPS URLs for secure image delivery')

        if any('large' in issue.lower() for issue in validation_result['issues']):
            recommendations.append('Optimize image size before upload')

        recommendations.append('Regularly update security policies')
        recommendations.append('Monitor upload patterns for anomalies')

        return recommendations

    def _log_security_event(self, file_path: Path, result: Dict):
        """Log a one-line summary of a file validation (no-op without a logger).

        Unsafe results are logged at warning level, safe ones at info level.
        """
        if not self.logger:
            return

        message = f"Security scan: {file_path.name} - {result['security_level']} - Issues: {len(result['issues'])}"
        if result['is_safe']:
            self.logger.info(message)
        else:
            self.logger.warning(message)

    def generate_security_report(self, validations: List[Dict]) -> Dict:
        """Aggregate validation results into a summary report.

        Args:
            validations: Result dicts as returned by the ``validate_*``
                methods.

        Returns:
            Dict with ``total_files``, ``safe_files``, per-level counts in
            ``security_levels``, issue frequencies in ``common_issues``, a
            sorted de-duplicated ``recommendations`` list and
            ``security_score`` (percentage of safe files, 0 for no input).
        """
        report = {
            'total_files': len(validations),
            'safe_files': sum(1 for v in validations if v['is_safe']),
            'security_levels': {'HIGH': 0, 'MEDIUM': 0, 'LOW': 0, 'CRITICAL': 0, 'UNKNOWN': 0},
            'common_issues': {},
            'recommendations': set()
        }

        levels = report['security_levels']
        for validation in validations:
            level = validation['security_level']
            levels[level] = levels.get(level, 0) + 1

            for issue in validation['issues']:
                report['common_issues'][issue] = report['common_issues'].get(issue, 0) + 1

            report['recommendations'].update(validation['recommendations'])

        # Sorted so that the report is deterministic across runs.
        report['recommendations'] = sorted(report['recommendations'])
        report['security_score'] = (report['safe_files'] / report['total_files'] * 100) if report['total_files'] > 0 else 0

        return report
489
+
490
+
491
class SecureUploadManager:
    """Manages secure upload workflows with Direct Creator Upload."""

    def __init__(self, cloudflare_client, security_validator, logger=None):
        """Store the collaborators used by the upload workflow.

        Args:
            cloudflare_client: Client object for the Cloudflare API.
            security_validator: Validator used to vet uploads.
            logger: Optional logger exposing ``error``.
        """
        self.cf_client = cloudflare_client
        self.security_validator = security_validator
        self.logger = logger

    def create_secure_upload_url(self, custom_metadata: Optional[Dict] = None) -> Optional[Dict]:
        """Create a Direct Creator Upload descriptor for secure uploads.

        Args:
            custom_metadata: Optional metadata to attach; defaults to ``{}``.

        Returns:
            Dict with ``upload_url``, ``upload_token``, ``expires_at`` (Unix
            time one hour from now) and ``metadata``, or ``None`` when
            creation fails (the error is logged if a logger is set).
        """
        try:
            # This would integrate with Cloudflare's Direct Creator Upload API
            # For now, we'll use the standard upload with enhanced security

            upload_token = self._generate_upload_token()

            return {
                'upload_url': 'https://upload.imagedelivery.net/direct',
                'upload_token': upload_token,
                'expires_at': int(time.time()) + 3600,  # 1 hour
                'metadata': custom_metadata or {}
            }

        except Exception as e:
            if self.logger:
                self.logger.error(f"Failed to create secure upload URL: {str(e)}")
            return None

    def _generate_upload_token(self) -> str:
        """Generate an upload token of the form ``secure_<unix-time>_<16 hex>``.

        The random part comes from the ``secrets`` module; hashing the
        current time would make the token predictable.
        """
        timestamp = str(int(time.time()))
        random_data = secrets.token_hex(8)  # 8 random bytes -> 16 hex chars
        return f"secure_{timestamp}_{random_data}"