iflow-mcp_democratize-technology-chronos-mcp 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. chronos_mcp/__init__.py +5 -0
  2. chronos_mcp/__main__.py +9 -0
  3. chronos_mcp/accounts.py +410 -0
  4. chronos_mcp/bulk.py +946 -0
  5. chronos_mcp/caldav_utils.py +149 -0
  6. chronos_mcp/calendars.py +204 -0
  7. chronos_mcp/config.py +187 -0
  8. chronos_mcp/credentials.py +190 -0
  9. chronos_mcp/events.py +515 -0
  10. chronos_mcp/exceptions.py +477 -0
  11. chronos_mcp/journals.py +477 -0
  12. chronos_mcp/logging_config.py +23 -0
  13. chronos_mcp/models.py +202 -0
  14. chronos_mcp/py.typed +0 -0
  15. chronos_mcp/rrule.py +259 -0
  16. chronos_mcp/search.py +315 -0
  17. chronos_mcp/server.py +121 -0
  18. chronos_mcp/tasks.py +518 -0
  19. chronos_mcp/tools/__init__.py +29 -0
  20. chronos_mcp/tools/accounts.py +151 -0
  21. chronos_mcp/tools/base.py +59 -0
  22. chronos_mcp/tools/bulk.py +557 -0
  23. chronos_mcp/tools/calendars.py +142 -0
  24. chronos_mcp/tools/events.py +698 -0
  25. chronos_mcp/tools/journals.py +310 -0
  26. chronos_mcp/tools/tasks.py +414 -0
  27. chronos_mcp/utils.py +163 -0
  28. chronos_mcp/validation.py +636 -0
  29. iflow_mcp_democratize_technology_chronos_mcp-2.0.0.dist-info/METADATA +299 -0
  30. iflow_mcp_democratize_technology_chronos_mcp-2.0.0.dist-info/RECORD +68 -0
  31. iflow_mcp_democratize_technology_chronos_mcp-2.0.0.dist-info/WHEEL +5 -0
  32. iflow_mcp_democratize_technology_chronos_mcp-2.0.0.dist-info/entry_points.txt +2 -0
  33. iflow_mcp_democratize_technology_chronos_mcp-2.0.0.dist-info/licenses/LICENSE +21 -0
  34. iflow_mcp_democratize_technology_chronos_mcp-2.0.0.dist-info/top_level.txt +2 -0
  35. tests/__init__.py +0 -0
  36. tests/conftest.py +91 -0
  37. tests/unit/__init__.py +0 -0
  38. tests/unit/test_accounts.py +380 -0
  39. tests/unit/test_accounts_ssrf.py +134 -0
  40. tests/unit/test_base.py +135 -0
  41. tests/unit/test_bulk.py +380 -0
  42. tests/unit/test_bulk_create.py +408 -0
  43. tests/unit/test_bulk_delete.py +341 -0
  44. tests/unit/test_bulk_resource_limits.py +74 -0
  45. tests/unit/test_caldav_utils.py +300 -0
  46. tests/unit/test_calendars.py +286 -0
  47. tests/unit/test_config.py +111 -0
  48. tests/unit/test_config_validation.py +128 -0
  49. tests/unit/test_credentials_security.py +189 -0
  50. tests/unit/test_cryptography_security.py +178 -0
  51. tests/unit/test_events.py +536 -0
  52. tests/unit/test_exceptions.py +58 -0
  53. tests/unit/test_journals.py +1097 -0
  54. tests/unit/test_models.py +95 -0
  55. tests/unit/test_race_conditions.py +202 -0
  56. tests/unit/test_recurring_events.py +156 -0
  57. tests/unit/test_rrule.py +217 -0
  58. tests/unit/test_search.py +372 -0
  59. tests/unit/test_search_advanced.py +333 -0
  60. tests/unit/test_server_input_validation.py +219 -0
  61. tests/unit/test_ssrf_protection.py +505 -0
  62. tests/unit/test_tasks.py +918 -0
  63. tests/unit/test_thread_safety.py +301 -0
  64. tests/unit/test_tools_journals.py +617 -0
  65. tests/unit/test_tools_tasks.py +968 -0
  66. tests/unit/test_url_validation_security.py +234 -0
  67. tests/unit/test_utils.py +180 -0
  68. tests/unit/test_validation.py +983 -0
@@ -0,0 +1,636 @@
1
+ """Input validation for Chronos MCP with SSRF protection.
2
+
3
+ This module provides comprehensive input validation for CalDAV operations,
4
+ including enhanced URL validation with Server-Side Request Forgery (SSRF)
5
+ protection. By default, URLs pointing to localhost, private IP ranges, and
6
+ other potentially dangerous addresses are blocked to prevent SSRF attacks.
7
+
8
+ Security Features:
9
+ - SSRF Protection: Blocks requests to localhost, private IPs, and link-local addresses
10
+ - HTTPS Enforcement: Only HTTPS URLs are allowed for CalDAV connections
11
+ - Pattern Validation: Prevents injection attacks through input sanitization
12
+ - DNS Resolution: Validates that domains don't resolve to private IPs
13
+
14
+ For local development or trusted environments, SSRF protection can be
15
+ disabled by setting allow_private_ips=True when calling validate_url().
16
+ """
17
+
18
+ import ipaddress
19
+ import re
20
+ import socket
21
+ import unicodedata
22
+ from datetime import datetime
23
+ from typing import Any, Dict, List, Optional
24
+ from urllib.parse import urlparse
25
+
26
+ from .exceptions import ValidationError
27
+ from .models import TaskStatus
28
+
29
+
30
class InputValidator:
    """Comprehensive input validation for CalDAV operations.

    Every validator is a classmethod that either returns the sanitized
    value or raises ``ValidationError``. No instance state is used.
    """

    # SSRF Protection - Private IP ranges that should be blocked
    PRIVATE_IP_RANGES = [
        ipaddress.ip_network("10.0.0.0/8"),  # Class A private
        ipaddress.ip_network("172.16.0.0/12"),  # Class B private
        ipaddress.ip_network("192.168.0.0/16"),  # Class C private
        ipaddress.ip_network("127.0.0.0/8"),  # Loopback
        ipaddress.ip_network("169.254.0.0/16"),  # Link-local
        ipaddress.ip_network("::1/128"),  # IPv6 loopback
        ipaddress.ip_network("fe80::/10"),  # IPv6 link-local
        ipaddress.ip_network("fc00::/7"),  # IPv6 private
    ]

    # SSRF Protection - Blocked hostnames
    BLOCKED_HOSTNAMES = [
        "localhost",
        "localhost.localdomain",
        "127.0.0.1",
        "0.0.0.0",
        "::1",
        "::ffff:127.0.0.1",
    ]

    # Per-field maximum lengths (in characters); fields not listed here
    # fall back to 1000 in validate_text_field().
    MAX_LENGTHS = {
        "summary": 255,
        "description": 5000,
        "location": 255,
        "uid": 255,
        "attendee_email": 254,
        "url": 2048,
        "alias": 50,
        "calendar_name": 100,
    }

    PATTERNS = {
        "uid": re.compile(r"^[a-zA-Z0-9\-_.@]+$"),
        "email": re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"),
        "url": re.compile(
            r"^https://(?:[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,}|[a-zA-Z0-9-]+|localhost|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(?::(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]))?(?:/[^\s]*)?$"
        ),
        "color": re.compile(r"^#[0-9A-Fa-f]{6}$"),
    }

    # ReDoS-safe patterns with simplified regex and input length limits
    MAX_VALIDATION_LENGTH = 10000  # Pre-filter before regex validation

    DANGEROUS_PATTERNS = [
        # Script tags (simplified, non-backtracking)
        re.compile(r"<script\b", re.IGNORECASE),
        re.compile(r"</script\s*>", re.IGNORECASE),
        # JavaScript protocols (simplified)
        re.compile(r"javascript\s*:", re.IGNORECASE),
        re.compile(r"vbscript\s*:", re.IGNORECASE),
        re.compile(r"data\s*:", re.IGNORECASE),
        # Event handlers (simplified, non-greedy)
        re.compile(r"\bon\w+\s*=", re.IGNORECASE),
        # Dangerous HTML elements (simplified)
        re.compile(
            r"<(?:iframe|frame|object|embed|applet|form|meta|link)\b", re.IGNORECASE
        ),
        # Expression and eval patterns (simplified)
        re.compile(r"\bexpression\s*\(", re.IGNORECASE),
        re.compile(r"\beval\s*\(", re.IGNORECASE),
        re.compile(r"\bsetTimeout\s*\(", re.IGNORECASE),
        re.compile(r"\bsetInterval\s*\(", re.IGNORECASE),
        # Control characters (unchanged - safe pattern)
        re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]"),
        # Encoded patterns (simplified)
        re.compile(r"&#[xX]?[0-9a-fA-F]+;"),
        re.compile(r"%[0-9a-fA-F]{2}"),
        re.compile(r"\\u[0-9a-fA-F]{4}", re.IGNORECASE),
        # CSS injection (simplified)
        re.compile(r"@import\b", re.IGNORECASE),
        # SVG patterns (simplified)
        re.compile(r"<svg\b", re.IGNORECASE),
        re.compile(r"<foreignobject\b", re.IGNORECASE),
    ]

    # Characters that must never appear inside a recurrence rule: an embedded
    # line break would let a caller append extra lines after the rule.
    _RRULE_FORBIDDEN = re.compile(r"[\s\x00-\x1F\x7F]")

    # Attendee roles accepted by validate_attendees().
    _VALID_ATTENDEE_ROLES = (
        "CHAIR",
        "REQ-PARTICIPANT",
        "OPT-PARTICIPANT",
        "NON-PARTICIPANT",
    )

    @classmethod
    def validate_event(cls, event_data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and sanitize event data.

        Args:
            event_data: Raw event fields. ``summary``, ``dtstart`` and
                ``dtend`` are required; ``description``, ``location``,
                ``uid``, ``attendees`` and ``recurrence_rule`` are validated
                when present. Other keys are dropped.

        Returns:
            A new dict holding only the validated fields.

        Raises:
            ValidationError: If a required field is missing, a field fails
                its validator, the end is not after the start, or the two
                datetimes cannot be compared.
        """
        sanitized: Dict[str, Any] = {}

        if not event_data.get("summary"):
            raise ValidationError("Event summary is required")
        if not event_data.get("dtstart"):
            raise ValidationError("Event start time is required")
        if not event_data.get("dtend"):
            raise ValidationError("Event end time is required")

        sanitized["summary"] = cls.validate_text_field(
            event_data["summary"], "summary", required=True
        )

        if "description" in event_data:
            sanitized["description"] = cls.validate_text_field(
                event_data["description"], "description"
            )

        if "location" in event_data:
            sanitized["location"] = cls.validate_text_field(
                event_data["location"], "location"
            )

        sanitized["dtstart"] = cls.validate_datetime(event_data["dtstart"], "dtstart")
        sanitized["dtend"] = cls.validate_datetime(event_data["dtend"], "dtend")

        # Ordering a naive datetime against an aware one raises TypeError;
        # report that as a validation failure instead of letting it escape.
        try:
            ends_too_early = sanitized["dtend"] <= sanitized["dtstart"]
        except TypeError as exc:
            raise ValidationError(
                "Event start and end times must both include a timezone "
                "or both omit it"
            ) from exc

        if ends_too_early:
            raise ValidationError("Event end time must be after start time")

        if "uid" in event_data:
            sanitized["uid"] = cls.validate_uid(event_data["uid"])

        if "attendees" in event_data:
            sanitized["attendees"] = cls.validate_attendees(event_data["attendees"])

        if "recurrence_rule" in event_data:
            sanitized["recurrence_rule"] = cls.validate_rrule(
                event_data["recurrence_rule"]
            )

        return sanitized

    @classmethod
    def _decode_and_normalize(cls, value: str) -> str:
        """Decode potentially obfuscated content for pattern matching.

        Applies HTML-entity unescaping, URL percent-decoding and
        ``unicode_escape`` decoding in that order. The result is only meant
        to be scanned for dangerous patterns, never stored. If any step
        raises, the original value is returned unchanged.
        """
        import html
        import urllib.parse

        try:
            decoded = html.unescape(value)
            decoded = urllib.parse.unquote(decoded)
            decoded = decoded.encode().decode("unicode_escape", errors="ignore")
        except Exception:
            # Deliberate best-effort: the undecoded value is still scanned
            # by the caller, so falling back loses nothing.
            decoded = value

        return decoded

    @classmethod
    def validate_text_field(
        cls, value: str, field_name: str, required: bool = False
    ) -> str:
        """Validate and sanitize a free-text field.

        Args:
            value: The raw text; falsy values are treated as empty.
            field_name: Used in error messages and to look up the length
                limit in ``MAX_LENGTHS`` (default 1000).
            required: When True, an empty or whitespace-only value is an
                error.

        Returns:
            The stripped, NFKC-normalized text, or ``""`` when the value is
            empty and not required.

        Raises:
            ValidationError: If a required value is empty, the value is too
                long, or it matches one of ``DANGEROUS_PATTERNS`` either
                literally or after decoding.
        """
        if value:
            value = str(value).strip()

        # Checked after stripping so that whitespace-only input counts as empty.
        if not value:
            if required:
                raise ValidationError(f"{field_name} is required")
            return ""

        # Pre-filter: Reject extremely long inputs before any further work
        if len(value) > cls.MAX_VALIDATION_LENGTH:
            raise ValidationError(
                f"{field_name} exceeds maximum validation length of {cls.MAX_VALIDATION_LENGTH} characters"
            )

        # Normalize before the per-field limit: NFKC can expand characters,
        # and the limit has to hold for the value that is actually returned.
        value = unicodedata.normalize("NFKC", value)

        max_length = cls.MAX_LENGTHS.get(field_name, 1000)
        if len(value) > max_length:
            raise ValidationError(
                f"{field_name} exceeds maximum length of {max_length} characters"
            )

        # Check for dangerous patterns on both original and decoded versions
        for candidate in (value, cls._decode_and_normalize(value)):
            # Additional length check after decoding
            if len(candidate) > cls.MAX_VALIDATION_LENGTH:
                raise ValidationError(
                    f"{field_name} contains excessively long decoded content"
                )

            if any(pattern.search(candidate) for pattern in cls.DANGEROUS_PATTERNS):
                raise ValidationError(
                    f"{field_name} contains potentially dangerous content"
                )

        # NOTE: HTML escaping removed - should happen at display layer, not storage
        # CalDAV expects unescaped data

        return value

    @classmethod
    def validate_datetime(cls, value: Any, field_name: str) -> datetime:
        """Validate a datetime value.

        Args:
            value: A ``datetime`` (returned as-is) or an ISO-format string;
                ``Z`` is rewritten to ``+00:00`` before parsing.
            field_name: Used in error messages.

        Raises:
            ValidationError: If the string cannot be parsed or the value is
                neither a datetime nor a string.
        """
        if isinstance(value, datetime):
            return value

        if isinstance(value, str):
            try:
                return datetime.fromisoformat(value.replace("Z", "+00:00"))
            except ValueError as exc:
                raise ValidationError(
                    f"Invalid datetime format for {field_name}"
                ) from exc

        raise ValidationError(f"{field_name} must be a datetime or ISO format string")

    @classmethod
    def validate_uid(cls, uid: str) -> str:
        """Validate UID format.

        Returns:
            The UID unchanged.

        Raises:
            ValidationError: If the UID is empty, not a string, too long,
                or contains characters outside ``[a-zA-Z0-9-_.@]``.
        """
        if not uid:
            raise ValidationError("UID cannot be empty")

        if not isinstance(uid, str):
            raise ValidationError("UID must be a string")

        if len(uid) > cls.MAX_LENGTHS["uid"]:
            raise ValidationError(
                f"UID exceeds maximum length of {cls.MAX_LENGTHS['uid']}"
            )

        # fullmatch, not match: with match() the trailing ``$`` also succeeds
        # just before a final newline, which would let "abc\n" through.
        if not cls.PATTERNS["uid"].fullmatch(uid):
            raise ValidationError(
                "UID contains invalid characters. "
                "Only alphanumeric, dash, underscore, dot, and @ are allowed"
            )

        return uid

    @classmethod
    def validate_email(cls, email: str) -> str:
        """Validate an email address.

        Returns:
            The address stripped of surrounding whitespace and lower-cased.

        Raises:
            ValidationError: If the value is not a string, is too long, or
                does not match the email pattern.
        """
        if not isinstance(email, str):
            raise ValidationError("Email address must be a string")

        email = email.strip().lower()

        if len(email) > cls.MAX_LENGTHS["attendee_email"]:
            raise ValidationError("Email address too long")

        if not cls.PATTERNS["email"].fullmatch(email):
            raise ValidationError(f"Invalid email address format: {email}")

        return email

    @classmethod
    def validate_attendees(
        cls, attendees: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Validate an attendee list.

        Each attendee must be a dict with an ``email``. ``name`` is run
        through validate_text_field(); ``role`` must be one of the accepted
        roles; ``status`` and ``rsvp`` are copied through unchecked.

        Raises:
            ValidationError: If the list or any attendee is malformed.
        """
        if not isinstance(attendees, list):
            raise ValidationError("Attendees must be a list")

        validated = []

        for attendee in attendees:
            if not isinstance(attendee, dict):
                raise ValidationError("Each attendee must be a dictionary")

            if "email" not in attendee:
                raise ValidationError("Attendee email is required")

            validated_attendee = {"email": cls.validate_email(attendee["email"])}

            if "name" in attendee:
                validated_attendee["name"] = cls.validate_text_field(
                    attendee["name"], "attendee_name"
                )

            # Preserve other attendee fields
            for field in ("role", "status", "rsvp"):
                if field not in attendee:
                    continue
                if (
                    field == "role"
                    and attendee[field] not in cls._VALID_ATTENDEE_ROLES
                ):
                    raise ValidationError(
                        f"Invalid attendee role: {attendee[field]}"
                    )
                validated_attendee[field] = attendee[field]

            validated.append(validated_attendee)

        return validated

    @classmethod
    def validate_rrule(cls, rrule: str) -> str:
        """Validate a recurrence rule.

        Returns:
            The rule stripped and upper-cased.

        Raises:
            ValidationError: If the rule is not a string, does not start
                with a supported ``FREQ=`` value, contains whitespace or
                control characters, or exceeds 500 characters.
        """
        if not isinstance(rrule, str):
            raise ValidationError("RRULE must be a string")

        rrule = rrule.strip().upper()

        if not rrule.startswith("FREQ="):
            raise ValidationError("RRULE must start with FREQ=")

        valid_freqs = ["DAILY", "WEEKLY", "MONTHLY", "YEARLY"]
        freq_match = re.match(r"FREQ=(\w+)", rrule)
        if not freq_match or freq_match.group(1) not in valid_freqs:
            raise ValidationError(f"Invalid frequency. Must be one of: {valid_freqs}")

        # Only the prefix is checked above, so reject embedded line breaks
        # and other whitespace/control characters explicitly.
        if cls._RRULE_FORBIDDEN.search(rrule):
            raise ValidationError(
                "RRULE must not contain whitespace or control characters"
            )

        if len(rrule) > 500:
            raise ValidationError("RRULE too complex (exceeds 500 characters)")

        return rrule

    @classmethod
    def validate_task(cls, task_data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and sanitize task data.

        ``summary`` is required. ``description``, ``uid`` and ``related_to``
        are validated when the key is present; ``due``, ``priority``,
        ``status`` and ``percent_complete`` only when present and not None.

        Returns:
            A new dict holding only the validated fields.

        Raises:
            ValidationError: If the summary is missing or any field fails.
        """
        sanitized: Dict[str, Any] = {}

        if not task_data.get("summary"):
            raise ValidationError("Task summary is required")

        sanitized["summary"] = cls.validate_text_field(
            task_data["summary"], "summary", required=True
        )

        if "description" in task_data:
            sanitized["description"] = cls.validate_text_field(
                task_data["description"], "description"
            )

        if "due" in task_data and task_data["due"] is not None:
            sanitized["due"] = cls.validate_datetime(task_data["due"], "due")

        if "priority" in task_data and task_data["priority"] is not None:
            sanitized["priority"] = cls.validate_priority(task_data["priority"])

        if "status" in task_data and task_data["status"] is not None:
            sanitized["status"] = cls.validate_task_status(task_data["status"])

        if (
            "percent_complete" in task_data
            and task_data["percent_complete"] is not None
        ):
            sanitized["percent_complete"] = cls.validate_percent_complete(
                task_data["percent_complete"]
            )

        if "uid" in task_data:
            sanitized["uid"] = cls.validate_uid(task_data["uid"])

        if "related_to" in task_data:
            sanitized["related_to"] = cls.validate_related_to(task_data["related_to"])

        return sanitized

    @classmethod
    def validate_journal(cls, journal_data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and sanitize journal data.

        ``summary`` is required. ``description``, ``categories``, ``uid``
        and ``related_to`` are validated when the key is present;
        ``dtstart`` only when present and not None.

        Returns:
            A new dict holding only the validated fields.

        Raises:
            ValidationError: If the summary is missing or any field fails.
        """
        sanitized: Dict[str, Any] = {}

        if not journal_data.get("summary"):
            raise ValidationError("Journal summary is required")

        sanitized["summary"] = cls.validate_text_field(
            journal_data["summary"], "summary", required=True
        )

        if "description" in journal_data:
            sanitized["description"] = cls.validate_text_field(
                journal_data["description"], "description"
            )

        if "dtstart" in journal_data and journal_data["dtstart"] is not None:
            sanitized["dtstart"] = cls.validate_datetime(
                journal_data["dtstart"], "dtstart"
            )

        if "categories" in journal_data:
            sanitized["categories"] = cls.validate_categories(
                journal_data["categories"]
            )

        if "uid" in journal_data:
            sanitized["uid"] = cls.validate_uid(journal_data["uid"])

        if "related_to" in journal_data:
            sanitized["related_to"] = cls.validate_related_to(
                journal_data["related_to"]
            )

        return sanitized

    @classmethod
    def validate_priority(cls, priority: Any) -> int:
        """Validate task priority; the accepted range is 1-9.

        Raises:
            ValidationError: If the value cannot be converted with ``int()``
                or falls outside 1-9.
        """
        try:
            priority_val = int(priority)
        except (ValueError, TypeError) as exc:
            raise ValidationError("Priority must be an integer") from exc

        if priority_val < 1 or priority_val > 9:
            raise ValidationError("Priority must be between 1-9 (1 is highest)")

        return priority_val

    @classmethod
    def validate_task_status(cls, status: Any) -> "TaskStatus":
        """Validate task status.

        Returns:
            ``status`` itself when it is already a ``TaskStatus``, otherwise
            ``TaskStatus(str(status))``.

        Raises:
            ValidationError: If the value is not a valid ``TaskStatus``.
        """
        if isinstance(status, TaskStatus):
            return status

        try:
            return TaskStatus(str(status))
        except ValueError as exc:
            valid_statuses = [s.value for s in TaskStatus]
            raise ValidationError(
                f"Invalid task status. Must be one of: {valid_statuses}"
            ) from exc

    @classmethod
    def validate_percent_complete(cls, percent: Any) -> int:
        """Validate percent complete (0-100).

        Raises:
            ValidationError: If the value cannot be converted with ``int()``
                or falls outside 0-100.
        """
        try:
            percent_val = int(percent)
        except (ValueError, TypeError) as exc:
            raise ValidationError("Percent complete must be an integer") from exc

        if percent_val < 0 or percent_val > 100:
            raise ValidationError("Percent complete must be between 0-100")

        return percent_val

    @classmethod
    def validate_categories(cls, categories: Any) -> List[str]:
        """Validate a categories list.

        A single string is treated as a one-element list. Each entry goes
        through validate_text_field(); entries that come back empty are
        dropped.

        Raises:
            ValidationError: If the input is neither a list nor a string,
                or an entry is not a string or fails text validation.
        """
        if not isinstance(categories, list):
            if isinstance(categories, str):
                # Single category as string
                categories = [categories]
            else:
                raise ValidationError("Categories must be a list or string")

        validated_categories = []
        for category in categories:
            if not isinstance(category, str):
                raise ValidationError("Each category must be a string")

            category_clean = cls.validate_text_field(str(category), "category")
            if category_clean:  # Only add non-empty categories
                validated_categories.append(category_clean)

        return validated_categories

    @classmethod
    def validate_related_to(cls, related_to: Any) -> List[str]:
        """Validate a RELATED-TO UIDs list.

        A single string is treated as a one-element list; every entry must
        pass validate_uid().

        Raises:
            ValidationError: If the input is neither a list nor a string,
                or an entry is not a string or not a valid UID.
        """
        if not isinstance(related_to, list):
            if isinstance(related_to, str):
                # Single UID as string
                related_to = [related_to]
            else:
                raise ValidationError("RELATED-TO must be a list or string")

        validated_uids = []
        for uid in related_to:
            if not isinstance(uid, str):
                raise ValidationError("Each RELATED-TO UID must be a string")

            validated_uids.append(cls.validate_uid(uid))

        return validated_uids

    @classmethod
    def validate_url(
        cls, url: str, allow_private_ips: bool = False, field_name: str = "url"
    ) -> str:
        """Validate URL with SSRF protection.

        Args:
            url: The URL to validate
            allow_private_ips: If False (default), block localhost and private IPs for SSRF protection
            field_name: Name of the field for error messages

        Returns:
            The validated URL (stripped of surrounding whitespace)

        Raises:
            ValidationError: If URL is invalid or blocked by SSRF protection
        """
        if not url:
            raise ValidationError(f"{field_name} cannot be empty")

        if not isinstance(url, str):
            raise ValidationError(f"{field_name} must be a string")

        url = url.strip()

        # Check URL length
        max_url_length = cls.MAX_LENGTHS.get("url", 2048)
        if len(url) > max_url_length:
            raise ValidationError(
                f"{field_name} exceeds maximum length of {max_url_length} characters"
            )

        # Check URL format using existing pattern
        if not cls.PATTERNS["url"].match(url):
            raise ValidationError(
                f"Invalid URL format for {field_name}. Must be a valid HTTPS URL."
            )

        # Handle FieldInfo objects from Pydantic Field defaults
        from pydantic.fields import FieldInfo

        if isinstance(allow_private_ips, FieldInfo):
            allow_private_ips = allow_private_ips.default

        # If SSRF protection is disabled, return early
        if allow_private_ips:
            return url

        # Parse URL for SSRF validation
        try:
            hostname = urlparse(url).hostname
        except ValueError as exc:
            # URL parsing failed
            raise ValidationError(
                f"Invalid URL format for {field_name}: {str(exc)}"
            ) from exc

        if not hostname:
            raise ValidationError(
                f"Invalid URL format for {field_name}: no hostname found"
            )

        # Check against blocked hostnames (case-insensitive)
        if hostname.lower() in {h.lower() for h in cls.BLOCKED_HOSTNAMES}:
            raise ValidationError(
                f"URL validation failed for {field_name}: "
                f"localhost and loopback addresses are not allowed for security reasons"
            )

        # Resolve the hostname so that names pointing at internal addresses
        # are caught as well.
        # NOTE(review): this is a point-in-time lookup; whether the later
        # connection re-resolves the name is not visible here - confirm.
        try:
            addr_info = socket.getaddrinfo(hostname, None)
        except OSError as exc:
            # socket.gaierror and socket.error are both OSError. A failed
            # lookup may be a non-existent domain or a network issue.
            raise ValidationError(
                f"URL validation failed for {field_name}: "
                f"Unable to resolve hostname '{hostname}'. "
                f"Please verify the URL is correct and accessible."
            ) from exc
        except ValueError as exc:
            # e.g. UnicodeError while encoding an over-long host label
            raise ValidationError(
                f"Invalid URL format for {field_name}: {str(exc)}"
            ) from exc

        # info[4][0] contains the IP address; sort for a deterministic error
        for ip_str in sorted({info[4][0] for info in addr_info}):
            # Only the parse is guarded, so the rejections raised below can
            # never be swallowed by this handler.
            try:
                ip = ipaddress.ip_address(ip_str)
            except ValueError as exc:
                # An address that cannot be classified is rejected, not skipped.
                raise ValidationError(
                    f"URL validation failed for {field_name}: "
                    f"URL resolves to an address that could not be verified ({ip_str})"
                ) from exc

            # Check if IP is private or in blocked ranges
            if any(ip in private_range for private_range in cls.PRIVATE_IP_RANGES):
                raise ValidationError(
                    f"URL validation failed for {field_name}: "
                    f"URL resolves to a private or internal IP address ({ip_str}) "
                    f"which is not allowed for security reasons"
                )

            # Additional checks for special addresses
            if ip.is_private or ip.is_loopback or ip.is_link_local:
                raise ValidationError(
                    f"URL validation failed for {field_name}: "
                    f"URL resolves to a restricted IP address ({ip_str}) "
                    f"which is not allowed for security reasons"
                )

        return url

    @classmethod
    def is_private_ip(cls, ip_str: str) -> bool:
        """Check if an IP address is private or restricted.

        Args:
            ip_str: IP address as string

        Returns:
            True if the IP is private/restricted or cannot be parsed,
            False otherwise
        """
        try:
            ip = ipaddress.ip_address(ip_str)
        except ValueError:
            # If we can't parse it, consider it suspicious
            return True

        # Check against our defined private ranges
        if any(ip in private_range for private_range in cls.PRIVATE_IP_RANGES):
            return True

        # Use built-in checks as well
        return ip.is_private or ip.is_loopback or ip.is_link_local