ostruct-cli 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ostruct/cli/security.py DELETED
@@ -1,964 +0,0 @@
1
- """Security management for file access.
2
-
3
- This module provides security checks for file access, including:
4
- - Base directory restrictions
5
- - Allowed directory validation
6
- - Path traversal prevention
7
- - Temporary directory handling
8
- """
9
-
10
- import errno
11
- import logging
12
- import os
13
- import posixpath
14
- import re
15
- import sys
16
- import tempfile
17
- import unicodedata
18
- from contextlib import contextmanager
19
- from pathlib import Path
20
- from typing import Generator, List, Optional, Union
21
- from unicodedata import category # noqa: F401 - Used in docstring
22
- from unicodedata import normalize # noqa: F401 - Used in docstring
23
-
24
- from .errors import DirectoryNotFoundError, PathSecurityError
25
- from .security_types import SecurityManagerProtocol
26
-
27
- # Compute alternative separators (if any) that differ from "/"
28
- _os_alt_seps = list(
29
- {sep for sep in [os.path.sep, os.path.altsep] if sep and sep != "/"}
30
- )
31
-
32
- # Add these constants
33
- _UNICODE_SAFETY_PATTERN = re.compile(
34
- r"[\u0000-\u001F\u007F-\u009F\u2028-\u2029\u0085]" # Control chars and line separators
35
- r"|\.{2,}" # Directory traversal attempts
36
- r"|[\\/]{2,}" # Multiple consecutive separators
37
- r"|[\u2024\u2025\uFE52\u2024\u2025\u2026\uFE19\uFE30\uFE52\uFF0E\uFF61]" # Alternative dots and separators
38
- )
39
-
40
-
41
class CaseManager:
    """Registry that remembers the original spelling of normalized paths.

    Paths are keyed by ``str(normalized_path)`` in a single class-level
    dictionary, so the mapping is shared by every caller in the process and
    Path objects themselves are never modified. It exists so that a path
    which was lowercased during normalization can still be shown in its
    original case.
    """

    # normalized path string -> original-case string
    _case_mapping: dict[str, str] = {}

    @classmethod
    def set_original_case(
        cls, normalized_path: Path, original_case: str
    ) -> None:
        """Record ``original_case`` as the display form of ``normalized_path``.

        Args:
            normalized_path: The normalized (potentially lowercased) Path
            original_case: The original case string to preserve
        """
        key = str(normalized_path)
        cls._case_mapping[key] = original_case

    @classmethod
    def get_original_case(cls, normalized_path: Path) -> str:
        """Look up the display form recorded for ``normalized_path``.

        Args:
            normalized_path: The normalized Path to look up

        Returns:
            The stored original-case string, or ``str(normalized_path)``
            when nothing was recorded for it.
        """
        key = str(normalized_path)
        try:
            return cls._case_mapping[key]
        except KeyError:
            return key

    @classmethod
    def clear(cls) -> None:
        """Forget every recorded mapping."""
        cls._case_mapping.clear()
81
-
82
-
83
class SecurityErrorReasons:
    """String codes placed in the ``reason`` field of security error contexts.

    Using these names instead of literals keeps the codes consistent
    across the module.
    """

    # Symlink handling
    SYMLINK_LOOP = "symlink_loop"
    MAX_DEPTH_EXCEEDED = "max_depth_exceeded"
    BROKEN_SYMLINK = "broken_symlink"
    SYMLINK_ERROR = "symlink_error"
    SYMLINK_TARGET_NOT_ALLOWED = "symlink_target_not_allowed"

    # Access decisions
    PATH_TRAVERSAL = "path_traversal"
    PATH_NOT_ALLOWED = "path_not_allowed"
    TEMP_PATHS_NOT_ALLOWED = "temp_paths_not_allowed"
    OUTSIDE_ALLOWED_DIRS = "outside_allowed_dirs"

    # Normalization and validation failures
    VALIDATION_ERROR = "validation_error"
    UNSAFE_UNICODE = "unsafe_unicode"
    NORMALIZATION_ERROR = "normalization_error"
    RESOLUTION_ERROR = "resolution_error"
    FILE_NOT_FOUND = "file_not_found"
    CASE_MISMATCH = "case_mismatch"
101
-
102
-
103
def normalize_path(
    path: Union[str, Path], check_traversal: bool = True
) -> Path:
    """Turn ``path`` into an absolute, sanitized Path without resolving symlinks.

    Steps, in order:
    1. Unicode NFKC normalization, backslashes converted to "/", then
       ``os.path.normpath``
    2. Rejection of strings matching ``_UNICODE_SAFETY_PATTERN``
    3. Conversion to an absolute path (relative input is anchored at the
       current working directory)
    4. On macOS and Windows, lowercasing of the whole path; the pre-lowercase
       string is recorded in ``CaseManager``
    5. Optionally, a component walk that rejects ".." escaping the root

    Args:
        path: Path to normalize
        check_traversal: When True, run the component walk of step 5 and
            rebuild the result from the surviving components

    Returns:
        The normalized absolute Path

    Raises:
        PathSecurityError: If the path matches the safety pattern, contains
            an escaping "..", or an OSError occurs during normalization
    """
    try:
        # Step 1: canonical text form
        text = unicodedata.normalize("NFKC", str(path)).replace("\\", "/")
        text = os.path.normpath(text)

        # Step 2: refuse anything the safety pattern flags
        if _UNICODE_SAFETY_PATTERN.search(text) is not None:
            raise PathSecurityError(
                f"Path contains potentially dangerous Unicode characters: {text}",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.UNSAFE_UNICODE,
                    "path": text,
                },
            )

        # Step 3: anchor relative paths at the cwd; symlinks stay unresolved
        candidate = Path(text)
        if not candidate.is_absolute():
            candidate = Path.cwd() / candidate
        candidate = candidate.absolute()

        # Step 4: case folding on macOS / Windows
        if sys.platform == "darwin" or os.name == "nt":
            try:
                as_given = str(candidate)
                candidate = Path(as_given.lower())
                # Keep the original spelling available for display
                CaseManager.set_original_case(candidate, as_given)
            except (OSError, RuntimeError) as e:
                raise PathSecurityError(
                    f"Error normalizing path case: {e}",
                    path=str(path),
                    context={
                        "reason": SecurityErrorReasons.CASE_MISMATCH,
                        "error": str(e),
                    },
                )

        if not check_traversal:
            return candidate

        # Step 5: collapse components by hand; a ".." with nothing left to
        # pop would climb above the root and is treated as an attack
        kept: list[str] = []
        for component in candidate.parts:
            if component == "..":
                if not kept:
                    raise PathSecurityError(
                        f"Path traversal attempt detected: {path}",
                        path=str(path),
                        context={
                            "reason": SecurityErrorReasons.PATH_TRAVERSAL,
                            "path": str(candidate),
                        },
                    )
                kept.pop()
            elif component not in ("", "."):
                kept.append(component)

        return Path(*kept)

    except OSError as e:
        raise PathSecurityError(
            f"Error normalizing path: {e}",
            path=str(path),
            context={
                "reason": SecurityErrorReasons.NORMALIZATION_ERROR,
                "error": str(e),
            },
        )
202
-
203
-
204
def safe_join(directory: str, *pathnames: str) -> Optional[str]:
    """Join ``pathnames`` onto ``directory``, refusing unsafe components.

    Every piece is NFC-normalized, lowercased on Windows and macOS,
    has backslashes turned into "/" and is collapsed with
    ``posixpath.normpath``. Empty components are skipped. An empty
    ``directory`` is treated as ".".

    Args:
        directory: Base directory to join with
        *pathnames: Path components to join

    Returns:
        Optional[str]: The "/"-joined path, or None as soon as a component
        is absolute, is or starts with "..", or contains an OS-specific
        alternative separator
    """
    fold_case = os.name == "nt" or (
        os.name == "posix" and sys.platform == "darwin"
    )

    def _canonical(text: str) -> str:
        # Shared Unicode / case treatment for the base and each component
        text = unicodedata.normalize("NFC", str(text))
        return text.lower() if fold_case else text

    joined = [_canonical(directory or ".")]

    for raw in pathnames:
        if not raw:
            continue

        piece = posixpath.normpath(_canonical(raw).replace("\\", "/"))

        unsafe = (
            os.path.isabs(piece)
            or piece == ".."
            or piece.startswith(("../", "/"))
            or any(sep in piece for sep in _os_alt_seps)
        )
        if unsafe:
            return None

        joined.append(piece)

    return posixpath.join(*joined)
257
-
258
-
259
def is_temp_file(path: str) -> bool:
    """Report whether ``path`` lies under the system temporary directory.

    Both ``path`` and ``tempfile.gettempdir()`` are passed through
    ``normalize_path`` before being compared, so the comparison is made on
    normalized absolute paths.

    Args:
        path: Path to check (will be converted to absolute path)

    Returns:
        True if the normalized path is relative to the normalized temp
        directory; False otherwise, including when a ValueError or OSError
        is raised during the check
    """
    try:
        candidate = normalize_path(path)
        temp_root = normalize_path(tempfile.gettempdir())
        inside = candidate.is_relative_to(temp_root)
    except (ValueError, OSError):
        return False
    return inside
281
-
282
-
283
class SecurityManager(SecurityManagerProtocol):
    """Manages security for file access.

    Validates all file access against a base directory and optional
    allowed directories. Prevents unauthorized access and directory
    traversal attacks.

    The security model is based on:
    1. A base directory that serves as the root for all file operations
    2. A set of explicitly allowed directories that can be accessed outside the base directory
    3. Optional acceptance of paths under the system temporary directory
       (``allow_temp_paths``)
    4. Case-sensitive or case-insensitive path handling based on platform

    Case Sensitivity Handling:
    - All paths are normalized using normalize_path() before comparison
    - On case-insensitive systems (macOS, Windows):
        * Directory comparisons are case-insensitive
        * Base and allowed directories are stored in normalized case
    - On case-sensitive systems (Linux):
        * Directory comparisons are case-sensitive
        * Base and allowed directories maintain original case

    Example:
        >>> # On macOS (case-insensitive):
        >>> sm = SecurityManager("/base/dir")
        >>> sm.is_path_allowed("/base/DIR/file.txt")  # True
        >>> sm.is_path_allowed("/BASE/dir/file.txt")  # True

        >>> # On Linux (case-sensitive):
        >>> sm = SecurityManager("/base/dir")
        >>> sm.is_path_allowed("/base/DIR/file.txt")  # False
        >>> sm.is_path_allowed("/base/dir/file.txt")  # True

    Base and allowed directories are normalized without resolving symlinks;
    only the temporary directory is stored fully resolved.
    """

    def __init__(
        self,
        base_dir: str,
        allowed_dirs: Optional[List[str]] = None,
        allow_temp_paths: bool = False,
        max_symlink_depth: int = 16,
    ):
        """Initialize the SecurityManager.

        Args:
            base_dir: Base directory for path validation
            allowed_dirs: Additional allowed directories
            allow_temp_paths: Whether to allow paths in temporary directories
            max_symlink_depth: Maximum depth for symlink resolution

        Raises:
            DirectoryNotFoundError: If base_dir or any allowed directory does not exist or is not a directory
        """
        logger = logging.getLogger("ostruct")
        logger.debug("Initializing SecurityManager")

        # Toggled by initializing(); defined up front so the attribute
        # exists even if that context manager is never entered.
        self._initialization_context = False

        # Normalize base directory
        try:
            self._base_dir = normalize_path(base_dir)
            if not self._base_dir.is_dir():
                raise DirectoryNotFoundError(
                    f"Base path is not a directory: {base_dir}"
                )
        except OSError as e:
            raise DirectoryNotFoundError(
                f"Base directory does not exist: {base_dir}"
            ) from e

        # Set up allowed directories, starting with base_dir
        self._allowed_dirs = [self._base_dir]
        if allowed_dirs:
            for directory in allowed_dirs:
                try:
                    real_path = normalize_path(directory)
                    if not real_path.is_dir():
                        raise DirectoryNotFoundError(
                            f"Allowed path is not a directory: {directory}"
                        )
                    if real_path not in self._allowed_dirs:
                        self._allowed_dirs.append(real_path)
                except OSError as e:
                    raise DirectoryNotFoundError(
                        f"Allowed path does not exist: {directory}"
                    ) from e

        # Set up temp directory handling - resolve it to handle platform symlinks
        self.allow_temp_paths = allow_temp_paths
        self._temp_dir = Path(tempfile.gettempdir()).resolve()
        logger.debug("Resolved temp directory: %s", self._temp_dir)

        # Set up symlink handling
        self.max_symlink_depth = max_symlink_depth
        self._symlink_cache: dict[str, str] = {}

    @contextmanager
    def initializing(self) -> Generator[None, None, None]:
        """Context manager that sets the initialization flag while active.

        The flag's previous value is restored on exit, so nested use keeps
        the flag set until the outermost context ends.
        """
        previous = getattr(self, "_initialization_context", False)
        self._initialization_context = True
        try:
            yield
        finally:
            self._initialization_context = previous

    @contextmanager
    def symlink_context(self) -> Generator[None, None, None]:
        """Swap in an empty symlink cache for the duration of the context.

        The previous cache object is put back on exit.
        """
        old_cache = self._symlink_cache
        self._symlink_cache = {}
        try:
            yield
        finally:
            self._symlink_cache = old_cache

    @property
    def base_dir(self) -> Path:
        """Get the base directory."""
        return self._base_dir

    @property
    def allowed_dirs(self) -> List[Path]:
        """Get the allowed directories as a new, sorted list."""
        return sorted(self._allowed_dirs)  # Sort for consistent ordering

    def add_allowed_directory(self, directory: str) -> None:
        """Add a directory to the list of allowed directories.

        Args:
            directory: Directory to allow

        Raises:
            DirectoryNotFoundError: If directory does not exist or is not a directory
        """
        real_path = normalize_path(directory)
        if not real_path.is_dir():
            raise DirectoryNotFoundError(
                f"Allowed path is not a directory: {directory}"
            )
        if real_path not in self._allowed_dirs:
            self._allowed_dirs.append(real_path)

    def add_allowed_dirs_from_file(self, file_path: str) -> None:
        """Add allowed directories from a file.

        Blank lines and lines starting with "#" are ignored; every other
        line is passed to add_allowed_directory().

        Args:
            file_path: Path to file containing allowed directories (one per line)

        Raises:
            PathSecurityError: If file_path is outside allowed directories
            FileNotFoundError: If file does not exist
            DirectoryNotFoundError: If a listed entry is not an existing directory

        Note:
            The ``None`` check below is reported as unreachable by mypy
            because of the ``str`` annotation; it is kept so that callers
            passing None are skipped silently.
        """
        if file_path is None:
            return  # Skip None paths silently

        real_path = normalize_path(file_path)
        try:
            validated_path = self.validate_path(
                str(real_path), purpose="read allowed directories"
            )
        except PathSecurityError as e:
            raise PathSecurityError.from_expanded_paths(
                original_path=file_path,
                expanded_path=str(real_path),
                error_logged=True,
                base_dir=str(self._base_dir),
                allowed_dirs=[str(d) for d in self._allowed_dirs],
            ) from e

        with open(validated_path) as f:
            for line in f:
                directory = line.strip()
                if directory and not directory.startswith("#"):
                    self.add_allowed_directory(directory)

    def is_temp_path(self, path: Union[str, Path]) -> bool:
        """Check if a path is in a temporary directory.

        Args:
            path: Path to check

        Returns:
            bool: True if path is in a temporary directory

        Note:
            The path is fully resolved before comparison with the resolved
            temp directory, so platform symlinks (e.g., on macOS where /tmp
            is symlinked to /private/tmp) are handled.
        """
        try:
            # Resolve both paths to handle symlinks
            resolved_path = Path(path).resolve()
            return resolved_path.is_relative_to(self._temp_dir)
        except (OSError, ValueError):
            return False

    def _is_under_allowed_dir(self, candidate: Path) -> bool:
        """Return True if candidate lies under any allowed directory."""
        return any(
            candidate.is_relative_to(allowed_dir)
            for allowed_dir in self._allowed_dirs
        )

    def is_path_allowed(self, path: Union[str, Path]) -> bool:
        """Check if a path is allowed.

        A path is allowed if:
        1. It is under the base directory, or
        2. It is under one of the allowed directories, or
        3. It is in a temporary directory and temp paths are allowed

        Args:
            path: Path to check

        Returns:
            bool: True if path is allowed; False otherwise, including when
            normalization or resolution fails
        """
        try:
            # First check if it's a temp path
            if self.allow_temp_paths and self.is_temp_path(path):
                return True

            # Normalize the path without resolving symlinks
            path_obj = normalize_path(path, check_traversal=True)

            # Check unresolved path first
            if self._is_under_allowed_dir(path_obj):
                return True

            # Only resolve if necessary and the path exists
            try:
                if path_obj.exists():
                    resolved = path_obj.resolve(strict=True)
                    return self._is_under_allowed_dir(resolved)
            except (OSError, RuntimeError):
                return False

            return False

        except (OSError, PathSecurityError):
            return False

    def validate_path(
        self, path: Union[str, Path], purpose: str = "access"
    ) -> Path:
        """Validate and resolve a path.

        Args:
            path: Path to validate
            purpose: Description of the intended use (for error messages)

        Returns:
            Path: Normalized path object

        Raises:
            ValueError: If path is None
            PathSecurityError: If path is not allowed
            FileNotFoundError: If path does not exist
        """
        if path is None:
            raise ValueError("Path cannot be None")

        logger = logging.getLogger("ostruct")
        logger.debug("Validating path for %s: %s", purpose, path)

        try:
            # First normalize the path without security checks
            path_obj = normalize_path(path, check_traversal=False)

            # Check if it's a temp path first (this is always safe to check)
            if self.is_temp_path(path_obj):
                if not self.allow_temp_paths:
                    logger.error("Temp paths are not allowed")
                    raise PathSecurityError(
                        "Access denied: Temporary paths are not allowed",
                        path=str(path),
                        context={
                            "reason": SecurityErrorReasons.TEMP_PATHS_NOT_ALLOWED
                        },
                        error_logged=True,
                    )
                # For temp paths, we check existence after allowing them
                if not path_obj.exists():
                    raise FileNotFoundError(f"File not found: {path}")
                return path_obj

            # For non-temp paths, check existence first
            if not path_obj.exists():
                raise FileNotFoundError(f"File not found: {path}")

            # Resolve symlinks using our security-aware resolver
            if path_obj.is_symlink():
                resolved = self.resolve_symlink(path_obj)
            else:
                resolved = path_obj

            # Final security check on resolved path
            if not self.is_path_allowed(resolved):
                logger.error(
                    "Access denied: Attempted to %s path outside allowed directories: %s",
                    purpose,
                    resolved,
                )
                raise PathSecurityError(
                    f"Access denied: {path} is outside base directory and not in allowed directories",
                    path=str(path),
                    context={
                        "reason": SecurityErrorReasons.OUTSIDE_ALLOWED_DIRS,
                        "base_dir": str(self._base_dir),
                        "allowed_dirs": [str(d) for d in self._allowed_dirs],
                        "expanded_path": str(resolved),
                    },
                    error_logged=True,
                )

            return resolved

        except FileNotFoundError:
            # FileNotFoundError is an OSError subclass and the instances
            # raised above carry no errno, so without this clause they
            # would be rewrapped as PathSecurityError below.
            raise
        except OSError as e:
            if e.errno == errno.ENOENT:
                raise FileNotFoundError(f"File not found: {path}") from e

            logger.error("Error validating path: %s", e)
            raise PathSecurityError(
                f"Error validating path: {e}",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.VALIDATION_ERROR,
                    "error": str(e),
                },
                error_logged=True,
            ) from e

    def is_allowed_file(self, path: str) -> bool:
        """Check if file access is allowed.

        Args:
            path: Path to check

        Returns:
            bool: True if file exists and is allowed
        """
        try:
            real_path = normalize_path(path)
            return self.is_path_allowed(real_path) and real_path.is_file()
        except (ValueError, OSError):
            return False

    def is_allowed_path(self, path_str: str) -> bool:
        """Check if string path is allowed.

        Args:
            path_str: Path string to check

        Returns:
            bool: True if path is allowed
        """
        try:
            return self.is_path_allowed(path_str)
        except (ValueError, OSError):
            return False

    def _normalize_input(self, path: Union[str, Path]) -> Path:
        """Normalize input path to absolute path.

        Args:
            path: Input path to normalize

        Returns:
            Path: Normalized absolute path

        Raises:
            ValueError: If path is None
        """
        if path is None:
            raise ValueError("Path cannot be None")

        p = normalize_path(path)
        if not p.is_absolute():
            p = normalize_path(str(p))

        # Resolve the path to handle .. components
        try:
            return p.resolve()
        except OSError as e:
            if e.errno == errno.ENOENT:
                # If the file doesn't exist, still normalize the path
                # This allows security checks on non-existent files
                return p.absolute()
            raise

    def _check_security(self, path: Path, purpose: str) -> None:
        """Check if a path is allowed for a specific purpose.

        Args:
            path: Path to check
            purpose: Description of the intended use

        Raises:
            PathSecurityError: If path is not allowed
        """
        logger = logging.getLogger("ostruct")

        # First check if it's a temp path
        if self.is_temp_path(path):
            if not self.allow_temp_paths:
                logger.error("Temp paths are not allowed")
                raise PathSecurityError(
                    "Access denied: Temporary paths are not allowed",
                    path=str(path),
                    context={
                        "reason": SecurityErrorReasons.TEMP_PATHS_NOT_ALLOWED
                    },
                    error_logged=True,
                )
            return

        # Check against allowed directories
        if not self.is_path_allowed(path):
            logger.error(
                "Access denied: Attempted to %s path outside allowed directories: %s",
                purpose,
                path,
            )
            raise PathSecurityError(
                f"Access denied: {path} is outside base directory and not in allowed directories",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.PATH_NOT_ALLOWED,
                    "base_dir": str(self._base_dir),
                    "allowed_dirs": [str(d) for d in self._allowed_dirs],
                    "expanded_path": str(path),
                },
                error_logged=True,
            )

    def resolve_path(self, path: str) -> Path:
        """Resolve and validate a path.

        Order of operations:
        1. Normalize the input path
        2. Check existence
        3. Validate security permissions
        4. Safely resolve symlinks with security checks at each step

        Args:
            path: Path to resolve

        Returns:
            Path: The normalized path, or the symlink target when the
            normalized path is a symlink

        Raises:
            FileNotFoundError: If the path does not exist
            PathSecurityError: If the path or its symlink target is not
                allowed, or resolution fails
        """
        logger = logging.getLogger("ostruct")
        logger.debug("Resolving path: %s", path)

        try:
            # Phase 1: Normalize input without security checks
            normalized = normalize_path(path, check_traversal=False)
            logger.debug("Normalized path: %s", normalized)

            # Phase 2: Check existence first
            if not normalized.exists():
                logger.error("File not found: %s", normalized)
                raise FileNotFoundError(f"File not found: {path}")

            # Phase 3: Initial security check
            if not self.is_path_allowed(normalized):
                logger.error(
                    "Access denied: Path outside allowed directories: %s",
                    normalized,
                )
                raise PathSecurityError(
                    f"Access denied: {normalized} is outside base directory and not in allowed directories",
                    path=str(path),
                    context={
                        "reason": SecurityErrorReasons.PATH_NOT_ALLOWED,
                        "base_dir": str(self._base_dir),
                        "allowed_dirs": [str(d) for d in self._allowed_dirs],
                        "expanded_path": str(normalized),
                    },
                    error_logged=True,
                )

            # Phase 4: Safe symlink resolution with security checks at each step
            if normalized.is_symlink():
                resolved = self.resolve_symlink(normalized)
                logger.debug(
                    "Resolved symlink: %s -> %s", normalized, resolved
                )

                # Final security check on resolved path
                if not self.is_path_allowed(resolved):
                    logger.error(
                        "Access denied: Symlink target outside allowed directories: %s -> %s",
                        normalized,
                        resolved,
                    )
                    raise PathSecurityError(
                        f"Access denied: Symlink target {resolved} is outside allowed directories",
                        path=str(path),
                        context={
                            "reason": SecurityErrorReasons.SYMLINK_TARGET_NOT_ALLOWED,
                            "target": str(resolved),
                            "source": str(normalized),
                        },
                        error_logged=True,
                    )

                return resolved

            return normalized

        except FileNotFoundError:
            # Re-raise FileNotFoundError without wrapping
            raise
        except OSError as e:
            if e.errno == errno.ENOENT:
                raise FileNotFoundError(f"File not found: {path}") from e
            elif e.errno == errno.ELOOP:
                raise PathSecurityError(
                    f"Symlink loop detected at {path}",
                    path=str(path),
                    context={"reason": SecurityErrorReasons.SYMLINK_LOOP},
                    error_logged=True,
                ) from e
            raise PathSecurityError(
                f"Error resolving path {path}: {e}",
                path=str(path),
                context={"reason": SecurityErrorReasons.RESOLUTION_ERROR},
                error_logged=True,
            ) from e

    def resolve_symlink(
        self,
        path: Path,
        depth: int = 0,
        resolution_chain: Optional[List[str]] = None,
    ) -> Path:
        """
        Resolve a symlink with security checks at each step.

        Order of checks:
        1. Loop detection (prevent infinite loops)
        2. Max depth check (prevent resource exhaustion)
        3. Process symlink and check existence
        4. Security validation (prevent unauthorized access)

        Args:
            path: Path to resolve; made absolute against the cwd if relative
            depth: Number of symlinks already followed in this resolution
            resolution_chain: Absolute paths visited so far, in order

        Returns:
            Path: The first non-symlink path reached by following the chain

        Raises:
            PathSecurityError: On a loop, exceeded depth, broken symlink,
                disallowed target or path, or other OS error
            FileNotFoundError: If a path in the chain does not exist
        """
        logger = logging.getLogger("ostruct")
        resolution_chain = resolution_chain or []

        # Convert to absolute path manually without resolve()
        if not path.is_absolute():
            path = Path.cwd() / path
        path = path.absolute()

        # Track current path before any operations
        current_path = str(path)
        new_chain = resolution_chain + [current_path]
        logger.debug("Processing path: %s (depth: %d)", current_path, depth)
        logger.debug("Resolution chain: %s", new_chain)

        # 1. Check for loops: the current path must not already appear in
        #    the chain of previously visited paths
        if current_path in resolution_chain:
            loop_start = resolution_chain.index(current_path)
            loop_chain = resolution_chain[loop_start:] + [current_path]
            raise PathSecurityError(
                f"Symlink loop detected: {' -> '.join(loop_chain)}",
                path=current_path,
                context={
                    "reason": SecurityErrorReasons.SYMLINK_LOOP,
                    "resolution_chain": resolution_chain,
                    "loop_chain": loop_chain,
                },
            )

        # 2. Check max depth
        if depth >= self.max_symlink_depth:
            raise PathSecurityError(
                f"Maximum symlink depth ({self.max_symlink_depth}) exceeded",
                path=current_path,
                context={
                    "reason": SecurityErrorReasons.MAX_DEPTH_EXCEEDED,
                    "max_depth": self.max_symlink_depth,
                    "depth": depth,
                    "resolution_chain": new_chain,
                },
            )

        try:
            # 3. Process symlink and check existence
            if path.is_symlink():
                # Read target without resolving
                target = path.readlink()
                logger.debug("Found symlink: %s -> %s", path, target)

                # Convert relative target to absolute
                if not target.is_absolute():
                    target = path.parent / target
                target = target.absolute()

                # Check if target exists (using lstat to avoid resolving)
                try:
                    target.lstat()
                except FileNotFoundError:
                    raise PathSecurityError(
                        f"Broken symlink detected: {path} -> {target}",
                        path=current_path,
                        context={
                            "reason": SecurityErrorReasons.BROKEN_SYMLINK,
                            "target": str(target),
                            "resolution_chain": new_chain,
                        },
                    )

                # Check if target is allowed
                if not self.is_path_allowed(target):
                    raise PathSecurityError(
                        f"Symlink target not allowed: {path} -> {target}",
                        path=current_path,
                        context={
                            "reason": SecurityErrorReasons.SYMLINK_TARGET_NOT_ALLOWED,
                            "target": str(target),
                            "resolution_chain": new_chain,
                        },
                    )

                # Recurse to resolve target
                return self.resolve_symlink(target, depth + 1, new_chain)

            # 4. Final security check on non-symlink
            if not self.is_path_allowed(path):
                raise PathSecurityError(
                    f"Path not allowed: {path}",
                    path=current_path,
                    context={
                        "reason": SecurityErrorReasons.PATH_NOT_ALLOWED,
                        "path": str(path),
                    },
                )

            return path

        except FileNotFoundError:
            # A FileNotFoundError coming back from the recursive call has no
            # errno; let it through instead of rewrapping it below.
            raise
        except OSError as e:
            if e.errno == errno.ENOENT:
                raise FileNotFoundError(f"File not found: {path}") from e
            elif e.errno == errno.ELOOP:
                raise PathSecurityError(
                    f"Symlink loop detected at {path}",
                    path=current_path,
                    context={
                        "reason": SecurityErrorReasons.SYMLINK_LOOP,
                        "resolution_chain": new_chain,
                    },
                ) from e
            raise PathSecurityError(
                f"Error resolving symlink {path}: {e}",
                path=current_path,
                context={
                    "reason": SecurityErrorReasons.SYMLINK_ERROR,
                    "error": str(e),
                    "resolution_chain": new_chain,
                },
            ) from e

    def is_raw_path_allowed(self, path: str) -> bool:
        """
        Check whether a raw path (already cleaned) is allowed without performing full resolution.

        The comparison is textual but respects path-component boundaries:
        the path must equal an allowed directory or continue it after a
        separator, so "/base/dir2" does not match an allowed "/base/dir".

        Args:
            path: Cleaned path string to check

        Returns:
            bool: True if the path is an allowed directory or lies below one
        """
        path_str = str(path)
        for allowed_dir in self._allowed_dirs:
            allowed_str = str(allowed_dir)
            if path_str == allowed_str:
                return True
            # Append a separator unless the directory already ends in one
            # (as the filesystem root does)
            if allowed_str.endswith(os.sep):
                prefix = allowed_str
            else:
                prefix = allowed_str + os.sep
            if path_str.startswith(prefix):
                return True
        return False