ostruct-cli 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ostruct/cli/security.py CHANGED
@@ -7,15 +7,254 @@ This module provides security checks for file access, including:
7
7
  - Temporary directory handling
8
8
  """
9
9
 
10
+ import errno
10
11
  import logging
11
12
  import os
13
+ import posixpath
14
+ import re
15
+ import sys
12
16
  import tempfile
17
+ import unicodedata
18
+ from contextlib import contextmanager
13
19
  from pathlib import Path
14
- from typing import List, Optional, Set
20
+ from typing import Generator, List, Optional, Union
21
+ from unicodedata import category # noqa: F401 - Used in docstring
22
+ from unicodedata import normalize # noqa: F401 - Used in docstring
15
23
 
16
24
  from .errors import DirectoryNotFoundError, PathSecurityError
17
25
  from .security_types import SecurityManagerProtocol
18
26
 
27
# Path separators used by the host OS other than "/" (for example "\\" on
# Windows). Sorted so the list order is deterministic from run to run.
_os_alt_seps = sorted(
    {sep for sep in (os.path.sep, os.path.altsep) if sep and sep != "/"}
)

# Matches any character sequence that normalize_path() rejects as unsafe.
# Every code point is listed exactly once; U+0085 (NEL) is already covered by
# the U+007F-U+009F range.
_UNICODE_SAFETY_PATTERN = re.compile(
    r"[\u0000-\u001F\u007F-\u009F\u2028\u2029]"  # Control chars and line separators
    r"|\.{2,}"  # Directory traversal attempts
    r"|[\\/]{2,}"  # Multiple consecutive separators
    r"|[\u2024\u2025\u2026\uFE19\uFE30\uFE52\uFF0E\uFF61]"  # Alternative dots and separators
)
39
+
40
+
41
class CaseManager:
    """Registry that maps normalized paths back to their original spelling.

    On case-insensitive platforms (macOS, Windows) paths are lowercased for
    comparison; this registry remembers the spelling the caller supplied so
    it can still be shown to the user, without modifying Path objects.

    NOTE(review): this class was previously described as thread-safe, but the
    registry is a plain class-level dict with no locking -- confirm before
    relying on it from multiple threads.
    """

    # One registry shared by all callers: str(normalized_path) -> original text.
    _case_mapping: dict[str, str] = {}

    @classmethod
    def set_original_case(
        cls, normalized_path: Path, original_case: str
    ) -> None:
        """Remember the original spelling of a normalized path.

        Args:
            normalized_path: The normalized (potentially lowercased) Path
            original_case: The spelling to hand back from get_original_case()
        """
        key = str(normalized_path)
        cls._case_mapping[key] = original_case

    @classmethod
    def get_original_case(cls, normalized_path: Path) -> str:
        """Look up the original spelling of a normalized path.

        Args:
            normalized_path: The normalized Path to look up

        Returns:
            The stored spelling, or ``str(normalized_path)`` when nothing was
            recorded for this path.
        """
        key = str(normalized_path)
        try:
            return cls._case_mapping[key]
        except KeyError:
            return key

    @classmethod
    def clear(cls) -> None:
        """Forget every recorded spelling."""
        cls._case_mapping.clear()
81
+
82
+
83
class SecurityErrorReasons:
    """String constants used as the ``reason`` entry of a security error context.

    Keeping them in one place ensures every raise site spells a given reason
    the same way.
    """

    # Symlink handling
    SYMLINK_LOOP = "symlink_loop"
    MAX_DEPTH_EXCEEDED = "max_depth_exceeded"
    BROKEN_SYMLINK = "broken_symlink"
    SYMLINK_ERROR = "symlink_error"
    SYMLINK_TARGET_NOT_ALLOWED = "symlink_target_not_allowed"

    # Path location / access policy
    PATH_TRAVERSAL = "path_traversal"
    PATH_NOT_ALLOWED = "path_not_allowed"
    TEMP_PATHS_NOT_ALLOWED = "temp_paths_not_allowed"
    OUTSIDE_ALLOWED_DIRS = "outside_allowed_dirs"

    # Normalization and validation failures
    VALIDATION_ERROR = "validation_error"
    UNSAFE_UNICODE = "unsafe_unicode"
    NORMALIZATION_ERROR = "normalization_error"
    RESOLUTION_ERROR = "resolution_error"
    FILE_NOT_FOUND = "file_not_found"
    CASE_MISMATCH = "case_mismatch"
101
+
102
+
103
def normalize_path(
    path: Union[str, Path], check_traversal: bool = True
) -> Path:
    """Normalize a path following secure path handling best practices.

    Order of operations:
    1. Input normalization (Unicode NFKC, "/" separators, os.path.normpath)
    2. Security checks for dangerous Unicode
    3. Convert to absolute path (symlinks are NOT resolved)
    4. Handle case sensitivity (lowercase on macOS / Windows)
    5. Final validation (path traversal)

    Args:
        path: Path to normalize.
        check_traversal: When True, ``..`` components are collapsed and a
            ``..`` that would climb above the path's root is rejected.

    Returns:
        Path: Absolute, normalized path. On macOS and Windows it is lowercased
        and the pre-lowercase spelling is recorded in CaseManager.

    Raises:
        PathSecurityError: If the path matches _UNICODE_SAFETY_PATTERN (note
            that this includes any ``..`` left after normpath), attempts to
            traverse above the root, or cannot be normalized because of an
            OS-level error.
    """
    try:
        # Step 1: Input normalization
        path_str = unicodedata.normalize("NFKC", str(path))

        # Normalize path separators, then collapse redundant separators/dots
        path_str = os.path.normpath(path_str.replace("\\", "/"))

        # Step 2: Security checks for dangerous Unicode
        if _UNICODE_SAFETY_PATTERN.search(path_str):
            raise PathSecurityError(
                f"Path contains potentially dangerous Unicode characters: {path_str}",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.UNSAFE_UNICODE,
                    "path": path_str,
                },
            )

        # Step 3: Make absolute without resolving symlinks. Path.absolute()
        # already prefixes the current directory for relative paths.
        path_obj = Path(path_str).absolute()

        # Step 4: Handle case sensitivity based on platform
        if sys.platform == "darwin" or os.name == "nt":
            try:
                # Store original case before normalization
                original_case = str(path_obj)
                path_obj = Path(original_case.lower())
                CaseManager.set_original_case(path_obj, original_case)
            except (OSError, RuntimeError) as e:
                raise PathSecurityError(
                    f"Error normalizing path case: {e}",
                    path=str(path),
                    context={
                        "reason": SecurityErrorReasons.CASE_MISMATCH,
                        "error": str(e),
                    },
                ) from e

        # Step 5: Final validation - check for path traversal
        if check_traversal:
            # Keep the anchor ("/" or a drive root) out of the stack so a
            # ".." can never pop it and silently turn the result relative.
            anchor = path_obj.anchor
            components = path_obj.parts[1:] if anchor else path_obj.parts

            clean_parts: list[str] = []
            for part in components:
                if part == "..":
                    if not clean_parts:
                        raise PathSecurityError(
                            f"Path traversal attempt detected: {path}",
                            path=str(path),
                            context={
                                "reason": SecurityErrorReasons.PATH_TRAVERSAL,
                                "path": str(path_obj),
                            },
                        )
                    clean_parts.pop()
                elif part not in ("", "."):
                    clean_parts.append(part)

            # Reconstruct path from the anchor plus the clean parts
            if anchor:
                path_obj = Path(anchor, *clean_parts)
            else:
                path_obj = Path(*clean_parts)

        return path_obj

    except OSError as e:
        raise PathSecurityError(
            f"Error normalizing path: {e}",
            path=str(path),
            context={
                "reason": SecurityErrorReasons.NORMALIZATION_ERROR,
                "error": str(e),
            },
        ) from e
202
+
203
+
204
def safe_join(directory: str, *pathnames: str) -> Optional[str]:
    """Safely join path components with a base directory.

    This function:
    1. Normalizes each path component
    2. Rejects absolute paths and traversal attempts
    3. Handles alternative separators
    4. Normalizes Unicode and case (on case-insensitive systems)

    Unicode is normalized with NFKC, the same form normalize_path() uses, so
    compatibility look-alikes such as fullwidth dots are folded to "." BEFORE
    the traversal checks run rather than slipping through them.

    Args:
        directory: Base directory to join with ("." when empty)
        *pathnames: Path components to join; empty components are skipped

    Returns:
        Optional[str]: Joined path (with "/" separators) if safe, None if any
        component is absolute, starts with "..", or contains an alternative
        OS separator.
    """
    # macOS and Windows are treated as case-insensitive, as in normalize_path().
    fold_case = os.name == "nt" or sys.platform == "darwin"

    def _canonical(text: str) -> str:
        """Apply Unicode (NFKC) and, where relevant, case normalization."""
        text = unicodedata.normalize("NFKC", str(text))
        return text.lower() if fold_case else text

    parts = [_canonical(directory or ".")]

    for filename in pathnames:
        if not filename:
            continue

        # Normalize path separators and collapse dots
        filename = posixpath.normpath(_canonical(filename).replace("\\", "/"))

        # Reject unsafe components
        if (
            os.path.isabs(filename)
            or filename == ".."
            or filename.startswith("../")
            or filename.startswith("/")
            or any(sep in filename for sep in _os_alt_seps)
        ):
            return None

        parts.append(filename)

    return posixpath.join(*parts)
257
+
19
258
 
20
259
  def is_temp_file(path: str) -> bool:
21
260
  """Check if a file is in a temporary directory.
@@ -30,41 +269,15 @@ def is_temp_file(path: str) -> bool:
30
269
  This function handles platform-specific path normalization, including symlinks
31
270
  (e.g., on macOS where /var is symlinked to /private/var).
32
271
  """
33
- # Normalize the input path (resolve symlinks)
34
- abs_path = os.path.realpath(path)
35
-
36
- # Get all potential temp directories and normalize them
37
- temp_dirs = set()
38
- # System temp dir (platform independent)
39
- temp_dirs.add(os.path.realpath(tempfile.gettempdir()))
40
-
41
- # Common Unix/Linux/macOS temp locations
42
- unix_temp_dirs = ["/tmp", "/var/tmp", "/var/folders"]
43
- for temp_dir in unix_temp_dirs:
44
- if os.path.exists(temp_dir):
45
- temp_dirs.add(os.path.realpath(temp_dir))
46
-
47
- # Windows temp locations (if on Windows)
48
- if os.name == "nt":
49
- if "TEMP" in os.environ:
50
- temp_dirs.add(os.path.realpath(os.environ["TEMP"]))
51
- if "TMP" in os.environ:
52
- temp_dirs.add(os.path.realpath(os.environ["TMP"]))
53
-
54
- # Check if file is in any temp directory using normalized paths
55
- abs_path_parts = os.path.normpath(abs_path).split(os.sep)
56
- for temp_dir in temp_dirs:
57
- temp_dir_parts = os.path.normpath(temp_dir).split(os.sep)
58
- # Check if the path starts with the temp directory components
59
- if len(abs_path_parts) >= len(temp_dir_parts) and all(
60
- a == b
61
- for a, b in zip(
62
- abs_path_parts[: len(temp_dir_parts)], temp_dir_parts
63
- )
64
- ):
65
- return True
66
-
67
- return False
272
+ try:
273
+ # Normalize paths for comparison
274
+ abs_path = normalize_path(path)
275
+ temp_dir = normalize_path(tempfile.gettempdir())
276
+
277
+ # Check if file is in the temp directory using is_relative_to
278
+ return abs_path.is_relative_to(temp_dir)
279
+ except (ValueError, OSError):
280
+ return False
68
281
 
69
282
 
70
283
  class SecurityManager(SecurityManagerProtocol):
@@ -78,34 +291,117 @@ class SecurityManager(SecurityManagerProtocol):
78
291
  1. A base directory that serves as the root for all file operations
79
292
  2. A set of explicitly allowed directories that can be accessed outside the base directory
80
293
  3. Special handling for temporary directories that are always allowed
294
+ 4. Case-sensitive or case-insensitive path handling based on platform
295
+
296
+ Case Sensitivity Handling:
297
+ - All paths are normalized using normalize_path() before comparison
298
+ - On case-insensitive systems (macOS, Windows):
299
+ * Directory comparisons are case-insensitive
300
+ * Base and allowed directories are stored in normalized case
301
+ * Path validation preserves original case in error messages
302
+ - On case-sensitive systems (Linux):
303
+ * Directory comparisons are case-sensitive
304
+ * Base and allowed directories maintain original case
305
+ * Path validation requires exact case matches
306
+
307
+ Security Implications of Case Sensitivity:
308
+ - Path traversal checks work on normalized paths
309
+ - Symlink resolution uses case-aware path comparison
310
+ - Allowed directory checks respect platform case sensitivity
311
+ - Error messages maintain original case for debugging
312
+ - Temporary path detection is case-aware
313
+
314
+ Example:
315
+ >>> # On macOS (case-insensitive):
316
+ >>> sm = SecurityManager("/base/dir")
317
+ >>> sm.is_path_allowed("/base/DIR/file.txt") # True
318
+ >>> sm.is_path_allowed("/BASE/dir/file.txt") # True
319
+
320
+ >>> # On Linux (case-sensitive):
321
+ >>> sm = SecurityManager("/base/dir")
322
+ >>> sm.is_path_allowed("/base/DIR/file.txt") # False
323
+ >>> sm.is_path_allowed("/base/dir/file.txt") # True
81
324
 
82
325
  All paths are normalized using realpath() to handle symlinks consistently across platforms.
83
326
  """
84
327
 
85
328
  def __init__(
86
329
  self,
87
- base_dir: Optional[str] = None,
330
+ base_dir: str,
88
331
  allowed_dirs: Optional[List[str]] = None,
332
+ allow_temp_paths: bool = False,
333
+ max_symlink_depth: int = 16,
89
334
  ):
90
- """Initialize security manager.
335
+ """Initialize the SecurityManager.
91
336
 
92
337
  Args:
93
- base_dir: Base directory for file access. Defaults to current working directory.
94
- allowed_dirs: Optional list of additional allowed directories
338
+ base_dir: Base directory for path validation
339
+ allowed_dirs: Additional allowed directories
340
+ allow_temp_paths: Whether to allow paths in temporary directories
341
+ max_symlink_depth: Maximum depth for symlink resolution
95
342
 
96
- All paths are normalized using realpath to handle symlinks
97
- and relative paths consistently across platforms.
343
+ Raises:
344
+ DirectoryNotFoundError: If base_dir or any allowed directory does not exist or is not a directory
98
345
  """
99
346
  logger = logging.getLogger("ostruct")
100
347
  logger.debug("Initializing SecurityManager")
101
- self._base_dir = Path(os.path.realpath(base_dir or os.getcwd()))
102
- logger.debug("Base directory set to: %s", self._base_dir)
103
348
 
104
- self._allowed_dirs: Set[Path] = set()
349
+ # Normalize base directory
350
+ try:
351
+ self._base_dir = normalize_path(base_dir)
352
+ if not self._base_dir.is_dir():
353
+ raise DirectoryNotFoundError(
354
+ f"Base path is not a directory: {base_dir}"
355
+ )
356
+ except OSError as e:
357
+ raise DirectoryNotFoundError(
358
+ f"Base directory does not exist: {base_dir}"
359
+ ) from e
360
+
361
+ # Set up allowed directories, starting with base_dir
362
+ self._allowed_dirs = [self._base_dir]
105
363
  if allowed_dirs:
106
364
  for directory in allowed_dirs:
107
- logger.debug("Adding allowed directory: %s", directory)
108
- self.add_allowed_dir(directory)
365
+ try:
366
+ real_path = normalize_path(directory)
367
+ if not real_path.is_dir():
368
+ raise DirectoryNotFoundError(
369
+ f"Allowed path is not a directory: {directory}"
370
+ )
371
+ if real_path not in self._allowed_dirs:
372
+ self._allowed_dirs.append(real_path)
373
+ except OSError as e:
374
+ raise DirectoryNotFoundError(
375
+ f"Allowed path does not exist: {directory}"
376
+ ) from e
377
+
378
+ # Set up temp directory handling - resolve it to handle platform symlinks
379
+ self.allow_temp_paths = allow_temp_paths
380
+ self._temp_dir = Path(tempfile.gettempdir()).resolve()
381
+ logger.debug("Resolved temp directory: %s", self._temp_dir)
382
+
383
+ # Set up symlink handling
384
+ self.max_symlink_depth = max_symlink_depth
385
+ self._symlink_cache: dict[str, str] = {}
386
+
387
+ @contextmanager
388
+ def initializing(self) -> Generator[None, None, None]:
389
+ """Context manager to disable validation during initialization."""
390
+ self._initialization_context = True
391
+ try:
392
+ yield
393
+ finally:
394
+ self._initialization_context = False
395
+
396
+ @contextmanager
397
+ def symlink_context(self) -> Generator[None, None, None]:
398
+ """Clear symlink tracking cache for a fresh symlink resolution context."""
399
+ old_cache = self._symlink_cache
400
+ self._symlink_cache = {}
401
+ try:
402
+ yield
403
+ finally:
404
+ self._symlink_cache = old_cache
109
405
 
110
406
  @property
111
407
  def base_dir(self) -> Path:
@@ -117,30 +413,22 @@ class SecurityManager(SecurityManagerProtocol):
117
413
  """Get the list of allowed directories."""
118
414
  return sorted(self._allowed_dirs) # Sort for consistent ordering
119
415
 
120
- def add_allowed_dir(self, directory: str) -> None:
121
- """Add a directory to the set of allowed directories.
416
+ def add_allowed_directory(self, directory: str) -> None:
417
+ """Add a directory to the list of allowed directories.
122
418
 
123
419
  Args:
124
- directory: Directory to allow access to
420
+ directory: Directory to allow
125
421
 
126
422
  Raises:
127
- DirectoryNotFoundError: If directory does not exist
423
+ DirectoryNotFoundError: If directory does not exist or is not a directory
128
424
  """
129
- logger = logging.getLogger("ostruct")
130
- logger.debug("Adding allowed directory: %s", directory)
131
- real_path = Path(os.path.realpath(directory))
132
- logger.debug("Resolved real path: %s", real_path)
133
-
134
- if not real_path.exists():
135
- logger.debug("Directory not found: %s", directory)
136
- raise DirectoryNotFoundError(f"Directory not found: {directory}")
425
+ real_path = normalize_path(directory)
137
426
  if not real_path.is_dir():
138
- logger.debug("Path is not a directory: %s", directory)
139
427
  raise DirectoryNotFoundError(
140
- f"Path is not a directory: {directory}"
428
+ f"Allowed path is not a directory: {directory}"
141
429
  )
142
- self._allowed_dirs.add(real_path)
143
- logger.debug("Successfully added allowed directory: %s", real_path)
430
+ if real_path not in self._allowed_dirs:
431
+ self._allowed_dirs.append(real_path)
144
432
 
145
433
  def add_allowed_dirs_from_file(self, file_path: str) -> None:
146
434
  """Add allowed directories from a file.
@@ -152,130 +440,196 @@ class SecurityManager(SecurityManagerProtocol):
152
440
  PathSecurityError: If file_path is outside allowed directories
153
441
  FileNotFoundError: If file does not exist
154
442
  ValueError: If file contains invalid directories
443
+
444
+ Note:
445
+ This code is known to trigger a mypy "unreachable" error due to limitations
446
+ in mypy's flow analysis. The code is actually reachable and works correctly
447
+ at runtime, as verified by tests. A bug report should be filed with mypy.
155
448
  """
156
- real_path = Path(os.path.realpath(file_path))
449
+ if file_path is None:
450
+ return # Skip None paths silently
157
451
 
158
- # First validate the file path itself
452
+ real_path = normalize_path(file_path)
159
453
  try:
160
- self.validate_path(
454
+ validated_path = self.validate_path(
161
455
  str(real_path), purpose="read allowed directories"
162
456
  )
163
- except PathSecurityError:
457
+ except PathSecurityError as e:
164
458
  raise PathSecurityError.from_expanded_paths(
165
459
  original_path=file_path,
166
460
  expanded_path=str(real_path),
167
461
  error_logged=True,
168
462
  base_dir=str(self._base_dir),
169
463
  allowed_dirs=[str(d) for d in self._allowed_dirs],
170
- )
171
-
172
- if not real_path.exists():
173
- raise FileNotFoundError(f"File not found: {file_path}")
464
+ ) from e
174
465
 
175
- with open(real_path) as f:
466
+ with open(validated_path) as f:
176
467
  for line in f:
177
468
  directory = line.strip()
178
469
  if directory and not directory.startswith("#"):
179
- self.add_allowed_dir(directory)
180
-
181
- def is_path_allowed(self, path: str) -> bool:
182
- """Check if a path is allowed.
183
-
184
- A path is allowed if it is:
185
- 1. Under the normalized base directory
186
- 2. Under any normalized allowed directory
470
+ self.add_allowed_directory(directory)
187
471
 
188
- The path must also exist.
472
+ def is_temp_path(self, path: Union[str, Path]) -> bool:
473
+ """Check if a path is in a temporary directory.
189
474
 
190
475
  Args:
191
476
  path: Path to check
192
477
 
193
478
  Returns:
194
- bool: True if path exists and is allowed, False otherwise
195
- """
196
- logger = logging.getLogger("ostruct")
197
- logger.debug("Checking if path is allowed: %s", path)
198
- logger.debug("Base directory: %s", self._base_dir)
199
- logger.debug("Allowed directories: %s", self._allowed_dirs)
479
+ bool: True if path is in a temporary directory
200
480
 
481
+ Note:
482
+ This method handles platform-specific path normalization, including symlinks
483
+ (e.g., on macOS where /tmp is symlinked to /private/tmp).
484
+ """
201
485
  try:
202
- real_path = Path(os.path.realpath(path))
203
- logger.debug("Resolved real path: %s", real_path)
486
+ # Resolve both paths to handle symlinks
487
+ resolved_path = Path(path).resolve()
488
+ return resolved_path.is_relative_to(self._temp_dir)
489
+ except (OSError, ValueError):
490
+ return False
204
491
 
205
- # Check if the path exists
206
- if not real_path.exists():
207
- logger.debug("Path does not exist")
208
- return False
492
+ def is_path_allowed(self, path: Union[str, Path]) -> bool:
493
+ """Check if a path is allowed.
209
494
 
210
- except (ValueError, OSError) as e:
211
- logger.debug("Failed to resolve real path: %s", e)
212
- return False
495
+ A path is allowed if:
496
+ 1. It is under the base directory, or
497
+ 2. It is under one of the allowed directories, or
498
+ 3. It is in a temporary directory and temp paths are allowed
499
+
500
+ Args:
501
+ path: Path to check
213
502
 
503
+ Returns:
504
+ bool: True if path is allowed
505
+ """
214
506
  try:
215
- if real_path.is_relative_to(self._base_dir):
216
- logger.debug("Path is relative to base directory")
507
+ # First check if it's a temp path
508
+ if self.allow_temp_paths and self.is_temp_path(path):
217
509
  return True
218
- except ValueError:
219
- logger.debug("Path is not relative to base directory")
220
510
 
221
- for allowed_dir in self._allowed_dirs:
511
+ # Normalize the path without resolving symlinks
512
+ path_obj = normalize_path(path, check_traversal=True)
513
+
514
+ # Check unresolved path first
515
+ for allowed_dir in self._allowed_dirs:
516
+ try:
517
+ if path_obj.is_relative_to(allowed_dir):
518
+ return True
519
+ except ValueError:
520
+ continue
521
+
522
+ # Only resolve if necessary and the path exists
222
523
  try:
223
- if real_path.is_relative_to(allowed_dir):
224
- logger.debug(
225
- "Path is relative to allowed directory: %s",
226
- allowed_dir,
227
- )
228
- return True
229
- except ValueError:
230
- logger.debug(
231
- "Path is not relative to allowed directory: %s",
232
- allowed_dir,
233
- )
234
- continue
524
+ if path_obj.exists():
525
+ resolved = path_obj.resolve(strict=True)
526
+ for allowed_dir in self._allowed_dirs:
527
+ try:
528
+ if resolved.is_relative_to(allowed_dir):
529
+ return True
530
+ except ValueError:
531
+ continue
532
+ except (OSError, RuntimeError):
533
+ return False
235
534
 
236
- logger.debug("Path is not allowed")
237
- return False
535
+ return False
238
536
 
239
- def validate_path(self, path: str, purpose: str = "access") -> Path:
240
- """Validate and normalize a path.
537
+ except (OSError, PathSecurityError):
538
+ return False
539
+
540
+ def validate_path(
541
+ self, path: Union[str, Path], purpose: str = "access"
542
+ ) -> Path:
543
+ """Validate and resolve a path.
241
544
 
242
545
  Args:
243
546
  path: Path to validate
244
- purpose: Description of intended access (for error messages)
547
+ purpose: Description of the intended use (for error messages)
245
548
 
246
549
  Returns:
247
- Path: Normalized path if valid
550
+ Path: Normalized path object
248
551
 
249
552
  Raises:
250
553
  PathSecurityError: If path is not allowed
554
+ FileNotFoundError: If path does not exist
251
555
  """
556
+ if path is None:
557
+ raise ValueError("Path cannot be None")
558
+
252
559
  logger = logging.getLogger("ostruct")
253
560
  logger.debug("Validating path for %s: %s", purpose, path)
254
561
 
255
562
  try:
256
- real_path = Path(os.path.realpath(path))
257
- logger.debug("Resolved real path: %s", real_path)
258
- except (ValueError, OSError) as e:
259
- logger.error("Invalid path format: %s", e)
260
- raise PathSecurityError(
261
- f"Invalid path format: {e}", error_logged=True
262
- )
563
+ # First normalize the path without security checks
564
+ path_obj = normalize_path(path, check_traversal=False)
565
+
566
+ # Check if it's a temp path first (this is always safe to check)
567
+ if self.is_temp_path(path_obj):
568
+ if not self.allow_temp_paths:
569
+ logger.error("Temp paths are not allowed")
570
+ raise PathSecurityError(
571
+ "Access denied: Temporary paths are not allowed",
572
+ path=str(path),
573
+ context={
574
+ "reason": SecurityErrorReasons.TEMP_PATHS_NOT_ALLOWED
575
+ },
576
+ error_logged=True,
577
+ )
578
+ # For temp paths, we check existence after allowing them
579
+ if not path_obj.exists():
580
+ raise FileNotFoundError(f"File not found: {path}")
581
+ return path_obj
263
582
 
264
- if not self.is_path_allowed(str(real_path)):
265
- logger.error(
266
- "Access denied: %s is outside base directory and not in allowed directories",
267
- path,
268
- )
269
- raise PathSecurityError.from_expanded_paths(
270
- original_path=path,
271
- expanded_path=str(real_path),
272
- base_dir=str(self._base_dir),
273
- allowed_dirs=[str(d) for d in self._allowed_dirs],
274
- error_logged=True,
275
- )
583
+ # For non-temp paths, check existence first
584
+ if not path_obj.exists():
585
+ raise FileNotFoundError(f"File not found: {path}")
276
586
 
277
- logger.debug("Path validation successful")
278
- return real_path
587
+ # Resolve symlinks using our security-aware resolver
588
+ try:
589
+ if path_obj.is_symlink():
590
+ resolved = self.resolve_symlink(path_obj)
591
+ else:
592
+ resolved = path_obj
593
+ except PathSecurityError:
594
+ raise # Re-raise security errors
595
+ except FileNotFoundError:
596
+ raise # Re-raise file not found errors
597
+
598
+ # Final security check on resolved path
599
+ if not self.is_path_allowed(resolved):
600
+ logger.error(
601
+ "Access denied: Attempted to %s path outside allowed directories: %s",
602
+ purpose,
603
+ resolved,
604
+ )
605
+ raise PathSecurityError(
606
+ f"Access denied: {path} is outside base directory and not in allowed directories",
607
+ path=str(path),
608
+ context={
609
+ "reason": SecurityErrorReasons.OUTSIDE_ALLOWED_DIRS,
610
+ "base_dir": str(self._base_dir),
611
+ "allowed_dirs": [str(d) for d in self._allowed_dirs],
612
+ "expanded_path": str(resolved),
613
+ },
614
+ error_logged=True,
615
+ )
616
+
617
+ return resolved
618
+
619
+ except OSError as e:
620
+ if e.errno == errno.ENOENT:
621
+ raise FileNotFoundError(f"File not found: {path}")
622
+
623
+ logger.error("Error validating path: %s", e)
624
+ raise PathSecurityError(
625
+ f"Error validating path: {e}",
626
+ path=str(path),
627
+ context={
628
+ "reason": SecurityErrorReasons.VALIDATION_ERROR,
629
+ "error": str(e),
630
+ },
631
+ error_logged=True,
632
+ ) from e
279
633
 
280
634
  def is_allowed_file(self, path: str) -> bool:
281
635
  """Check if file access is allowed.
@@ -287,8 +641,8 @@ class SecurityManager(SecurityManagerProtocol):
287
641
  bool: True if file exists and is allowed
288
642
  """
289
643
  try:
290
- real_path = Path(os.path.realpath(path))
291
- return self.is_path_allowed(str(real_path)) and real_path.is_file()
644
+ real_path = normalize_path(path)
645
+ return self.is_path_allowed(real_path) and real_path.is_file()
292
646
  except (ValueError, OSError):
293
647
  return False
294
648
 
@@ -306,18 +660,305 @@ class SecurityManager(SecurityManagerProtocol):
306
660
  except (ValueError, OSError):
307
661
  return False
308
662
 
309
- def resolve_path(self, path: str) -> Path:
310
- """Resolve and validate a path.
311
-
312
- This is an alias for validate_path() for backward compatibility.
663
+ def _normalize_input(self, path: Union[str, Path]) -> Path:
664
+ """Normalize input path to absolute path.
313
665
 
314
666
  Args:
315
- path: Path to resolve and validate
667
+ path: Input path to normalize
316
668
 
317
669
  Returns:
318
- Path: Normalized path if valid
670
+ Path: Normalized absolute path
671
+
672
+ Raises:
673
+ ValueError: If path is None
674
+ """
675
+ if path is None:
676
+ raise ValueError("Path cannot be None")
677
+
678
+ p = normalize_path(path)
679
+ if not p.is_absolute():
680
+ p = normalize_path(str(p))
681
+
682
+ # Resolve the path to handle .. components
683
+ try:
684
+ return p.resolve()
685
+ except OSError as e:
686
+ if e.errno == errno.ENOENT:
687
+ # If the file doesn't exist, still normalize the path
688
+ # This allows security checks on non-existent files
689
+ return p.absolute()
690
+ raise
691
+
692
+ def _check_security(self, path: Path, purpose: str) -> None:
693
+ """Check if a path is allowed for a specific purpose.
694
+
695
+ Args:
696
+ path: Path to check
697
+ purpose: Description of the intended use
319
698
 
320
699
  Raises:
321
700
  PathSecurityError: If path is not allowed
322
701
  """
323
- return self.validate_path(path)
702
+ logger = logging.getLogger("ostruct")
703
+
704
+ # First check if it's a temp path
705
+ if self.is_temp_path(path):
706
+ if not self.allow_temp_paths:
707
+ logger.error("Temp paths are not allowed")
708
+ raise PathSecurityError(
709
+ "Access denied: Temporary paths are not allowed",
710
+ path=str(path),
711
+ context={"reason": "temp_paths_not_allowed"},
712
+ error_logged=True,
713
+ )
714
+ return
715
+
716
+ # Check against allowed directories
717
+ if not self.is_path_allowed(path):
718
+ logger.error(
719
+ "Access denied: Attempted to %s path outside allowed directories: %s",
720
+ purpose,
721
+ path,
722
+ )
723
+ raise PathSecurityError(
724
+ f"Access denied: {path} is outside base directory and not in allowed directories",
725
+ path=str(path),
726
+ context={
727
+ "reason": "path_not_allowed",
728
+ "base_dir": str(self._base_dir),
729
+ "allowed_dirs": [str(d) for d in self._allowed_dirs],
730
+ "expanded_path": str(path),
731
+ },
732
+ error_logged=True,
733
+ )
734
+
735
def resolve_path(self, path: str) -> Path:
    """Resolve and validate a path.

    Order of operations:
    1. Normalize the input path
    2. Check existence
    3. Validate security permissions
    4. Safely resolve symlinks with security checks at each step

    Args:
        path: Path to resolve and validate.

    Returns:
        Path: The normalized path, or the symlink target returned by
        ``resolve_symlink`` when the normalized path is a symlink.

    Raises:
        FileNotFoundError: If the path does not exist.
        PathSecurityError: If the path or its symlink target is not
            allowed, a symlink loop is reported by the OS, or any other
            OSError occurs during resolution.
    """
    logger = logging.getLogger("ostruct")
    logger.debug("Resolving path: %s", path)

    try:
        # Phase 1: Normalize input (traversal check disabled here; the
        # allowed-directory checks below decide access).
        normalized = normalize_path(path, check_traversal=False)
        logger.debug("Normalized path: %s", normalized)

        # Phase 2: Check existence first
        if not normalized.exists():
            logger.error("File not found: %s", normalized)
            raise FileNotFoundError(f"File not found: {path}")

        # Phase 3: Initial security check
        if not self.is_path_allowed(normalized):
            logger.error(
                "Access denied: Path outside allowed directories: %s",
                normalized,
            )
            raise PathSecurityError(
                f"Access denied: {normalized} is outside base directory and not in allowed directories",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.PATH_NOT_ALLOWED,
                    "base_dir": str(self._base_dir),
                    "allowed_dirs": [str(d) for d in self._allowed_dirs],
                    "expanded_path": str(normalized),
                },
                error_logged=True,
            )

        # Phase 4: Safe symlink resolution with security checks at each step
        if normalized.is_symlink():
            resolved = self.resolve_symlink(normalized)
            logger.debug(
                "Resolved symlink: %s -> %s", normalized, resolved
            )

            # Final security check on resolved path
            if not self.is_path_allowed(resolved):
                logger.error(
                    "Access denied: Symlink target outside allowed directories: %s -> %s",
                    normalized,
                    resolved,
                )
                raise PathSecurityError(
                    f"Access denied: Symlink target {resolved} is outside allowed directories",
                    path=str(path),
                    context={
                        "reason": SecurityErrorReasons.SYMLINK_TARGET_NOT_ALLOWED,
                        "target": str(resolved),
                        "source": str(normalized),
                    },
                    error_logged=True,
                )

            return resolved

        return normalized

    except FileNotFoundError:
        # FileNotFoundError is an OSError subclass; re-raise it here so the
        # generic handler below does not wrap it.
        raise
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise FileNotFoundError(f"File not found: {path}") from e
        if e.errno == errno.ELOOP:
            # Log before raising: the error is flagged error_logged=True.
            logger.error("Symlink loop detected at %s", path)
            raise PathSecurityError(
                f"Symlink loop detected at {path}",
                path=str(path),
                context={"reason": SecurityErrorReasons.SYMLINK_LOOP},
                error_logged=True,
            ) from e
        logger.error("Error resolving path %s: %s", path, e)
        raise PathSecurityError(
            f"Error resolving path {path}: {e}",
            path=str(path),
            context={"reason": SecurityErrorReasons.RESOLUTION_ERROR},
            error_logged=True,
        ) from e
823
+
824
def resolve_symlink(
    self,
    path: Path,
    depth: int = 0,
    resolution_chain: Optional[List[str]] = None,
) -> Path:
    """
    Resolve a symlink with security checks at each step.

    Order of checks:
    1. Loop detection (prevent infinite loops)
    2. Max depth check (prevent resource exhaustion)
    3. Process symlink and check existence
    4. Security validation (prevent unauthorized access)

    Args:
        path: Path to resolve; made absolute against the current working
            directory when relative.
        depth: Number of symlinks already followed in this resolution.
        resolution_chain: Absolute paths (as strings) already visited.

    Returns:
        Path: The first non-symlink path reached, as an absolute path.

    Raises:
        FileNotFoundError: If an OSError with ``ENOENT`` occurs.
        PathSecurityError: On a symlink loop, exceeded depth, broken
            symlink, disallowed target or path, or any other OSError.
    """
    logger = logging.getLogger("ostruct")
    resolution_chain = resolution_chain or []

    # Make the path absolute without resolve(), which would follow
    # symlinks; absolute() anchors relative paths at the working directory.
    path = path.absolute()

    # Track current path before any operations
    current_path = str(path)
    new_chain = resolution_chain + [current_path]
    logger.debug("Processing path: %s (depth: %d)", current_path, depth)
    logger.debug("Resolution chain: %s", new_chain)

    # 1. Check for loops: has this path already been visited?
    if current_path in resolution_chain:
        loop_start = resolution_chain.index(current_path)
        loop_chain = resolution_chain[loop_start:] + [current_path]
        raise PathSecurityError(
            f"Symlink loop detected: {' -> '.join(loop_chain)}",
            path=current_path,
            context={
                "reason": SecurityErrorReasons.SYMLINK_LOOP,
                "resolution_chain": resolution_chain,
                "loop_chain": loop_chain,
            },
        )

    # 2. Check max depth
    if depth >= self.max_symlink_depth:
        raise PathSecurityError(
            f"Maximum symlink depth ({self.max_symlink_depth}) exceeded",
            path=current_path,
            context={
                "reason": SecurityErrorReasons.MAX_DEPTH_EXCEEDED,
                "max_depth": self.max_symlink_depth,
                "depth": depth,
                "resolution_chain": new_chain,
            },
        )

    try:
        # 3. Process symlink and check existence
        if path.is_symlink():
            # Read target without resolving
            target = path.readlink()
            logger.debug("Found symlink: %s -> %s", path, target)

            # A relative target is relative to the link's own directory.
            if not target.is_absolute():
                target = path.parent / target
            target = target.absolute()

            # Check if target exists (using lstat to avoid resolving)
            try:
                target.lstat()
            except FileNotFoundError as e:
                raise PathSecurityError(
                    f"Broken symlink detected: {path} -> {target}",
                    path=current_path,
                    context={
                        "reason": SecurityErrorReasons.BROKEN_SYMLINK,
                        "target": str(target),
                        "resolution_chain": new_chain,
                    },
                ) from e

            # Check if target is allowed
            if not self.is_path_allowed(target):
                raise PathSecurityError(
                    f"Symlink target not allowed: {path} -> {target}",
                    path=current_path,
                    context={
                        "reason": SecurityErrorReasons.SYMLINK_TARGET_NOT_ALLOWED,
                        "target": str(target),
                        "resolution_chain": new_chain,
                    },
                )

            # Recurse to resolve target
            return self.resolve_symlink(target, depth + 1, new_chain)

        # 4. Final security check on non-symlink
        if not self.is_path_allowed(path):
            raise PathSecurityError(
                f"Path not allowed: {path}",
                path=current_path,
                context={
                    "reason": SecurityErrorReasons.PATH_NOT_ALLOWED,
                    "path": str(path),
                },
            )

        return path

    except OSError as e:
        # Chain the original error so the underlying OS failure stays
        # visible in tracebacks.
        if e.errno == errno.ENOENT:
            raise FileNotFoundError(f"File not found: {path}") from e
        if e.errno == errno.ELOOP:
            raise PathSecurityError(
                f"Symlink loop detected at {path}",
                path=current_path,
                context={
                    "reason": SecurityErrorReasons.SYMLINK_LOOP,
                    "resolution_chain": new_chain,
                },
            ) from e
        raise PathSecurityError(
            f"Error resolving symlink {path}: {e}",
            path=current_path,
            context={
                "reason": SecurityErrorReasons.SYMLINK_ERROR,
                "error": str(e),
                "resolution_chain": new_chain,
            },
        ) from e
955
+
956
def is_raw_path_allowed(self, path: str) -> bool:
    """
    Check whether a raw path (already cleaned) is allowed without performing full resolution.

    A path is allowed when it equals one of ``self._allowed_dirs`` or lies
    beneath one. The comparison is done on path-component boundaries, so
    ``/srv/data_evil`` is not accepted for an allowed ``/srv/data``.

    Args:
        path: Raw path to check; compared textually, with no resolution.

    Returns:
        bool: True if the path is inside (or equal to) an allowed directory.
    """
    path_str = str(path)
    for allowed_dir in self._allowed_dirs:
        allowed_str = str(allowed_dir)
        if path_str == allowed_str:
            return True
        # Require a separator after the directory name; a bare prefix test
        # would also match sibling directories sharing the same prefix.
        prefix = (
            allowed_str
            if allowed_str.endswith(os.sep)
            else allowed_str + os.sep
        )
        if path_str.startswith(prefix):
            return True
    return False