ostruct-cli 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
ostruct/cli/path_utils.py CHANGED
@@ -1,17 +1,16 @@
1
1
  """Path validation utilities for the CLI."""
2
2
 
3
- import os
4
3
  from pathlib import Path
5
4
  from typing import Optional, Tuple
6
5
 
7
- from .errors import (
6
+ from ostruct.cli.errors import (
8
7
  DirectoryNotFoundError,
9
8
  FileNotFoundError,
10
- PathSecurityError,
11
9
  VariableNameError,
12
10
  VariableValueError,
13
11
  )
14
- from .security import SecurityManager
12
+ from ostruct.cli.security.errors import PathSecurityError, SecurityErrorReasons
13
+ from ostruct.cli.security.security_manager import SecurityManager
15
14
 
16
15
 
17
16
  def validate_path_mapping(
@@ -45,79 +44,61 @@ def validate_path_mapping(
45
44
  >>> validate_path_mapping("data=config/", is_dir=True) # Validates directory
46
45
  ('data', 'config/')
47
46
  """
47
+ # Split into name and path parts
48
48
  try:
49
- if not mapping or "=" not in mapping:
50
- raise ValueError("Invalid mapping format")
51
-
52
- name, path = mapping.split("=", 1)
53
- if not name:
54
- raise VariableNameError(
55
- f"Empty name in {'directory' if is_dir else 'file'} mapping"
56
- )
57
-
58
- if not path:
59
- raise VariableValueError("Path cannot be empty")
60
-
61
- # Expand user home directory and environment variables
62
- path = os.path.expanduser(os.path.expandvars(path))
63
-
64
- # Convert to Path object and resolve against base_dir if provided
65
- path_obj = Path(path)
66
- if base_dir:
67
- path_obj = Path(base_dir) / path_obj
68
-
69
- # Resolve the path to catch directory traversal attempts
70
- try:
71
- resolved_path = path_obj.resolve()
72
- except OSError as e:
73
- raise OSError(f"Failed to resolve path: {e}")
74
-
75
- # Check if path exists
76
- if not resolved_path.exists():
77
- if is_dir:
78
- raise DirectoryNotFoundError(f"Directory not found: {path!r}")
79
- else:
80
- raise FileNotFoundError(f"File not found: {path!r}")
81
-
82
- # Check if path is correct type
83
- if is_dir and not resolved_path.is_dir():
84
- raise DirectoryNotFoundError(f"Path is not a directory: {path!r}")
85
- elif not is_dir and not resolved_path.is_file():
86
- raise FileNotFoundError(f"Path is not a file: {path!r}")
87
-
88
- # Check if path is accessible
49
+ name, path_str = mapping.split("=", 1)
50
+ except ValueError:
51
+ raise ValueError(f"Invalid mapping format (missing '='): {mapping}")
52
+
53
+ # Validate name
54
+ name = name.strip()
55
+ if not name:
56
+ raise VariableNameError("Variable name cannot be empty")
57
+ if not name.isidentifier():
58
+ raise VariableNameError(f"Invalid variable name: {name}")
59
+
60
+ # Normalize path
61
+ path_str = path_str.strip()
62
+ if not path_str:
63
+ raise VariableValueError("Path cannot be empty")
64
+
65
+ # Create a Path object
66
+ path = Path(path_str)
67
+ if not path.is_absolute() and base_dir:
68
+ path = Path(base_dir) / path
69
+
70
+ # Validate path with security manager if provided
71
+ if security_manager:
89
72
  try:
90
- if is_dir:
91
- os.listdir(str(resolved_path))
92
- else:
93
- with open(str(resolved_path), "r", encoding="utf-8") as f:
94
- f.read(1)
95
- except OSError as e:
96
- if e.errno == 13: # Permission denied
73
+ path = security_manager.validate_path(path)
74
+ except PathSecurityError as e:
75
+ if (
76
+ e.context.get("reason")
77
+ == SecurityErrorReasons.PATH_OUTSIDE_ALLOWED
78
+ ):
97
79
  raise PathSecurityError(
98
- f"Permission denied accessing path: {path!r}"
99
- )
100
- raise
101
-
102
- # Check security constraints
103
- if security_manager:
104
- if not security_manager.is_path_allowed(str(resolved_path)):
105
- raise PathSecurityError.from_expanded_paths(
106
- original_path=str(path),
107
- expanded_path=str(resolved_path),
108
- base_dir=str(security_manager.base_dir),
109
- allowed_dirs=[
110
- str(d) for d in security_manager.allowed_dirs
111
- ],
112
- )
113
-
114
- # Return the original path to maintain relative paths in the output
115
- return name, path
116
-
117
- except ValueError as e:
118
- if "not enough values to unpack" in str(e):
119
- raise VariableValueError(
120
- f"Invalid {'directory' if is_dir else 'file'} mapping "
121
- f"(expected name=path format): {mapping!r}"
122
- )
123
- raise
80
+ f"Path '{path}' is outside the base directory and not in allowed directories",
81
+ path=str(path),
82
+ context=e.context,
83
+ ) from e
84
+ raise PathSecurityError(
85
+ f"Path validation failed: {e}",
86
+ path=str(path),
87
+ context=e.context,
88
+ ) from e
89
+
90
+ # Check path existence and type
91
+ if not path.exists():
92
+ if is_dir:
93
+ raise DirectoryNotFoundError(f"Directory not found: {path}")
94
+ raise FileNotFoundError(f"File not found: {path}")
95
+
96
+ # Check path type
97
+ if is_dir and not path.is_dir():
98
+ raise DirectoryNotFoundError(
99
+ f"Path exists but is not a directory: {path}"
100
+ )
101
+ elif not is_dir and not path.is_file():
102
+ raise FileNotFoundError(f"Path exists but is not a file: {path}")
103
+
104
+ return name, str(path)
ostruct/cli/security/__init__.py ADDED
@@ -0,0 +1,32 @@
1
+ """Security package for file access management.
2
+
3
+ This package provides a comprehensive set of security features for file access:
4
+ - Path normalization and validation
5
+ - Safe path joining
6
+ - Directory traversal prevention
7
+ - Symlink resolution with security checks
8
+ - Case sensitivity handling
9
+ - Temporary path management
10
+ """
11
+
12
+ from .allowed_checker import is_path_in_allowed_dirs
13
+ from .case_manager import CaseManager
14
+ from .errors import (
15
+ DirectoryNotFoundError,
16
+ PathSecurityError,
17
+ SecurityErrorReasons,
18
+ )
19
+ from .normalization import normalize_path
20
+ from .safe_joiner import safe_join
21
+ from .security_manager import SecurityManager
22
+
23
+ __all__ = [
24
+ "normalize_path",
25
+ "safe_join",
26
+ "is_path_in_allowed_dirs",
27
+ "CaseManager",
28
+ "PathSecurityError",
29
+ "DirectoryNotFoundError",
30
+ "SecurityErrorReasons",
31
+ "SecurityManager",
32
+ ]
ostruct/cli/security/allowed_checker.py ADDED
@@ -0,0 +1,47 @@
1
+ """Allowed directory checker module.
2
+
3
+ This module provides functionality to verify that a given path is within
4
+ one of a set of allowed directories.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import List, Union
9
+
10
+ from .normalization import normalize_path
11
+
12
+
13
+ def is_path_in_allowed_dirs(
14
+ path: Union[str, Path], allowed_dirs: List[Path]
15
+ ) -> bool:
16
+ """Check if a given path is inside any of the allowed directories.
17
+
18
+ This function normalizes both the input path and allowed directories
19
+ before comparison to ensure consistent results across platforms.
20
+
21
+ Args:
22
+ path: The path to check.
23
+ allowed_dirs: A list of allowed directory paths.
24
+
25
+ Returns:
26
+ True if path is within one of the allowed directories; False otherwise.
27
+
28
+ Example:
29
+ >>> allowed = [Path("/base"), Path("/tmp")]
30
+ >>> is_path_in_allowed_dirs("/base/file.txt", allowed)
31
+ True
32
+ >>> is_path_in_allowed_dirs("/etc/passwd", allowed)
33
+ False
34
+ """
35
+ norm_path = normalize_path(path)
36
+ norm_allowed = [normalize_path(d) for d in allowed_dirs]
37
+
38
+ for allowed in norm_allowed:
39
+ try:
40
+ # If path.relative_to(allowed) does not raise an error,
41
+ # then path is within allowed.
42
+ norm_path.relative_to(allowed)
43
+ return True
44
+ except ValueError:
45
+ continue
46
+
47
+ return False
ostruct/cli/security/case_manager.py ADDED
@@ -0,0 +1,75 @@
1
+ """Case management module.
2
+
3
+ This module provides a class for tracking and preserving the original case
4
+ of file paths on case-insensitive systems.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from threading import Lock
9
+ from typing import Dict
10
+
11
+
12
+ class CaseManager:
13
+ """Manages original case preservation for paths.
14
+
15
+ This class provides a thread-safe way to track original case preservation
16
+ without modifying Path objects. This is particularly important on
17
+ case-insensitive systems (macOS, Windows) where we normalize paths
18
+ to lowercase but want to preserve the original case for display.
19
+
20
+ Example:
21
+ >>> CaseManager.set_original_case(Path("/tmp/file.txt"), "/TMP/File.txt")
22
+ >>> CaseManager.get_original_case(Path("/tmp/file.txt"))
23
+ '/TMP/File.txt'
24
+ """
25
+
26
+ _case_mapping: Dict[str, str] = {}
27
+ _lock = Lock()
28
+
29
+ @classmethod
30
+ def set_original_case(
31
+ cls, normalized_path: Path, original_case: str
32
+ ) -> None:
33
+ """Store the original case for a normalized path.
34
+
35
+ Args:
36
+ normalized_path: The normalized (potentially lowercased) Path.
37
+ original_case: The original path string with its original case.
38
+
39
+ Raises:
40
+ TypeError: If normalized_path or original_case is None.
41
+ """
42
+ if normalized_path is None:
43
+ raise TypeError("normalized_path cannot be None")
44
+ if original_case is None:
45
+ raise TypeError("original_case cannot be None")
46
+
47
+ with cls._lock:
48
+ cls._case_mapping[str(normalized_path)] = original_case
49
+
50
+ @classmethod
51
+ def get_original_case(cls, normalized_path: Path) -> str:
52
+ """Retrieve the original case for a normalized path.
53
+
54
+ Args:
55
+ normalized_path: The normalized Path.
56
+
57
+ Returns:
58
+ The original case string if stored; otherwise the normalized path string.
59
+
60
+ Raises:
61
+ TypeError: If normalized_path is None.
62
+ """
63
+ if normalized_path is None:
64
+ raise TypeError("normalized_path cannot be None")
65
+
66
+ with cls._lock:
67
+ return cls._case_mapping.get(
68
+ str(normalized_path), str(normalized_path)
69
+ )
70
+
71
+ @classmethod
72
+ def clear(cls) -> None:
73
+ """Clear all stored case mappings."""
74
+ with cls._lock:
75
+ cls._case_mapping.clear()
ostruct/cli/security/errors.py ADDED
@@ -0,0 +1,184 @@
1
+ """Error definitions for the security package.
2
+
3
+ This module defines custom exceptions and error reason constants used throughout
4
+ the security modules.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+
10
+ class PathSecurityError(Exception):
11
+ """Base exception for security-related errors.
12
+
13
+ This class provides rich error information for security-related issues,
14
+ including context and error wrapping capabilities.
15
+ """
16
+
17
+ def __init__(
18
+ self,
19
+ message: str,
20
+ path: str = "",
21
+ context: Optional[Dict[str, Any]] = None,
22
+ error_logged: bool = False,
23
+ ) -> None:
24
+ """Initialize the error.
25
+
26
+ Args:
27
+ message: The error message.
28
+ path: The path that caused the error.
29
+ context: Additional context about the error.
30
+ error_logged: Whether this error has already been logged.
31
+ """
32
+ super().__init__(message)
33
+ self.path = path
34
+ self.context = context or {}
35
+ self._error_logged = error_logged
36
+ self._wrapped = False
37
+
38
+ def __str__(self) -> str:
39
+ """Format the error message with context if available."""
40
+ msg = super().__str__()
41
+
42
+ # Add expanded path information if available
43
+ if self.context:
44
+ if (
45
+ "original_path" in self.context
46
+ and "expanded_path" in self.context
47
+ ):
48
+ msg = (
49
+ f"{msg}\n"
50
+ f"Original path: {self.context['original_path']}\n"
51
+ f"Expanded path: {self.context['expanded_path']}"
52
+ )
53
+ if "base_dir" in self.context:
54
+ msg = f"{msg}\nBase directory: {self.context['base_dir']}"
55
+ if "allowed_dirs" in self.context:
56
+ msg = f"{msg}\nAllowed directories: {self.context['allowed_dirs']!r}"
57
+
58
+ return msg
59
+
60
+ @property
61
+ def has_been_logged(self) -> bool:
62
+ """Whether this error has been logged."""
63
+ return self._error_logged
64
+
65
+ @has_been_logged.setter
66
+ def has_been_logged(self, value: bool) -> None:
67
+ """Set whether this error has been logged."""
68
+ self._error_logged = value
69
+
70
+ @property
71
+ def wrapped(self) -> bool:
72
+ """Whether this error is wrapping another error."""
73
+ return self._wrapped
74
+
75
+ def format_with_context(
76
+ self,
77
+ original_path: str,
78
+ expanded_path: str,
79
+ base_dir: str,
80
+ allowed_dirs: List[str],
81
+ ) -> str:
82
+ """Format the error message with additional context.
83
+
84
+ Args:
85
+ original_path: The original path that caused the error
86
+ expanded_path: The expanded/absolute path
87
+ base_dir: The base directory for security checks
88
+ allowed_dirs: List of allowed directories
89
+
90
+ Returns:
91
+ A formatted error message with context
92
+ """
93
+ lines = [
94
+ str(self),
95
+ f"Original path: {original_path}",
96
+ f"Expanded path: {expanded_path}",
97
+ f"Base directory: {base_dir}",
98
+ f"Allowed directories: {allowed_dirs}",
99
+ "Use --allowed-dir to add more allowed directories",
100
+ ]
101
+ return "\n".join(lines)
102
+
103
+ @classmethod
104
+ def wrap_error(
105
+ cls, message: str, original: "PathSecurityError"
106
+ ) -> "PathSecurityError":
107
+ """Wrap an existing error with additional context.
108
+
109
+ Args:
110
+ message: The new error message
111
+ original: The original error to wrap
112
+
113
+ Returns:
114
+ A new PathSecurityError instance wrapping the original
115
+ """
116
+ wrapped = cls(
117
+ f"{message}: {str(original)}",
118
+ path=original.path,
119
+ context=original.context,
120
+ error_logged=original.has_been_logged,
121
+ )
122
+ wrapped._wrapped = True
123
+ return wrapped
124
+
125
+ @classmethod
126
+ def from_expanded_paths(
127
+ cls,
128
+ original_path: str,
129
+ expanded_path: str,
130
+ base_dir: str,
131
+ allowed_dirs: List[str],
132
+ error_logged: bool = False,
133
+ ) -> "PathSecurityError":
134
+ """Create an error instance with expanded path information.
135
+
136
+ Args:
137
+ original_path: The original path that caused the error
138
+ expanded_path: The expanded/absolute path
139
+ base_dir: The base directory for security checks
140
+ allowed_dirs: List of allowed directories
141
+ error_logged: Whether this error has already been logged
142
+
143
+ Returns:
144
+ A new PathSecurityError instance with expanded path context
145
+ """
146
+ message = f"Path '{original_path}' is outside the base directory and not in allowed directories"
147
+ context = {
148
+ "original_path": original_path,
149
+ "expanded_path": expanded_path,
150
+ "base_dir": base_dir,
151
+ "allowed_dirs": allowed_dirs,
152
+ }
153
+ return cls(
154
+ message,
155
+ path=original_path,
156
+ context=context,
157
+ error_logged=error_logged,
158
+ )
159
+
160
+
161
+ class DirectoryNotFoundError(PathSecurityError):
162
+ """Raised when a directory that is expected to exist does not."""
163
+
164
+
165
+ class SecurityErrorReasons:
166
+ """Constants for common security error reasons."""
167
+
168
+ # Path validation errors
169
+ PATH_TRAVERSAL = "path_traversal"
170
+ UNSAFE_UNICODE = "unsafe_unicode"
171
+ NORMALIZATION_ERROR = "normalization_error"
172
+ CASE_MISMATCH = "case_mismatch"
173
+
174
+ # Symlink-related errors
175
+ SYMLINK_LOOP = "symlink_loop"
176
+ SYMLINK_ERROR = "symlink_error"
177
+ SYMLINK_TARGET_NOT_ALLOWED = "symlink_target_not_allowed"
178
+ SYMLINK_MAX_DEPTH = "symlink_max_depth"
179
+ SYMLINK_BROKEN = "symlink_broken"
180
+
181
+ # Directory access errors
182
+ PATH_NOT_IN_BASE = "path_not_in_base"
183
+ PATH_OUTSIDE_ALLOWED = "path_outside_allowed"
184
+ TEMP_PATHS_NOT_ALLOWED = "temp_paths_not_allowed"
ostruct/cli/security/normalization.py ADDED
@@ -0,0 +1,161 @@
1
+ """Path normalization module.
2
+
3
+ This module provides functions to normalize file paths by:
4
+ - Performing Unicode normalization (NFKC)
5
+ - Normalizing path separators and redundant parts
6
+ - Converting relative paths to absolute paths
7
+ - Validating Unicode safety
8
+
9
+ Security Design Choices:
10
+ 1. Unicode Normalization:
11
+ - Uses NFKC form for maximum compatibility
12
+ - Blocks known unsafe Unicode characters
13
+ - Basic protection against homograph attacks
14
+ - Does not handle all possible confusable sequences
15
+
16
+ 2. Path Separators:
17
+ - Normalizes to forward slashes
18
+ - Collapses multiple slashes
19
+ - Converts backslashes on all platforms
20
+ - Note: This breaks Windows UNC and device paths
21
+
22
+ 3. Parent Directory References:
23
+ - Allows ".." components in raw input
24
+ - Security checks done after path resolution
25
+ - Directory traversal prevented by final path validation
26
+
27
+ 4. Absolute Paths:
28
+ - Converts relative to absolute using CWD
29
+ - No environment variable expansion
30
+ - No home directory (~) expansion
31
+ - Thread-safety warning for CWD operations
32
+
33
+ Known Limitations:
34
+ 1. Windows-Specific:
35
+ - UNC paths (r"\\\\server\\share") break when normalized
36
+ - Device paths (r"\\\\?\\", r"\\\\.") become invalid
37
+ - Drive-relative paths may resolve incorrectly
38
+ - Reserved names (CON, NUL, etc.) not handled
39
+ - ADS (:stream) not detected
40
+ - Case sensitivity not handled (delegated to CaseManager)
41
+
42
+ 2. Unicode Handling:
43
+ - Some confusable characters may pass checks
44
+ - Zero-width characters not fully covered
45
+ - Advanced homograph attacks possible
46
+ - Duplicate entries in safety pattern need review
47
+
48
+ 3. Threading:
49
+ - CWD operations not thread-safe
50
+ - Race conditions possible during path resolution
51
+ """
52
+
53
+ import os
54
+ import re
55
+ import unicodedata
56
+ from pathlib import Path
57
+ from typing import Union
58
+
59
+ from .errors import PathSecurityError, SecurityErrorReasons
60
+
61
+ # Patterns for path normalization and validation
62
+ _UNICODE_SAFETY_PATTERN = re.compile(
63
+ r"[\u0000-\u001F\u007F-\u009F\u2028-\u2029\u0085]" # Control chars and line separators
64
+ r"|\.{2,}" # Directory traversal attempts
65
+ r"|[\u2024\u2025\uFE52\u2024\u2025\u2026\uFE19\uFE30\uFE52\uFF0E\uFF61]" # Alternative dots and separators
66
+ )
67
+ _BACKSLASH_PATTERN = re.compile(r"\\")
68
+ _MULTIPLE_SLASH_PATTERN = re.compile(r"/+")
69
+
70
+
71
+ def normalize_path(path: Union[str, Path]) -> Path:
72
+ """Normalize a path string with security checks.
73
+
74
+ This function:
75
+ 1. Converts to Unicode NFKC form
76
+ 2. Checks for unsafe Unicode characters
77
+ 3. Normalizes path separators
78
+ 4. Uses os.path.normpath to collapse redundant separators and dots
79
+ 5. Converts to absolute path if needed
80
+ 6. Returns a pathlib.Path object
81
+
82
+ Security Features:
83
+ - Unicode NFKC normalization
84
+ - Blocks unsafe Unicode characters
85
+ - Normalizes path separators
86
+ - Converts to absolute paths
87
+
88
+ Design Choices:
89
+ - No environment variable expansion
90
+ - No home directory (~) expansion
91
+ - No symlink resolution (handled separately)
92
+ - Case sensitivity handled by CaseManager
93
+ - Thread-safety warning: CWD operations are not atomic
94
+
95
+ Args:
96
+ path: A string or Path object representing a file path.
97
+
98
+ Returns:
99
+ A pathlib.Path object for the normalized absolute path.
100
+
101
+ Raises:
102
+ PathSecurityError: If the path contains unsafe Unicode characters.
103
+ TypeError: If path is None.
104
+
105
+ Note:
106
+ This function has known limitations with Windows paths:
107
+ - UNC paths are not properly handled
108
+ - Device paths are not supported
109
+ - Drive-relative paths may resolve incorrectly
110
+ - Reserved names are not checked
111
+ - ADS is not detected
112
+ """
113
+ if path is None:
114
+ raise TypeError("Path cannot be None")
115
+
116
+ path_str = str(path)
117
+
118
+ # Unicode normalization
119
+ try:
120
+ normalized = unicodedata.normalize("NFKC", path_str)
121
+ except Exception as e:
122
+ raise PathSecurityError(
123
+ "Unicode normalization failed",
124
+ path=path_str,
125
+ context={
126
+ "reason": SecurityErrorReasons.UNSAFE_UNICODE,
127
+ "error": str(e),
128
+ },
129
+ ) from e
130
+
131
+ # Check for unsafe characters and directory traversal
132
+ if match := _UNICODE_SAFETY_PATTERN.search(normalized):
133
+ matched_text = match.group(0)
134
+ if ".." in matched_text:
135
+ raise PathSecurityError(
136
+ "Directory traversal not allowed",
137
+ path=path_str,
138
+ context={
139
+ "reason": SecurityErrorReasons.PATH_TRAVERSAL,
140
+ "matched": matched_text,
141
+ },
142
+ )
143
+ else:
144
+ raise PathSecurityError(
145
+ "Path contains unsafe characters",
146
+ path=path_str,
147
+ context={
148
+ "reason": SecurityErrorReasons.UNSAFE_UNICODE,
149
+ "matched": matched_text,
150
+ },
151
+ )
152
+
153
+ # Normalize path separators
154
+ normalized = _BACKSLASH_PATTERN.sub("/", normalized)
155
+ normalized = _MULTIPLE_SLASH_PATTERN.sub("/", normalized)
156
+
157
+ # Convert to absolute path if needed
158
+ if not os.path.isabs(normalized):
159
+ normalized = os.path.abspath(normalized)
160
+
161
+ return Path(normalized)