ostruct-cli 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,184 @@
1
+ """Error definitions for the security package.
2
+
3
+ This module defines custom exceptions and error reason constants used throughout
4
+ the security modules.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+
10
class PathSecurityError(Exception):
    """Base exception for security-related errors.

    Besides the message, an instance carries the offending path, a free-form
    context dictionary, and two bookkeeping flags: whether the error has
    already been logged and whether it wraps another error. When the context
    holds path details they are appended to the string form of the error.
    """

    def __init__(
        self,
        message: str,
        path: str = "",
        context: Optional[Dict[str, Any]] = None,
        error_logged: bool = False,
    ) -> None:
        """Initialize the error.

        Args:
            message: The error message.
            path: The path that caused the error.
            context: Additional context about the error.
            error_logged: Whether this error has already been logged.
        """
        super().__init__(message)
        self.path = path
        # Shallow-copy the mapping so that later changes to the caller's dict
        # (or to the context of an error being wrapped) do not silently alter
        # what this error reports.
        self.context: Dict[str, Any] = dict(context) if context else {}
        self._error_logged = error_logged
        self._wrapped = False

    def __str__(self) -> str:
        """Format the error message with context if available."""
        msg = super().__str__()

        # Add expanded path information if available
        if self.context:
            if (
                "original_path" in self.context
                and "expanded_path" in self.context
            ):
                msg = (
                    f"{msg}\n"
                    f"Original path: {self.context['original_path']}\n"
                    f"Expanded path: {self.context['expanded_path']}"
                )
            if "base_dir" in self.context:
                msg = f"{msg}\nBase directory: {self.context['base_dir']}"
            if "allowed_dirs" in self.context:
                msg = f"{msg}\nAllowed directories: {self.context['allowed_dirs']!r}"

        return msg

    @property
    def has_been_logged(self) -> bool:
        """Whether this error has been logged."""
        return self._error_logged

    @has_been_logged.setter
    def has_been_logged(self, value: bool) -> None:
        """Set whether this error has been logged."""
        self._error_logged = value

    @property
    def wrapped(self) -> bool:
        """Whether this error is wrapping another error."""
        return self._wrapped

    def format_with_context(
        self,
        original_path: str,
        expanded_path: str,
        base_dir: str,
        allowed_dirs: List[str],
    ) -> str:
        """Format the error message with additional context.

        The explicitly supplied values are appended to the bare message (not
        to ``str(self)``), so an error that already stores path details in
        its context does not list them twice.

        Args:
            original_path: The original path that caused the error
            expanded_path: The expanded/absolute path
            base_dir: The base directory for security checks
            allowed_dirs: List of allowed directories

        Returns:
            A formatted error message with context
        """
        lines = [
            # Bare message only: str(self) may already contain context lines.
            Exception.__str__(self),
            f"Original path: {original_path}",
            f"Expanded path: {expanded_path}",
            f"Base directory: {base_dir}",
            f"Allowed directories: {allowed_dirs}",
            "Use --allowed-dir to add more allowed directories",
        ]
        return "\n".join(lines)

    @classmethod
    def wrap_error(
        cls, message: str, original: "PathSecurityError"
    ) -> "PathSecurityError":
        """Wrap an existing error with additional context.

        The new message is ``"<message>: <original bare message>"``. The
        original's context is copied onto the wrapper, so its details are
        rendered once by ``__str__`` instead of being baked into the message
        and then rendered a second time.

        Args:
            message: The new error message
            original: The original error to wrap

        Returns:
            A new PathSecurityError instance wrapping the original
        """
        wrapped = cls(
            f"{message}: {Exception.__str__(original)}",
            path=original.path,
            context=original.context,
            error_logged=original.has_been_logged,
        )
        wrapped._wrapped = True
        return wrapped

    @classmethod
    def from_expanded_paths(
        cls,
        original_path: str,
        expanded_path: str,
        base_dir: str,
        allowed_dirs: List[str],
        error_logged: bool = False,
    ) -> "PathSecurityError":
        """Create an error instance with expanded path information.

        Args:
            original_path: The original path that caused the error
            expanded_path: The expanded/absolute path
            base_dir: The base directory for security checks
            allowed_dirs: List of allowed directories
            error_logged: Whether this error has already been logged

        Returns:
            A new PathSecurityError instance with expanded path context
        """
        message = f"Path '{original_path}' is outside the base directory and not in allowed directories"
        context = {
            "original_path": original_path,
            "expanded_path": expanded_path,
            "base_dir": base_dir,
            "allowed_dirs": allowed_dirs,
        }
        return cls(
            message,
            path=original_path,
            context=context,
            error_logged=error_logged,
        )
159
+
160
+
161
class DirectoryNotFoundError(PathSecurityError):
    """Signal that a directory which was expected to exist is missing."""
163
+
164
+
165
class SecurityErrorReasons:
    """Namespace of string codes identifying why a security check failed."""

    # --- Problems found while validating the path text itself ---
    PATH_TRAVERSAL = "path_traversal"
    UNSAFE_UNICODE = "unsafe_unicode"
    NORMALIZATION_ERROR = "normalization_error"
    CASE_MISMATCH = "case_mismatch"

    # --- Problems related to symbolic links ---
    SYMLINK_LOOP = "symlink_loop"
    SYMLINK_ERROR = "symlink_error"
    SYMLINK_TARGET_NOT_ALLOWED = "symlink_target_not_allowed"
    SYMLINK_MAX_DEPTH = "symlink_max_depth"
    SYMLINK_BROKEN = "symlink_broken"

    # --- Problems with where the path points ---
    PATH_NOT_IN_BASE = "path_not_in_base"
    PATH_OUTSIDE_ALLOWED = "path_outside_allowed"
    TEMP_PATHS_NOT_ALLOWED = "temp_paths_not_allowed"
@@ -0,0 +1,161 @@
1
+ """Path normalization module.
2
+
3
+ This module provides functions to normalize file paths by:
4
+ - Performing Unicode normalization (NFKC)
5
+ - Normalizing path separators and redundant parts
6
+ - Converting relative paths to absolute paths
7
+ - Validating Unicode safety
8
+
9
+ Security Design Choices:
10
+ 1. Unicode Normalization:
11
+ - Uses NFKC form for maximum compatibility
12
+ - Blocks known unsafe Unicode characters
13
+ - Basic protection against homograph attacks
14
+ - Does not handle all possible confusable sequences
15
+
16
+ 2. Path Separators:
17
+ - Normalizes to forward slashes
18
+ - Collapses multiple slashes
19
+ - Converts backslashes on all platforms
20
+ - Note: This breaks Windows UNC and device paths
21
+
22
+ 3. Parent Directory References:
23
+ - Rejects any run of two or more dots (which includes "..") in raw input
24
+ - Security checks done after path resolution
25
+ - Directory traversal prevented by final path validation
26
+
27
+ 4. Absolute Paths:
28
+ - Converts relative to absolute using CWD
29
+ - No environment variable expansion
30
+ - No home directory (~) expansion
31
+ - Thread-safety warning for CWD operations
32
+
33
+ Known Limitations:
34
+ 1. Windows-Specific:
35
+ - UNC paths (r"\\\\server\\share") break when normalized
36
+ - Device paths (r"\\\\?\\", r"\\\\.") become invalid
37
+ - Drive-relative paths may resolve incorrectly
38
+ - Reserved names (CON, NUL, etc.) not handled
39
+ - ADS (:stream) not detected
40
+ - Case sensitivity not handled (delegated to CaseManager)
41
+
42
+ 2. Unicode Handling:
43
+ - Some confusable characters may pass checks
44
+ - Zero-width characters not fully covered
45
+ - Advanced homograph attacks possible
46
+ - Duplicate entries in safety pattern need review
47
+
48
+ 3. Threading:
49
+ - CWD operations not thread-safe
50
+ - Race conditions possible during path resolution
51
+ """
52
+
53
+ import os
54
+ import re
55
+ import unicodedata
56
+ from pathlib import Path
57
+ from typing import Union
58
+
59
+ from .errors import PathSecurityError, SecurityErrorReasons
60
+
61
+ # Patterns for path normalization and validation
62
+ _UNICODE_SAFETY_PATTERN = re.compile(
63
+ r"[\u0000-\u001F\u007F-\u009F\u2028-\u2029\u0085]" # Control chars and line separators
64
+ r"|\.{2,}" # Directory traversal attempts
65
+ r"|[\u2024\u2025\uFE52\u2024\u2025\u2026\uFE19\uFE30\uFE52\uFF0E\uFF61]" # Alternative dots and separators
66
+ )
67
+ _BACKSLASH_PATTERN = re.compile(r"\\")
68
+ _MULTIPLE_SLASH_PATTERN = re.compile(r"/+")
69
+
70
+
71
def normalize_path(path: Union[str, Path]) -> Path:
    """Return a normalized, absolute ``Path`` after running safety checks.

    Processing order:
        1. The input is converted to ``str`` and put into Unicode NFKC form.
        2. The NFKC text is searched with the module's Unicode safety
           pattern; any match aborts with a ``PathSecurityError``.
        3. Backslashes are replaced by forward slashes and runs of slashes
           are collapsed to one.
        4. A path that is not absolute is made absolute with
           ``os.path.abspath`` (i.e. relative to the current working
           directory).
        5. The result is wrapped in ``pathlib.Path``.

    Design Choices:
        - No environment variable expansion
        - No home directory (~) expansion
        - No symlink resolution (handled separately)
        - Case sensitivity handled by CaseManager
        - Thread-safety warning: CWD operations are not atomic

    Args:
        path: A string or Path object representing a file path.

    Returns:
        A pathlib.Path object for the normalized absolute path.

    Raises:
        PathSecurityError: If Unicode normalization fails, or if the
            normalized text matches the safety pattern. The error context
            carries a ``reason`` code and, for pattern matches, the
            ``matched`` text.
        TypeError: If path is None.

    Note:
        This function has known limitations with Windows paths:
        - UNC paths are not properly handled
        - Device paths are not supported
        - Drive-relative paths may resolve incorrectly
        - Reserved names are not checked
        - ADS is not detected
    """
    if path is None:
        raise TypeError("Path cannot be None")

    raw = str(path)

    # Step 1: Unicode normalization
    try:
        text = unicodedata.normalize("NFKC", raw)
    except Exception as exc:
        failure_context = {
            "reason": SecurityErrorReasons.UNSAFE_UNICODE,
            "error": str(exc),
        }
        raise PathSecurityError(
            "Unicode normalization failed",
            path=raw,
            context=failure_context,
        ) from exc

    # Step 2: safety pattern; pick message and reason, then raise once
    hit = _UNICODE_SAFETY_PATTERN.search(text)
    if hit is not None:
        offending = hit.group(0)
        if ".." in offending:
            message = "Directory traversal not allowed"
            reason = SecurityErrorReasons.PATH_TRAVERSAL
        else:
            message = "Path contains unsafe characters"
            reason = SecurityErrorReasons.UNSAFE_UNICODE
        raise PathSecurityError(
            message,
            path=raw,
            context={"reason": reason, "matched": offending},
        )

    # Step 3: separators (backslashes first, then collapse repeats)
    text = _MULTIPLE_SLASH_PATTERN.sub("/", _BACKSLASH_PATTERN.sub("/", text))

    # Steps 4-5: absolutize when required and hand back a Path
    if os.path.isabs(text):
        return Path(text)
    return Path(os.path.abspath(text))
@@ -0,0 +1,211 @@
1
+ """Safe path joining module.
2
+
3
+ This module provides a safe_join function that is inspired by Werkzeug's safe_join.
4
+ It safely joins untrusted path components to a trusted base directory while avoiding
5
+ directory traversal issues.
6
+
7
+ Security Design Choices:
8
+ 1. Parent Directory (..) References:
9
+ - Explicitly blocked for security, even in "safe" contexts
10
+ - This is a deliberate choice to prevent directory traversal
11
+ - No exceptions are made, even for legitimate uses
12
+
13
+ 2. Environment Variables:
14
+ - No expansion of environment variables (%VAR%, $HOME)
15
+ - Must be handled explicitly at a higher level if needed
16
+ - Prevents unexpected path resolution
17
+
18
+ 3. Home Directory:
19
+ - No expansion of ~ (tilde)
20
+ - Must be expanded explicitly before passing to this function
21
+ - Prevents unexpected user directory access
22
+
23
+ 4. Symlinks:
24
+ - Not resolved in this module
25
+ - Handled separately by the resolve_symlink function
26
+ - Allows for explicit symlink security policies
27
+
28
+ 5. Case Sensitivity:
29
+ - Basic normalization only
30
+ - Full case handling delegated to CaseManager
31
+ - Ensures consistent cross-platform behavior
32
+
33
+ Known Limitations:
34
+ 1. Windows-Specific:
35
+ - UNC paths (r"\\\\server\\share") are handled but must be complete
36
+ - Device paths (r"\\\\?\\", r"\\\\.") are rejected for security
37
+ - Drive-relative paths (C:folder) must be absolute
38
+ - Reserved names (CON, NUL, etc.) are rejected
39
+ - Alternate Data Streams (:stream) are rejected
40
+
41
+ 2. Unicode:
42
+ - Basic NFKC normalization only
43
+ - Some confusable characters may not be detected
44
+ - Advanced homograph attack prevention requires additional checks
45
+
46
+ 3. Threading:
47
+ - Current working directory calls are not thread-safe
48
+ - Race conditions possible if CWD changes during execution
49
+ """
50
+
51
+ import os
52
+ import posixpath
53
+ import re
54
+ from typing import Optional
55
+
56
# Compute alternative separators (if any) that differ from "/"
_os_alt_seps = list(
    {sep for sep in [os.path.sep, os.path.altsep] if sep and sep != "/"}
)

# Windows-specific patterns.
# Paths may reach these patterns either as typed (backslashes) or after
# separator normalization (forward slashes), so the prefix patterns accept
# both spellings.

# \\?\ and \\.\ device paths (also //?/ and //./)
_WINDOWS_DEVICE_PATH = re.compile(r"^[\\/]{2}[?.][\\/]")
# C:folder (drive letter not followed by a slash)
_WINDOWS_DRIVE_RELATIVE = re.compile(r"^[A-Za-z]:(?![/\\])")
# Reserved device names, bare or with an extension
_WINDOWS_RESERVED_NAMES = re.compile(
    r"^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(?:\.|$)", re.IGNORECASE
)
# UNC (\\server or //server) but not device paths
_WINDOWS_UNC = re.compile(r"^[\\/]{2}[^?./\\]")
# Alternate Data Streams: a colon followed by a stream name. The negative
# lookahead skips strings whose only colon is a leading drive letter's
# ("C:/dir/file"), which would otherwise be reported as a stream.
_WINDOWS_ADS = re.compile(r"^(?![A-Za-z]:[^:]*$)[^:]*:.+$")
71
+
72
+
73
def _windows_rejects(candidate: str, *, is_component: bool) -> bool:
    """Return True when ``candidate`` trips one of the Windows-only checks.

    Only called when ``os.name == "nt"``, so ``os.path`` has Windows
    semantics here.

    Args:
        candidate: A path string that already uses forward slashes.
        is_component: True for an untrusted path component, False for the
            trusted base directory. Components must be purely relative, so
            any drive or UNC prefix is rejected for them.
    """
    # The device/UNC checks are applied to a backslash spelling as well as
    # implicitly to the slash form: a pattern written for "\\" can never
    # match text whose separators were already converted to "/".
    backslashed = candidate.replace("/", "\\")
    if _WINDOWS_DEVICE_PATH.search(backslashed):
        return True
    if _WINDOWS_DRIVE_RELATIVE.search(candidate):
        return True
    if _WINDOWS_RESERVED_NAMES.search(candidate):
        return True

    # The colon of a drive letter is not an alternate data stream, so stream
    # syntax is only looked for in the part after the drive.
    drive, tail = os.path.splitdrive(candidate)
    if _WINDOWS_ADS.search(tail):
        return True

    if is_component:
        return bool(drive) or bool(_WINDOWS_UNC.search(backslashed))
    return False


def safe_join(directory: str, *pathnames: str) -> Optional[str]:
    """Safely join zero or more untrusted path components to a trusted base directory.

    This function is inspired by Werkzeug's safe_join and ensures that the
    resulting path is always within the base directory, preventing directory
    traversal attacks.

    Security Features:
        - Rejects absolute path components
        - Blocks all parent directory references (..)
        - Normalizes path separators to forward slashes
        - Performs final containment check against base directory
        - Handles Windows-specific security concerns (only when running on
          Windows, i.e. ``os.name == "nt"``):
            * Rejects device paths (r"\\\\?\\", r"\\\\.")
            * Rejects relative drive paths (C:folder)
            * Rejects reserved names (CON, PRN, etc.)
            * Rejects Alternate Data Streams
            * Rejects drive or UNC prefixes in components
            * Requires UNC base directories to name server and share

    Design Choices:
        - No environment variable expansion
        - No home directory (~) expansion
        - No symlink resolution (handled separately)
        - Case sensitivity handled by CaseManager
        - An empty base directory is treated as "." and results relative to
          "." are returned without the leading "./"

    Args:
        directory: The trusted base directory.
        pathnames: Untrusted path components relative to the base directory.

    Returns:
        A safe path as a string if successful; otherwise, None.

    Example:
        >>> safe_join("/base", "subdir", "file.txt")
        '/base/subdir/file.txt'
        >>> safe_join("/base", "../etc/passwd") is None
        True
        >>> safe_join(".", "file.txt")
        'file.txt'
    """
    if not directory and not pathnames:
        return None

    if not directory:
        directory = "."

    # Handle None values in pathnames
    if any(p is None for p in pathnames):
        return None

    # Convert and normalize base directory
    slashed_base = str(directory).replace("\\", "/")
    base_dir = posixpath.normpath(slashed_base)

    # Windows-specific base directory checks
    if os.name == "nt":
        # Device paths are tested before normpath: normpath drops the "."
        # in "//./", which would make the path look like a UNC path.
        if _WINDOWS_DEVICE_PATH.search(slashed_base.replace("/", "\\")):
            return None
        if _windows_rejects(base_dir, is_component=False):
            return None
        # A UNC base must name both server and share ("//server/share").
        if (
            _WINDOWS_UNC.search(base_dir.replace("/", "\\"))
            and base_dir.count("/") < 3
        ):
            return None

    # Process and validate each component
    normalized_parts = []
    for filename in pathnames:
        if filename == "":
            continue

        # Convert to string and normalize separators
        filename = str(filename).replace("\\", "/")

        if os.name == "nt" and _windows_rejects(filename, is_component=True):
            return None

        # Reject absolute paths and any ".." path segment
        if filename.startswith("/") or ".." in filename.split("/"):
            return None

        normalized = posixpath.normpath(filename)
        if normalized == ".":
            continue
        normalized_parts.append(normalized)

    # Join all parts (join with no parts returns base_dir unchanged)
    result = posixpath.join(base_dir, *normalized_parts)
    normalized_result = posixpath.normpath(result)

    # Final containment check on the complete path
    if base_dir == ".":
        # normpath strips the leading "./", so a prefix test against "."
        # would reject every legitimate result; instead require a relative
        # path with no parent-directory segment.
        contained = not normalized_result.startswith(
            "/"
        ) and ".." not in normalized_result.split("/")
    else:
        # Compare on a separator boundary so "/base" never matches "/base2".
        boundary = base_dir if base_dir.endswith("/") else base_dir + "/"
        contained = normalized_result == base_dir or normalized_result.startswith(
            boundary
        )
    if not contained:
        return None

    # Final Windows-specific checks on complete path
    if os.name == "nt":
        # Look for stream syntax after the drive so "C:" is not flagged
        if _WINDOWS_ADS.search(os.path.splitdrive(normalized_result)[1]):
            return None
        # Check for reserved names in any component
        if any(
            _WINDOWS_RESERVED_NAMES.search(part)
            for part in normalized_result.split("/")
        ):
            return None

    return normalized_result