ostruct-cli 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ """Safe path joining module.
2
+
3
+ This module provides a safe_join function that is inspired by Werkzeug's safe_join.
4
+ It safely joins untrusted path components to a trusted base directory while avoiding
5
+ directory traversal issues.
6
+
7
+ Security Design Choices:
8
+ 1. Parent Directory (..) References:
9
+ - Explicitly blocked for security, even in "safe" contexts
10
+ - This is a deliberate choice to prevent directory traversal
11
+ - No exceptions are made, even for legitimate uses
12
+
13
+ 2. Environment Variables:
14
+ - No expansion of environment variables (%VAR%, $HOME)
15
+ - Must be handled explicitly at a higher level if needed
16
+ - Prevents unexpected path resolution
17
+
18
+ 3. Home Directory:
19
+ - No expansion of ~ (tilde)
20
+ - Must be expanded explicitly before passing to this function
21
+ - Prevents unexpected user directory access
22
+
23
+ 4. Symlinks:
24
+ - Not resolved in this module
25
+ - Handled separately by the resolve_symlink function
26
+ - Allows for explicit symlink security policies
27
+
28
+ 5. Case Sensitivity:
29
+ - Basic normalization only
30
+ - Full case handling delegated to CaseManager
31
+ - Ensures consistent cross-platform behavior
32
+
33
+ Known Limitations:
34
+ 1. Windows-Specific:
35
+ - UNC paths (r"\\\\server\\share") are handled but must be complete
36
+ - Device paths (r"\\\\?\\", r"\\\\.") are rejected for security
37
+ - Drive-relative paths (C:folder) are rejected; drive paths must be absolute (C:/folder)
38
+ - Reserved names (CON, NUL, etc.) are rejected
39
+ - Alternate Data Streams (:stream) are rejected
40
+
41
+ 2. Unicode:
42
+ - Basic NFKC normalization only
43
+ - Some confusable characters may not be detected
44
+ - Advanced homograph attack prevention requires additional checks
45
+
46
+ 3. Threading:
47
+ - Current working directory calls are not thread-safe
48
+ - Race conditions possible if CWD changes during execution
49
+ """
50
+
51
+ import os
52
+ import posixpath
53
+ import re
54
+ from typing import Optional
55
+
56
# Path separators the host OS accepts besides "/" (the set comprehension
# drops duplicates and unset values, so this may be empty).
_os_alt_seps = list(
    {
        candidate
        for candidate in (os.path.sep, os.path.altsep)
        if candidate and candidate != "/"
    }
)

# Patterns describing Windows-only path hazards.
# Device namespace prefixes: \\?\ and \\.\
_WINDOWS_DEVICE_PATH = re.compile(r"^\\\\[?.]\\")
# Drive letter that is not followed by a separator, e.g. C:folder
_WINDOWS_DRIVE_RELATIVE = re.compile(r"^[A-Za-z]:(?![/\\])")
# Reserved device names, either bare or followed by a dot (case-insensitive)
_WINDOWS_RESERVED_NAMES = re.compile(
    r"^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(?:\.|$)", re.IGNORECASE
)
# Two leading backslashes followed by a name character (UNC), which
# excludes the device prefixes matched above
_WINDOWS_UNC = re.compile(r"^\\\\[^?./\\]")
# A colon followed by at least one character (Alternate Data Stream syntax)
_WINDOWS_ADS = re.compile(r":.+$")
71
+
72
+
73
def _strip_drive(path: str) -> str:
    """Return *path* without a leading ``X:`` drive specifier, if present."""
    if (
        len(path) >= 2
        and path[1] == ":"
        and path[0].isascii()
        and path[0].isalpha()
    ):
        return path[2:]
    return path


def _is_unsafe_on_windows(raw: str, posix_form: str, *, is_base: bool) -> bool:
    """Report whether a path string trips one of the Windows-specific checks.

    Args:
        raw: The string exactly as supplied by the caller.
        posix_form: The same string with backslashes converted to "/"
            (and, for the base directory, normalized).
        is_base: True for the trusted base directory, False for an
            untrusted component.

    Returns:
        True if the path must be rejected.
    """
    # The device and UNC patterns are written with backslashes, while
    # posix_form has had every backslash replaced by "/". Match them against
    # a backslash rendering of the raw input so they can actually fire, and
    # so that normalization cannot erase a "\\.\" prefix first.
    win_form = raw.replace("/", "\\")
    if _WINDOWS_DEVICE_PATH.search(win_form):
        return True
    if _WINDOWS_DRIVE_RELATIVE.search(posix_form):
        return True
    if _WINDOWS_RESERVED_NAMES.search(posix_form):
        return True
    # The colon of a drive specifier ("C:/dir") is not stream syntax; only the
    # trusted base may carry a drive, so only strip it there.
    ads_subject = _strip_drive(posix_form) if is_base else posix_form
    if _WINDOWS_ADS.search(ads_subject):
        return True
    if _WINDOWS_UNC.search(win_form):
        # Components may never be UNC; a UNC base needs server and share.
        return (not is_base) or posix_form.count("/") < 3
    return False


def _is_contained(base_dir: str, candidate: str) -> bool:
    """Report whether normalized *candidate* is *base_dir* or lies below it."""
    if candidate == base_dir:
        return True
    if base_dir == ".":
        # normpath() drops the leading "./", so anything that is neither
        # absolute nor climbing upwards is below the current directory.
        return not (
            candidate.startswith("/")
            or candidate == ".."
            or candidate.startswith("../")
        )
    # Compare on a component boundary so "/base" does not contain "/base2".
    prefix = base_dir if base_dir.endswith("/") else base_dir + "/"
    return candidate.startswith(prefix)


def safe_join(directory: str, *pathnames: str) -> Optional[str]:
    """Safely join zero or more untrusted path components to a trusted base directory.

    This function is inspired by Werkzeug's safe_join. Components are
    validated one by one and the joined result is checked to lie within the
    base directory.

    Security Features:
        - Rejects absolute path components
        - Blocks all parent directory references (..)
        - Normalizes path separators to forward slashes
        - Performs a final, component-aware containment check
        - When ``os.name == "nt"``, additionally rejects device paths,
          drive-relative paths (C:folder), reserved names (CON, PRN, etc.),
          Alternate Data Stream syntax, UNC components, and incomplete UNC
          base directories

    Design Choices:
        - No environment variable expansion
        - No home directory (~) expansion
        - No symlink resolution
        - No filesystem access; the check is purely lexical

    Args:
        directory: The trusted base directory. An empty value is treated as
            "." when at least one component is given.
        pathnames: Untrusted path components relative to the base directory.
            Empty strings and components that normalize to "." are skipped.

    Returns:
        The normalized joined path (forward slashes) if it is safe;
        otherwise None. None is also returned when both the directory and
        the components are empty, or when any component is None.

    Example:
        >>> safe_join("/base", "subdir", "file.txt")
        '/base/subdir/file.txt'
        >>> safe_join("/base", "../etc/passwd") is None
        True
        >>> safe_join(".", "file.txt")
        'file.txt'
    """
    if not directory and not pathnames:
        return None

    if not directory:
        directory = "."

    # A None component cannot be interpreted as a path
    if any(p is None for p in pathnames):
        return None

    # Convert and normalize the base directory
    raw_directory = str(directory)
    base_dir = posixpath.normpath(raw_directory.replace("\\", "/"))

    on_windows = os.name == "nt"
    if on_windows and _is_unsafe_on_windows(
        raw_directory, base_dir, is_base=True
    ):
        return None

    # Validate and normalize each component
    normalized_parts = []
    for filename in pathnames:
        if filename == "":
            continue

        raw_name = str(filename)
        name = raw_name.replace("\\", "/")

        if on_windows and _is_unsafe_on_windows(raw_name, name, is_base=False):
            return None

        # Reject absolute paths and any parent-directory reference; this is
        # checked before normpath() so "a/../b" cannot be collapsed to "b".
        if (
            name.startswith("/")
            or name == ".."
            or name.startswith("../")
            or name.endswith("/..")
            or "/../" in name
        ):
            return None

        normalized = posixpath.normpath(name)
        if normalized == ".":
            continue
        normalized_parts.append(normalized)

    # join() with no extra parts returns base_dir unchanged
    normalized_result = posixpath.normpath(
        posixpath.join(base_dir, *normalized_parts)
    )

    # Final security check on the complete path
    if not _is_contained(base_dir, normalized_result):
        return None

    # Final Windows-specific checks on the complete path
    if on_windows:
        if _WINDOWS_ADS.search(_strip_drive(normalized_result)):
            return None
        if any(
            _WINDOWS_RESERVED_NAMES.search(part)
            for part in normalized_result.split("/")
        ):
            return None

    return normalized_result
@@ -0,0 +1,353 @@
1
+ """Security manager module.
2
+
3
+ This module provides a high-level SecurityManager class that uses the other modules to:
4
+ - Normalize paths
5
+ - Safely join paths
6
+ - Validate that paths are within allowed directories
7
+ - Resolve symlinks securely with depth and loop checking
8
+ - Manage case differences on case-insensitive systems
9
+ """
10
+
11
+ import logging
12
+ import os
13
+ import tempfile
14
+ from contextlib import contextmanager
15
+ from pathlib import Path
16
+ from typing import Generator, List, Optional, Union
17
+
18
+ from .allowed_checker import is_path_in_allowed_dirs
19
+ from .case_manager import CaseManager
20
+ from .errors import (
21
+ DirectoryNotFoundError,
22
+ PathSecurityError,
23
+ SecurityErrorReasons,
24
+ )
25
+ from .normalization import normalize_path
26
+ from .symlink_resolver import _resolve_symlink
27
+
28
logger = logging.getLogger(__name__)


class SecurityManager:
    """Manages security for file access.

    Validates all file access against a base directory and optional
    allowed directories. Prevents unauthorized access and directory
    traversal attacks.

    The security model is based on:
    1. A base directory that serves as the root for all file operations
    2. A set of explicitly allowed directories that can be accessed outside the base directory
    3. Optional handling for the system temporary directory (enabled with
       ``allow_temp_paths=True``)
    4. Path normalization and symlink resolution delegated to sibling modules

    Example:
        >>> sm = SecurityManager("/base/dir")
        >>> sm.add_allowed_directory("/tmp")
        >>> sm.validate_path("/base/dir/file.txt")  # OK
        >>> sm.validate_path("/etc/passwd")  # Raises PathSecurityError
    """

    MAX_SYMLINK_DEPTH = 16

    def __init__(
        self,
        base_dir: Union[str, Path],
        allowed_dirs: Optional[List[Union[str, Path]]] = None,
        allow_temp_paths: bool = False,
        max_symlink_depth: int = MAX_SYMLINK_DEPTH,
    ):
        """Initialize the SecurityManager.

        Args:
            base_dir: The root directory for file operations.
            allowed_dirs: Additional directories allowed for access.
            allow_temp_paths: Whether to allow temporary directory paths.
            max_symlink_depth: Maximum depth for symlink resolution.

        Raises:
            DirectoryNotFoundError: If base_dir or any allowed directory doesn't exist.
        """
        # Normalize and verify base directory
        self._base_dir = normalize_path(base_dir)
        if not self._base_dir.is_dir():
            raise DirectoryNotFoundError(
                f"Base directory not found: {base_dir}",
                path=str(base_dir),
            )

        # The base directory is always the first allowed directory
        self._allowed_dirs: List[Path] = [self._base_dir]
        if allowed_dirs:
            for d in allowed_dirs:
                self.add_allowed_directory(d)

        self._allow_temp_paths = allow_temp_paths
        self._max_symlink_depth = max_symlink_depth
        # Only look up the temp directory when temp access is enabled
        self._temp_dir = (
            normalize_path(tempfile.gettempdir()) if allow_temp_paths else None
        )

        logger.debug(
            "\n=== Initialized SecurityManager ===\n"
            "Base dir: %s\n"
            "Allowed dirs: %s\n"
            "Allow temp: %s\n"
            "Temp dir: %s\n"
            "Max symlink depth: %d",
            self._base_dir,
            self._allowed_dirs,
            self._allow_temp_paths,
            self._temp_dir,
            self._max_symlink_depth,
        )

    @property
    def base_dir(self) -> Path:
        """Return the base directory."""
        return self._base_dir

    @property
    def allowed_dirs(self) -> List[Path]:
        """Return a copy of the list of allowed directories."""
        return self._allowed_dirs.copy()

    @staticmethod
    def _is_same_or_descendant(
        candidate: Union[str, Path], root: Union[str, Path]
    ) -> bool:
        """Report whether *candidate* equals *root* or lies beneath it.

        The comparison is lexical (nothing is resolved) and works on whole
        path components, so "/tmpfoo" is not considered inside "/tmp".
        """
        candidate_path = Path(candidate)
        root_path = Path(root)
        return candidate_path == root_path or root_path in candidate_path.parents

    def add_allowed_directory(self, directory: Union[str, Path]) -> None:
        """Add a new directory to the allowed directories list.

        Directories already present (after normalization) are not added twice.

        Args:
            directory: The directory to add.

        Raises:
            DirectoryNotFoundError: If the directory doesn't exist.
        """
        norm_dir = normalize_path(directory)
        if not norm_dir.is_dir():
            raise DirectoryNotFoundError(
                f"Allowed directory not found: {directory}",
                path=str(directory),
            )
        if norm_dir not in self._allowed_dirs:
            self._allowed_dirs.append(norm_dir)

    def is_temp_path(self, path: Union[str, Path]) -> bool:
        """Check if a path is in the system's temporary directory.

        Args:
            path: The path to check.

        Returns:
            True if temp paths are enabled and the normalized path is the
            temporary directory or lies beneath it; False otherwise.

        Raises:
            PathSecurityError: If there's an error checking the path.
        """
        if not self._allow_temp_paths or not self._temp_dir:
            return False

        try:
            # Lexical comparison (no resolving), but on path components
            # rather than raw string prefixes, so a sibling directory such as
            # "<tmp>foo" is not mistaken for the temp directory.
            norm_path = normalize_path(path)
            return self._is_same_or_descendant(norm_path, self._temp_dir)
        except Exception as e:
            raise PathSecurityError(
                f"Error checking temporary path: {e}",
                path=str(path),
            ) from e

    def is_path_allowed(self, path: Union[str, Path]) -> bool:
        """Check if a path is allowed based on security rules.

        Args:
            path: The path to check.

        Returns:
            True if the path is allowed; False otherwise (including when
            normalization raises PathSecurityError).
        """
        try:
            norm_path = normalize_path(path)
        except PathSecurityError:
            return False

        # Check if the path is within one of the allowed directories
        if is_path_in_allowed_dirs(norm_path, self._allowed_dirs):
            return True

        # Allow temp paths if configured
        if self._allow_temp_paths and self.is_temp_path(norm_path):
            return True

        return False

    def validate_path(self, path: Union[str, Path]) -> Path:
        """Validate a path against security rules.

        This method:
        1. Normalizes the input
        2. If the normalized path is a symlink, delegates to the symlink resolver
        3. Otherwise validates against the allowed directories
        4. Checks existence (only after security validation)

        Args:
            path: The path to validate.

        Returns:
            The normalized path, or for symlinks whatever the symlink
            resolver returns.

        Raises:
            PathSecurityError: If the path fails security validation
            FileNotFoundError: If the file doesn't exist (only checked after security validation)
        """
        # First normalize the path
        norm_path = normalize_path(path)

        # Handle symlinks first - delegate to symlink_resolver
        if norm_path.is_symlink():
            try:
                return _resolve_symlink(
                    norm_path,
                    self._max_symlink_depth,
                    self._allowed_dirs,
                )
            except RuntimeError as e:
                # Translate low-level resolution failures into security errors
                if "Symlink loop" in str(e):
                    raise PathSecurityError(
                        "Symlink security violation: loop detected",
                        path=str(path),
                        context={"reason": SecurityErrorReasons.SYMLINK_LOOP},
                    ) from e
                raise PathSecurityError(
                    f"Symlink security violation: failed to resolve symlink - {e}",
                    path=str(path),
                    context={"reason": SecurityErrorReasons.SYMLINK_ERROR},
                ) from e

        # For non-symlinks, just check if the normalized path is allowed
        if not self.is_path_allowed(norm_path):
            logger.error(
                "Security violation: Path %s is outside allowed directories",
                path,
            )
            raise PathSecurityError(
                (
                    f"Access denied: {os.path.basename(str(path))} is outside "
                    "base directory and not in allowed directories"
                ),
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.PATH_OUTSIDE_ALLOWED,
                    "base_dir": str(self._base_dir),
                    "allowed_dirs": [str(d) for d in self._allowed_dirs],
                },
            )

        # Only check existence after security validation passes, so callers
        # cannot probe for files outside the allowed directories
        if not norm_path.exists():
            logger.debug("Path allowed but not found: %s", norm_path)
            raise FileNotFoundError(
                f"File not found: {os.path.basename(str(path))}"
            )

        return norm_path

    def resolve_path(self, path: Union[str, Path]) -> Path:
        """Resolve a path with security checks.

        This method maintains backward compatibility by translating
        internal security errors to standard filesystem errors where appropriate.

        Args:
            path: Path to resolve

        Returns:
            Resolved Path object

        Raises:
            FileNotFoundError: If path doesn't exist or is a broken symlink
            PathSecurityError: For other security violations
        """
        try:
            norm_path = normalize_path(path)

            # Early return for allowed temp paths
            # NOTE(review): this runs before the symlink check below, so a
            # symlink located under the temp directory is returned without
            # being resolved or validated - confirm this is intended.
            if self._allow_temp_paths and self.is_temp_path(norm_path):
                logger.debug("Allowing temp path: %s", norm_path)
                if not norm_path.exists():
                    raise FileNotFoundError(f"File not found: {path}")
                return norm_path

            # Handle symlinks with security checks
            if norm_path.is_symlink():
                try:
                    return _resolve_symlink(
                        norm_path, self._max_symlink_depth, self._allowed_dirs
                    )
                except PathSecurityError as e:
                    # Only broken links are translated; loop, max-depth and
                    # every other security error propagate unchanged.
                    if (
                        e.context.get("reason")
                        == SecurityErrorReasons.SYMLINK_BROKEN
                    ):
                        # .get() so a missing key cannot mask the real error
                        # with a KeyError
                        msg = f"Broken symlink: {e.context.get('source')} -> {e.context.get('target')}"
                        logger.debug(msg)
                        raise FileNotFoundError(msg) from e
                    raise

            # For non-symlinks, check if the normalized path is allowed
            if not self.is_path_allowed(norm_path):
                logger.error(
                    "Security violation: Path %s is outside allowed directories",
                    path,
                )
                raise PathSecurityError(
                    f"Access denied: {os.path.basename(str(path))} is outside base directory",
                    path=str(path),
                    context={
                        "reason": SecurityErrorReasons.PATH_OUTSIDE_ALLOWED,
                        "base_dir": str(self._base_dir),
                        "allowed_dirs": [str(d) for d in self._allowed_dirs],
                    },
                )

            # Only check existence after security validation
            if not norm_path.exists():
                raise FileNotFoundError(f"File not found: {path}")

            return norm_path

        except OSError as e:
            # FileNotFoundError is part of the documented contract; every
            # other OS-level failure is reported as a security error.
            if isinstance(e, FileNotFoundError):
                raise
            logger.error("Error resolving path: %s - %s", path, e)
            raise PathSecurityError(
                f"Failed to resolve path: {e}",
                path=str(path),
                context={
                    "reason": SecurityErrorReasons.SYMLINK_ERROR,
                    "error": str(e),
                },
            ) from e

    @contextmanager
    def symlink_context(self) -> Generator[None, None, None]:
        """Context manager for symlink resolution.

        Calls ``CaseManager.clear()`` when the block exits, whether or not
        an error occurred inside it.

        Example:
            >>> with security_manager.symlink_context():
            ...     resolved = security_manager.resolve_path("/path/to/symlink")
        """
        try:
            yield
        finally:
            # Clean up any case mappings that were created during symlink resolution
            CaseManager.clear()