ostruct-cli 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/base_errors.py +183 -0
- ostruct/cli/cli.py +830 -585
- ostruct/cli/click_options.py +338 -211
- ostruct/cli/errors.py +214 -227
- ostruct/cli/exit_codes.py +18 -0
- ostruct/cli/file_info.py +126 -69
- ostruct/cli/file_list.py +191 -72
- ostruct/cli/file_utils.py +132 -97
- ostruct/cli/path_utils.py +86 -77
- ostruct/cli/security/__init__.py +32 -0
- ostruct/cli/security/allowed_checker.py +55 -0
- ostruct/cli/security/base.py +46 -0
- ostruct/cli/security/case_manager.py +75 -0
- ostruct/cli/security/errors.py +164 -0
- ostruct/cli/security/normalization.py +161 -0
- ostruct/cli/security/safe_joiner.py +211 -0
- ostruct/cli/security/security_manager.py +366 -0
- ostruct/cli/security/symlink_resolver.py +483 -0
- ostruct/cli/security/types.py +108 -0
- ostruct/cli/security/windows_paths.py +404 -0
- ostruct/cli/serialization.py +25 -0
- ostruct/cli/template_filters.py +13 -8
- ostruct/cli/template_rendering.py +46 -22
- ostruct/cli/template_utils.py +12 -4
- ostruct/cli/template_validation.py +26 -8
- ostruct/cli/token_utils.py +43 -0
- ostruct/cli/validators.py +109 -0
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.5.0.dist-info}/METADATA +64 -24
- ostruct_cli-0.5.0.dist-info/RECORD +42 -0
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.5.0.dist-info}/WHEEL +1 -1
- ostruct/cli/security.py +0 -964
- ostruct/cli/security_types.py +0 -46
- ostruct_cli-0.3.0.dist-info/RECORD +0 -28
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.5.0.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.5.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
"""Base class for security-related errors."""
|
2
|
+
|
3
|
+
from typing import Any, Dict, Optional
|
4
|
+
|
5
|
+
from ostruct.cli.base_errors import CLIError
|
6
|
+
from ostruct.cli.exit_codes import ExitCode
|
7
|
+
|
8
|
+
|
9
|
+
class SecurityErrorBase(CLIError):
    """Common parent class for security-related CLI errors.

    Every instance is constructed with ``ExitCode.SECURITY_ERROR`` and with a
    context dict whose ``"category"`` entry is ``"security"``. The class also
    keeps a flag recording whether the error has already been logged.
    """

    def __init__(
        self,
        message: str,
        context: Optional[Dict[str, Any]] = None,
        details: Optional[str] = None,
        has_been_logged: bool = False,
    ) -> None:
        """Create the error and tag its context as security-related.

        Args:
            message: The error message.
            context: Additional context for the error. NOTE: a dict supplied
                by the caller is updated in place with the ``"category"`` key.
            details: Detailed explanation, forwarded to ``CLIError``.
            has_been_logged: Initial value of the "already logged" flag.
        """
        tagged_context = {} if context is None else context
        tagged_context["category"] = "security"

        super().__init__(
            message,
            context=tagged_context,
            exit_code=ExitCode.SECURITY_ERROR,
            details=details,
        )
        # Stored only after the base initializer has run.
        self._has_been_logged = has_been_logged

    @property
    def has_been_logged(self) -> bool:
        """Return True when this error has already been logged."""
        return self._has_been_logged

    @has_been_logged.setter
    def has_been_logged(self, value: bool) -> None:
        """Record whether this error has been logged."""
        self._has_been_logged = value
|
@@ -0,0 +1,75 @@
|
|
1
|
+
"""Case management module.
|
2
|
+
|
3
|
+
This module provides a class for tracking and preserving the original case
|
4
|
+
of file paths on case-insensitive systems.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from pathlib import Path
|
8
|
+
from threading import Lock
|
9
|
+
from typing import Dict
|
10
|
+
|
11
|
+
|
12
|
+
class CaseManager:
    """Registry that remembers the original spelling of normalized paths.

    The mapping is stored on the class and guarded by a lock, so Path objects
    themselves are never modified. It exists for case-insensitive systems
    (macOS, Windows) where paths are normalized to lowercase but the original
    case should still be available for display.

    Example:
        >>> CaseManager.set_original_case(Path("/tmp/file.txt"), "/TMP/File.txt")
        >>> CaseManager.get_original_case(Path("/tmp/file.txt"))
        '/TMP/File.txt'
    """

    # Shared by all callers: str(normalized path) -> original-case string.
    _case_mapping: Dict[str, str] = {}
    # Serializes every read and write of _case_mapping.
    _lock = Lock()

    @classmethod
    def set_original_case(
        cls, normalized_path: Path, original_case: str
    ) -> None:
        """Remember the original-case spelling of a normalized path.

        Args:
            normalized_path: The normalized (potentially lowercased) Path.
            original_case: The path string exactly as originally written.

        Raises:
            TypeError: If normalized_path or original_case is None.
        """
        if normalized_path is None:
            raise TypeError("normalized_path cannot be None")
        if original_case is None:
            raise TypeError("original_case cannot be None")

        lookup_key = str(normalized_path)
        with cls._lock:
            cls._case_mapping[lookup_key] = original_case

    @classmethod
    def get_original_case(cls, normalized_path: Path) -> str:
        """Look up the original-case spelling of a normalized path.

        Args:
            normalized_path: The normalized Path.

        Returns:
            The stored original-case string, or ``str(normalized_path)`` when
            nothing was stored for this path.

        Raises:
            TypeError: If normalized_path is None.
        """
        if normalized_path is None:
            raise TypeError("normalized_path cannot be None")

        lookup_key = str(normalized_path)
        with cls._lock:
            try:
                return cls._case_mapping[lookup_key]
            except KeyError:
                # Nothing recorded: fall back to the normalized spelling.
                return lookup_key

    @classmethod
    def clear(cls) -> None:
        """Forget every stored case mapping."""
        with cls._lock:
            cls._case_mapping.clear()
|
@@ -0,0 +1,164 @@
|
|
1
|
+
"""Error definitions for the security package.
|
2
|
+
|
3
|
+
This module defines custom exceptions and error reason constants used throughout
|
4
|
+
the security modules.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Any, Dict, List, Optional
|
8
|
+
|
9
|
+
from .base import SecurityErrorBase
|
10
|
+
|
11
|
+
|
12
|
+
class PathSecurityError(SecurityErrorBase):
    """Security error for path-related issues.

    Besides the state kept by ``SecurityErrorBase``, an instance remembers
    whether it wraps another exception and the detail text it was created
    with.
    """

    def __init__(
        self,
        message: str,
        path: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None,
        details: Optional[str] = None,
        error_logged: bool = False,
        wrapped: bool = False,
    ) -> None:
        """Initialize the error.

        Args:
            message: The error message.
            path: The path that caused the error; stored as ``context["path"]``.
            context: Additional context for the error. A supplied dict is
                updated in place.
            details: Detailed explanation of the error. A generic sentence is
                used when omitted.
            error_logged: Whether the error has been logged.
            wrapped: Whether this is a wrapped error.
        """
        if context is None:
            context = {}
        if path is not None:
            context["path"] = path
        if details is None:
            details = "The specified path violates security constraints"
        # setdefault (not plain assignment) so that the more specific hints
        # built by from_expanded_paths() / wrap_error() are not overwritten.
        context.setdefault(
            "troubleshooting",
            [
                "Check if the path is within allowed directories",
                "Use --allowed-dir to specify additional allowed directories",
                "Verify path permissions",
            ],
        )
        self._wrapped = wrapped
        # Backing store for the ``details`` property below.
        self._details = details
        super().__init__(
            message,
            context=context,
            details=details,
            has_been_logged=error_logged,
        )

    @property
    def error_logged(self) -> bool:
        """Alias for has_been_logged for backward compatibility."""
        return self.has_been_logged

    @property
    def wrapped(self) -> bool:
        """Whether this is a wrapped error."""
        return self._wrapped

    @property
    def details(self) -> str:
        """Get the detailed explanation of the error."""
        # Must read the private attribute: returning ``self.details`` here
        # would re-enter this property and recurse until RecursionError.
        return self._details

    @classmethod
    def from_expanded_paths(
        cls,
        original_path: str,
        expanded_path: str,
        base_dir: str,
        allowed_dirs: List[str],
        error_logged: bool = False,
    ) -> "PathSecurityError":
        """Create an "Access denied" error describing an out-of-bounds path.

        Args:
            original_path: The path as originally given.
            expanded_path: The path after expansion.
            base_dir: The base directory.
            allowed_dirs: List of allowed directories.
            error_logged: Whether the error has been logged.

        Returns:
            A new PathSecurityError whose context records all of the above
            plus the ``PATH_OUTSIDE_ALLOWED`` reason and troubleshooting hints.
        """
        # str() each entry so Path-like items do not break the join.
        allowed_listing = ", ".join(str(entry) for entry in allowed_dirs)
        context: Dict[str, Any] = {
            "original_path": original_path,
            "expanded_path": expanded_path,
            "base_dir": base_dir,
            "allowed_dirs": allowed_dirs,
            "reason": SecurityErrorReasons.PATH_OUTSIDE_ALLOWED,
            "troubleshooting": [
                "Check if the path is within allowed directories",
                f"Ensure the path is within base directory: {base_dir}",
                f"Current allowed directories: {allowed_listing}",
            ],
        }
        return cls(
            "Access denied",
            context=context,
            details="Path is outside allowed directories",
            error_logged=error_logged,
        )

    @classmethod
    def wrap_error(
        cls, message: str, original_error: Exception
    ) -> "PathSecurityError":
        """Wrap another error with a security error.

        Args:
            message: The security error message.
            original_error: The original error to wrap.

        Returns:
            A new PathSecurityError with ``wrapped=True``. Its context names
            the wrapped exception class and message, then is updated with the
            wrapped error's own ``context`` (if it has a non-empty one), whose
            keys take precedence.
        """
        context: Dict[str, Any] = {
            "wrapped_error": original_error.__class__.__name__,
            "original_message": str(original_error),
            "wrapped": True,
            "troubleshooting": [
                "Check if the path is within allowed directories",
                "Verify path permissions",
                "Check if the original error has been resolved",
            ],
        }
        # A missing, None or empty context is skipped instead of crashing
        # inside dict.update().
        original_context = getattr(original_error, "context", None)
        if original_context:
            context.update(original_context)

        # Errors that only expose ``has_been_logged`` (no ``error_logged``
        # alias) must still carry their logged state over to the wrapper.
        already_logged = getattr(
            original_error,
            "error_logged",
            getattr(original_error, "has_been_logged", False),
        )
        return cls(
            message,
            context=context,
            wrapped=True,
            error_logged=bool(already_logged),
        )
|
139
|
+
|
140
|
+
|
141
|
+
class DirectoryNotFoundError(PathSecurityError):
    """Path security error raised when an expected directory does not exist."""
|
143
|
+
|
144
|
+
|
145
|
+
class SecurityErrorReasons:
    """String constants naming common security error reasons.

    The values are plain strings; elsewhere in this module they are stored
    under the ``"reason"`` key of an error's context.
    """

    # --- Path validation ---
    PATH_TRAVERSAL = "path_traversal"
    UNSAFE_UNICODE = "unsafe_unicode"
    NORMALIZATION_ERROR = "normalization_error"
    CASE_MISMATCH = "case_mismatch"

    # --- Symlink handling ---
    SYMLINK_LOOP = "symlink_loop"
    SYMLINK_ERROR = "symlink_error"
    SYMLINK_TARGET_NOT_ALLOWED = "symlink_target_not_allowed"
    SYMLINK_MAX_DEPTH = "symlink_max_depth"
    SYMLINK_BROKEN = "symlink_broken"

    # --- Directory access ---
    PATH_NOT_IN_BASE = "path_not_in_base"
    PATH_OUTSIDE_ALLOWED = "path_outside_allowed"
    TEMP_PATHS_NOT_ALLOWED = "temp_paths_not_allowed"
|
@@ -0,0 +1,161 @@
|
|
1
|
+
"""Path normalization module.
|
2
|
+
|
3
|
+
This module provides functions to normalize file paths by:
|
4
|
+
- Performing Unicode normalization (NFKC)
|
5
|
+
- Normalizing path separators and redundant parts
|
6
|
+
- Converting relative paths to absolute paths
|
7
|
+
- Validating Unicode safety
|
8
|
+
|
9
|
+
Security Design Choices:
|
10
|
+
1. Unicode Normalization:
|
11
|
+
- Uses NFKC form for maximum compatibility
|
12
|
+
- Blocks known unsafe Unicode characters
|
13
|
+
- Basic protection against homograph attacks
|
14
|
+
- Does not handle all possible confusable sequences
|
15
|
+
|
16
|
+
2. Path Separators:
|
17
|
+
- Normalizes to forward slashes
|
18
|
+
- Collapses multiple slashes
|
19
|
+
- Converts backslashes on all platforms
|
20
|
+
- Note: This breaks Windows UNC and device paths
|
21
|
+
|
22
|
+
3. Parent Directory References:
|
23
|
+
- Rejects any run of two or more consecutive dots (including "..") in the input
|
24
|
+
- Security checks done after path resolution
|
25
|
+
- Directory traversal prevented by final path validation
|
26
|
+
|
27
|
+
4. Absolute Paths:
|
28
|
+
- Converts relative to absolute using CWD
|
29
|
+
- No environment variable expansion
|
30
|
+
- No home directory (~) expansion
|
31
|
+
- Thread-safety warning for CWD operations
|
32
|
+
|
33
|
+
Known Limitations:
|
34
|
+
1. Windows-Specific:
|
35
|
+
- UNC paths (r"\\\\server\\share") break when normalized
|
36
|
+
- Device paths (r"\\\\?\\", r"\\\\.") become invalid
|
37
|
+
- Drive-relative paths may resolve incorrectly
|
38
|
+
- Reserved names (CON, NUL, etc.) not handled
|
39
|
+
- ADS (:stream) not detected
|
40
|
+
- Case sensitivity not handled (delegated to CaseManager)
|
41
|
+
|
42
|
+
2. Unicode Handling:
|
43
|
+
- Some confusable characters may pass checks
|
44
|
+
- Zero-width characters not fully covered
|
45
|
+
- Advanced homograph attacks possible
|
46
|
+
- Duplicate entries in safety pattern need review
|
47
|
+
|
48
|
+
3. Threading:
|
49
|
+
- CWD operations not thread-safe
|
50
|
+
- Race conditions possible during path resolution
|
51
|
+
"""
|
52
|
+
|
53
|
+
import os
|
54
|
+
import re
|
55
|
+
import unicodedata
|
56
|
+
from pathlib import Path
|
57
|
+
from typing import Union
|
58
|
+
|
59
|
+
from .errors import PathSecurityError, SecurityErrorReasons
|
60
|
+
|
61
|
+
# Patterns for path normalization and validation
_UNICODE_SAFETY_PATTERN = re.compile(
    # Control characters (U+0085 is already covered by the U+007F-U+009F
    # range) plus the line and paragraph separators U+2028 / U+2029.
    r"[\u0000-\u001F\u007F-\u009F\u2028\u2029]"
    # Any run of two or more dots: directory traversal attempts.
    r"|\.{2,}"
    # Alternative dots and separators, each code point listed exactly once
    # (U+2024-U+2026, U+FE19, U+FE30, U+FE52, U+FF0E, U+FF61).
    r"|[\u2024-\u2026\uFE19\uFE30\uFE52\uFF0E\uFF61]"
)
# A single backslash, replaced with "/" during separator normalization.
_BACKSLASH_PATTERN = re.compile(r"\\")
# One or more consecutive forward slashes, collapsed to a single "/".
_MULTIPLE_SLASH_PATTERN = re.compile(r"/+")
|
69
|
+
|
70
|
+
|
71
|
+
def normalize_path(path: Union[str, Path]) -> Path:
    """Turn a path into a checked, absolute ``pathlib.Path``.

    Processing order:
    1. ``str(path)`` is normalized to Unicode NFKC form.
    2. The normalized text is scanned with ``_UNICODE_SAFETY_PATTERN``; any
       hit (control/separator characters, alternative dots, or a run of two
       or more dots) aborts with ``PathSecurityError``.
    3. Backslashes become "/" and repeated slashes collapse to one.
    4. A relative result is made absolute with ``os.path.abspath`` (which
       uses the current working directory); an absolute one is kept as is.
    5. The text is wrapped in ``pathlib.Path``.

    Design choices:
    - No environment variable expansion
    - No home directory (~) expansion
    - No symlink resolution (handled separately)
    - Case sensitivity handled by CaseManager
    - Thread-safety warning: CWD operations are not atomic

    Args:
        path: A string or Path object representing a file path.

    Returns:
        A pathlib.Path object for the normalized absolute path.

    Raises:
        PathSecurityError: If NFKC normalization fails, or the normalized
            text contains unsafe characters or a run of two or more dots.
        TypeError: If path is None.

    Note:
        This function has known limitations with Windows paths:
        - UNC paths are not properly handled
        - Device paths are not supported
        - Drive-relative paths may resolve incorrectly
        - Reserved names are not checked
        - ADS is not detected
    """
    if path is None:
        raise TypeError("Path cannot be None")

    raw_text = str(path)

    # Step 1: Unicode normalization; any failure is reported as a security error.
    try:
        folded = unicodedata.normalize("NFKC", raw_text)
    except Exception as exc:
        failure_context = {
            "reason": SecurityErrorReasons.UNSAFE_UNICODE,
            "error": str(exc),
        }
        raise PathSecurityError(
            "Unicode normalization failed",
            path=raw_text,
            context=failure_context,
        ) from exc

    # Step 2: refuse unsafe characters and dot runs.
    hit = _UNICODE_SAFETY_PATTERN.search(folded)
    if hit is not None:
        offending = hit.group(0)
        if ".." in offending:
            summary = "Directory traversal not allowed"
            reason = SecurityErrorReasons.PATH_TRAVERSAL
        else:
            summary = "Path contains unsafe characters"
            reason = SecurityErrorReasons.UNSAFE_UNICODE
        raise PathSecurityError(
            summary,
            path=raw_text,
            context={"reason": reason, "matched": offending},
        )

    # Step 3: unify separators, then squeeze duplicate slashes.
    cleaned = _MULTIPLE_SLASH_PATTERN.sub(
        "/", _BACKSLASH_PATTERN.sub("/", folded)
    )

    # Steps 4-5: anchor relative paths at the CWD and hand back a Path.
    if os.path.isabs(cleaned):
        return Path(cleaned)
    return Path(os.path.abspath(cleaned))
|
@@ -0,0 +1,211 @@
|
|
1
|
+
"""Safe path joining module.
|
2
|
+
|
3
|
+
This module provides a safe_join function that is inspired by Werkzeug's safe_join.
|
4
|
+
It safely joins untrusted path components to a trusted base directory while avoiding
|
5
|
+
directory traversal issues.
|
6
|
+
|
7
|
+
Security Design Choices:
|
8
|
+
1. Parent Directory (..) References:
|
9
|
+
- Explicitly blocked for security, even in "safe" contexts
|
10
|
+
- This is a deliberate choice to prevent directory traversal
|
11
|
+
- No exceptions are made, even for legitimate uses
|
12
|
+
|
13
|
+
2. Environment Variables:
|
14
|
+
- No expansion of environment variables (%VAR%, $HOME)
|
15
|
+
- Must be handled explicitly at a higher level if needed
|
16
|
+
- Prevents unexpected path resolution
|
17
|
+
|
18
|
+
3. Home Directory:
|
19
|
+
- No expansion of ~ (tilde)
|
20
|
+
- Must be expanded explicitly before passing to this function
|
21
|
+
- Prevents unexpected user directory access
|
22
|
+
|
23
|
+
4. Symlinks:
|
24
|
+
- Not resolved in this module
|
25
|
+
- Handled separately by the resolve_symlink function
|
26
|
+
- Allows for explicit symlink security policies
|
27
|
+
|
28
|
+
5. Case Sensitivity:
|
29
|
+
- Basic normalization only
|
30
|
+
- Full case handling delegated to CaseManager
|
31
|
+
- Ensures consistent cross-platform behavior
|
32
|
+
|
33
|
+
Known Limitations:
|
34
|
+
1. Windows-Specific:
|
35
|
+
- UNC paths (r"\\\\server\\share") are handled but must be complete
|
36
|
+
- Device paths (r"\\\\?\\", r"\\\\.") are rejected for security
|
37
|
+
- Drive-relative paths (C:folder) must be absolute
|
38
|
+
- Reserved names (CON, NUL, etc.) are rejected
|
39
|
+
- Alternate Data Streams (:stream) are rejected
|
40
|
+
|
41
|
+
2. Unicode:
|
42
|
+
- Basic NFKC normalization only
|
43
|
+
- Some confusable characters may not be detected
|
44
|
+
- Advanced homograph attack prevention requires additional checks
|
45
|
+
|
46
|
+
3. Threading:
|
47
|
+
- Current working directory calls are not thread-safe
|
48
|
+
- Race conditions possible if CWD changes during execution
|
49
|
+
"""
|
50
|
+
|
51
|
+
import os
|
52
|
+
import posixpath
|
53
|
+
import re
|
54
|
+
from typing import Optional
|
55
|
+
|
56
|
+
# Compute alternative separators (if any) that differ from "/"
_os_alt_seps = list(
    {sep for sep in [os.path.sep, os.path.altsep] if sep and sep != "/"}
)

# Windows-specific patterns
#
# safe_join() rewrites every backslash to "/" *before* applying these
# patterns, so the prefix patterns accept either separator; a
# backslash-only pattern would never match there.

# Device paths: \\?\ and \\.\ (or //?/ and //./ after separator rewriting).
_WINDOWS_DEVICE_PATH = re.compile(r"^[\\/]{2}[?.][\\/]")
# Drive-relative paths such as "C:folder" (drive letter not followed by a slash).
_WINDOWS_DRIVE_RELATIVE = re.compile(r"^[A-Za-z]:(?![/\\])")
# Reserved device names at the start of the string, bare or with an extension.
_WINDOWS_RESERVED_NAMES = re.compile(
    r"^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(?:\.|$)", re.IGNORECASE
)
# UNC paths (\\server or //server) that are not device paths.
_WINDOWS_UNC = re.compile(r"^[\\/]{2}[^?./\\]")
# Alternate Data Streams: a colon followed by text. The lookbehind skips the
# colon of a leading drive letter ("C:/dir"), which would otherwise make
# every drive-qualified path look like a stream reference.
_WINDOWS_ADS = re.compile(r"(?<!^[A-Za-z]):.+$")
|
71
|
+
|
72
|
+
|
73
|
+
def _has_windows_hazard(candidate: str) -> bool:
    """Return True if *candidate* matches any Windows-only rejection pattern.

    Covers the checks shared by the base directory and every component:
    device paths, drive-relative paths, reserved names and Alternate Data
    Streams.
    """
    return any(
        pattern.search(candidate)
        for pattern in (
            _WINDOWS_DEVICE_PATH,
            _WINDOWS_DRIVE_RELATIVE,
            _WINDOWS_RESERVED_NAMES,
            _WINDOWS_ADS,
        )
    )


def _is_contained(base_dir: str, candidate: str) -> bool:
    """Return True if normalized *candidate* is *base_dir* or lies below it."""
    if base_dir == ".":
        # posixpath.normpath() drops the "./" prefix, so children of "." look
        # like "sub/file"; a string-prefix test against "." would reject them.
        return not (
            candidate.startswith("/")
            or candidate == ".."
            or candidate.startswith("../")
        )
    if candidate == base_dir:
        return True
    # Compare on a separator boundary so "/base2" is not treated as inside "/base".
    prefix = base_dir if base_dir.endswith("/") else base_dir + "/"
    return candidate.startswith(prefix)


def safe_join(directory: str, *pathnames: str) -> Optional[str]:
    """Safely join zero or more untrusted path components to a trusted base directory.

    Inspired by Werkzeug's safe_join. Backslashes are converted to forward
    slashes, every component is validated, and the joined result must stay
    inside the base directory.

    Security Features:
        - Rejects absolute path components
        - Blocks all parent directory references (..)
        - Normalizes path separators to forward slashes
        - Performs a final, separator-aware containment check
        - When ``os.name == "nt"``, additionally rejects device paths,
          drive-relative paths (C:folder), reserved names (CON, PRN, ...),
          Alternate Data Streams, UNC components, and UNC base directories
          that lack a share part

    Design Choices:
        - No environment variable expansion
        - No home directory (~) expansion
        - No symlink resolution (handled separately)
        - Case sensitivity handled by CaseManager
        - Thread-safety warning: CWD operations are not atomic

    Args:
        directory: The trusted base directory. An empty value means "." as
            long as at least one component is given.
        pathnames: Untrusted path components relative to the base directory.
            Empty strings and components that normalize to "." are skipped.

    Returns:
        The normalized joined path as a string, or None if any check fails
        (including a None component, or an empty directory with no components).

    Example:
        >>> safe_join("/base", "subdir", "file.txt")
        '/base/subdir/file.txt'
        >>> safe_join("/base", "../etc/passwd") is None
        True
        >>> safe_join("", "file.txt")
        'file.txt'
    """
    if not directory and not pathnames:
        return None

    if not directory:
        directory = "."

    # A None component can never be joined safely.
    if any(p is None for p in pathnames):
        return None

    # Convert and normalize base directory
    base_dir = posixpath.normpath(str(directory).replace("\\", "/"))
    on_windows = os.name == "nt"

    if on_windows:
        if _has_windows_hazard(base_dir):
            return None
        # A UNC base must name both server and share ("//server/share").
        if _WINDOWS_UNC.search(base_dir) and base_dir.count("/") < 3:
            return None

    # Process and validate each component
    normalized_parts = []
    for filename in pathnames:
        if filename == "":
            continue

        # Convert to string and normalize separators
        filename = str(filename).replace("\\", "/")

        if on_windows and (
            _has_windows_hazard(filename) or _WINDOWS_UNC.search(filename)
        ):
            return None

        # Reject absolute paths and any ".." segment, wherever it appears.
        if (
            filename.startswith("/")
            or filename == ".."
            or filename.startswith("../")
            or filename.endswith("/..")
            or "/../" in filename
        ):
            return None

        normalized = posixpath.normpath(filename)
        if normalized != ".":
            normalized_parts.append(normalized)

    # Join all parts
    if normalized_parts:
        result = posixpath.join(base_dir, *normalized_parts)
    else:
        result = base_dir

    # Final security check on the complete path
    normalized_result = posixpath.normpath(result)
    if not _is_contained(base_dir, normalized_result):
        return None

    # Final Windows-specific checks on complete path
    if on_windows:
        if _WINDOWS_ADS.search(normalized_result):
            return None
        # Reserved names are forbidden in any component, not just the first.
        if any(
            _WINDOWS_RESERVED_NAMES.search(part)
            for part in normalized_result.split("/")
        ):
            return None

    return normalized_result
|