ostruct-cli 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/cli.py +84 -118
- ostruct/cli/click_options.py +54 -45
- ostruct/cli/errors.py +63 -170
- ostruct/cli/file_info.py +98 -57
- ostruct/cli/file_list.py +189 -64
- ostruct/cli/file_utils.py +93 -66
- ostruct/cli/path_utils.py +58 -77
- ostruct/cli/security/__init__.py +32 -0
- ostruct/cli/security/allowed_checker.py +47 -0
- ostruct/cli/security/case_manager.py +75 -0
- ostruct/cli/security/errors.py +184 -0
- ostruct/cli/security/normalization.py +161 -0
- ostruct/cli/security/safe_joiner.py +211 -0
- ostruct/cli/security/security_manager.py +353 -0
- ostruct/cli/security/symlink_resolver.py +483 -0
- ostruct/cli/security/types.py +108 -0
- ostruct/cli/security/windows_paths.py +404 -0
- ostruct/cli/template_filters.py +8 -5
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.4.0.dist-info}/METADATA +6 -5
- ostruct_cli-0.4.0.dist-info/RECORD +36 -0
- ostruct/cli/security.py +0 -964
- ostruct/cli/security_types.py +0 -46
- ostruct_cli-0.3.0.dist-info/RECORD +0 -28
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.4.0.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.4.0.dist-info}/WHEEL +0 -0
- {ostruct_cli-0.3.0.dist-info → ostruct_cli-0.4.0.dist-info}/entry_points.txt +0 -0
ostruct/cli/path_utils.py
CHANGED
@@ -1,17 +1,16 @@
|
|
1
1
|
"""Path validation utilities for the CLI."""
|
2
2
|
|
3
|
-
import os
|
4
3
|
from pathlib import Path
|
5
4
|
from typing import Optional, Tuple
|
6
5
|
|
7
|
-
from .errors import (
|
6
|
+
from ostruct.cli.errors import (
|
8
7
|
DirectoryNotFoundError,
|
9
8
|
FileNotFoundError,
|
10
|
-
PathSecurityError,
|
11
9
|
VariableNameError,
|
12
10
|
VariableValueError,
|
13
11
|
)
|
14
|
-
from .security import
|
12
|
+
from ostruct.cli.security.errors import PathSecurityError, SecurityErrorReasons
|
13
|
+
from ostruct.cli.security.security_manager import SecurityManager
|
15
14
|
|
16
15
|
|
17
16
|
def validate_path_mapping(
|
@@ -45,79 +44,61 @@ def validate_path_mapping(
|
|
45
44
|
>>> validate_path_mapping("data=config/", is_dir=True) # Validates directory
|
46
45
|
('data', 'config/')
|
47
46
|
"""
|
47
|
+
# Split into name and path parts
|
48
48
|
try:
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
except OSError as e:
|
73
|
-
raise OSError(f"Failed to resolve path: {e}")
|
74
|
-
|
75
|
-
# Check if path exists
|
76
|
-
if not resolved_path.exists():
|
77
|
-
if is_dir:
|
78
|
-
raise DirectoryNotFoundError(f"Directory not found: {path!r}")
|
79
|
-
else:
|
80
|
-
raise FileNotFoundError(f"File not found: {path!r}")
|
81
|
-
|
82
|
-
# Check if path is correct type
|
83
|
-
if is_dir and not resolved_path.is_dir():
|
84
|
-
raise DirectoryNotFoundError(f"Path is not a directory: {path!r}")
|
85
|
-
elif not is_dir and not resolved_path.is_file():
|
86
|
-
raise FileNotFoundError(f"Path is not a file: {path!r}")
|
87
|
-
|
88
|
-
# Check if path is accessible
|
49
|
+
name, path_str = mapping.split("=", 1)
|
50
|
+
except ValueError:
|
51
|
+
raise ValueError(f"Invalid mapping format (missing '='): {mapping}")
|
52
|
+
|
53
|
+
# Validate name
|
54
|
+
name = name.strip()
|
55
|
+
if not name:
|
56
|
+
raise VariableNameError("Variable name cannot be empty")
|
57
|
+
if not name.isidentifier():
|
58
|
+
raise VariableNameError(f"Invalid variable name: {name}")
|
59
|
+
|
60
|
+
# Normalize path
|
61
|
+
path_str = path_str.strip()
|
62
|
+
if not path_str:
|
63
|
+
raise VariableValueError("Path cannot be empty")
|
64
|
+
|
65
|
+
# Create a Path object
|
66
|
+
path = Path(path_str)
|
67
|
+
if not path.is_absolute() and base_dir:
|
68
|
+
path = Path(base_dir) / path
|
69
|
+
|
70
|
+
# Validate path with security manager if provided
|
71
|
+
if security_manager:
|
89
72
|
try:
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
if e.errno == 13: # Permission denied
|
73
|
+
path = security_manager.validate_path(path)
|
74
|
+
except PathSecurityError as e:
|
75
|
+
if (
|
76
|
+
e.context.get("reason")
|
77
|
+
== SecurityErrorReasons.PATH_OUTSIDE_ALLOWED
|
78
|
+
):
|
97
79
|
raise PathSecurityError(
|
98
|
-
f"
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
raise
|
80
|
+
f"Path '{path}' is outside the base directory and not in allowed directories",
|
81
|
+
path=str(path),
|
82
|
+
context=e.context,
|
83
|
+
) from e
|
84
|
+
raise PathSecurityError(
|
85
|
+
f"Path validation failed: {e}",
|
86
|
+
path=str(path),
|
87
|
+
context=e.context,
|
88
|
+
) from e
|
89
|
+
|
90
|
+
# Check path existence and type
|
91
|
+
if not path.exists():
|
92
|
+
if is_dir:
|
93
|
+
raise DirectoryNotFoundError(f"Directory not found: {path}")
|
94
|
+
raise FileNotFoundError(f"File not found: {path}")
|
95
|
+
|
96
|
+
# Check path type
|
97
|
+
if is_dir and not path.is_dir():
|
98
|
+
raise DirectoryNotFoundError(
|
99
|
+
f"Path exists but is not a directory: {path}"
|
100
|
+
)
|
101
|
+
elif not is_dir and not path.is_file():
|
102
|
+
raise FileNotFoundError(f"Path exists but is not a file: {path}")
|
103
|
+
|
104
|
+
return name, str(path)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
"""Security package for file access management.
|
2
|
+
|
3
|
+
This package provides a comprehensive set of security features for file access:
|
4
|
+
- Path normalization and validation
|
5
|
+
- Safe path joining
|
6
|
+
- Directory traversal prevention
|
7
|
+
- Symlink resolution with security checks
|
8
|
+
- Case sensitivity handling
|
9
|
+
- Temporary path management
|
10
|
+
"""
|
11
|
+
|
12
|
+
from .allowed_checker import is_path_in_allowed_dirs
|
13
|
+
from .case_manager import CaseManager
|
14
|
+
from .errors import (
|
15
|
+
DirectoryNotFoundError,
|
16
|
+
PathSecurityError,
|
17
|
+
SecurityErrorReasons,
|
18
|
+
)
|
19
|
+
from .normalization import normalize_path
|
20
|
+
from .safe_joiner import safe_join
|
21
|
+
from .security_manager import SecurityManager
|
22
|
+
|
23
|
+
__all__ = [
|
24
|
+
"normalize_path",
|
25
|
+
"safe_join",
|
26
|
+
"is_path_in_allowed_dirs",
|
27
|
+
"CaseManager",
|
28
|
+
"PathSecurityError",
|
29
|
+
"DirectoryNotFoundError",
|
30
|
+
"SecurityErrorReasons",
|
31
|
+
"SecurityManager",
|
32
|
+
]
|
@@ -0,0 +1,47 @@
|
|
1
|
+
"""Allowed directory checker module.
|
2
|
+
|
3
|
+
This module provides functionality to verify that a given path is within
|
4
|
+
one of a set of allowed directories.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from pathlib import Path
|
8
|
+
from typing import List, Union
|
9
|
+
|
10
|
+
from .normalization import normalize_path
|
11
|
+
|
12
|
+
|
13
|
+
def is_path_in_allowed_dirs(
    path: Union[str, Path], allowed_dirs: List[Path]
) -> bool:
    """Report whether ``path`` lies inside at least one allowed directory.

    The candidate and every entry of ``allowed_dirs`` are passed through
    ``normalize_path`` first; containment is then decided with
    ``Path.relative_to``.

    Args:
        path: The path to test.
        allowed_dirs: Directories the path is permitted to live under.

    Returns:
        True when the normalized path equals, or is nested under, one of
        the normalized allowed directories; False otherwise (including
        when ``allowed_dirs`` is empty).

    Example:
        >>> allowed = [Path("/base"), Path("/tmp")]
        >>> is_path_in_allowed_dirs("/base/file.txt", allowed)
        True
        >>> is_path_in_allowed_dirs("/etc/passwd", allowed)
        False
    """
    candidate = normalize_path(path)
    # Normalize every root up front so that any error normalize_path raises
    # for an allowed directory surfaces regardless of which root matches.
    roots = [normalize_path(root) for root in allowed_dirs]

    def _is_under(root: Path) -> bool:
        # relative_to() raises ValueError when candidate is not below root.
        try:
            candidate.relative_to(root)
        except ValueError:
            return False
        return True

    return any(_is_under(root) for root in roots)
|
@@ -0,0 +1,75 @@
|
|
1
|
+
"""Case management module.
|
2
|
+
|
3
|
+
This module provides a class for tracking and preserving the original case
|
4
|
+
of file paths on case-insensitive systems.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from pathlib import Path
|
8
|
+
from threading import Lock
|
9
|
+
from typing import Dict
|
10
|
+
|
11
|
+
|
12
|
+
class CaseManager:
    """Registry mapping normalized paths back to their original spelling.

    All state lives on the class and every access is guarded by a single
    lock, so the mapping is shared by all callers in the process. Path
    objects themselves are never modified; only their string form is used
    as a lookup key.

    Example:
        >>> CaseManager.set_original_case(Path("/tmp/file.txt"), "/TMP/File.txt")
        >>> CaseManager.get_original_case(Path("/tmp/file.txt"))
        '/TMP/File.txt'
    """

    # str(normalized path) -> original, case-preserving path string
    _case_mapping: Dict[str, str] = {}
    # Guards every read and write of _case_mapping
    _lock = Lock()

    @staticmethod
    def _ensure_not_none(value: object, label: str) -> None:
        """Raise TypeError naming ``label`` when ``value`` is None."""
        if value is None:
            raise TypeError(f"{label} cannot be None")

    @classmethod
    def set_original_case(
        cls, normalized_path: Path, original_case: str
    ) -> None:
        """Remember the original spelling of a normalized path.

        Args:
            normalized_path: The normalized Path used as the lookup key.
            original_case: The path string exactly as originally written.

        Raises:
            TypeError: If normalized_path or original_case is None.
        """
        cls._ensure_not_none(normalized_path, "normalized_path")
        cls._ensure_not_none(original_case, "original_case")

        key = str(normalized_path)
        with cls._lock:
            cls._case_mapping[key] = original_case

    @classmethod
    def get_original_case(cls, normalized_path: Path) -> str:
        """Look up the original spelling of a normalized path.

        Args:
            normalized_path: The normalized Path to look up.

        Returns:
            The stored original string, or ``str(normalized_path)`` when
            nothing has been recorded for it.

        Raises:
            TypeError: If normalized_path is None.
        """
        cls._ensure_not_none(normalized_path, "normalized_path")

        key = str(normalized_path)
        with cls._lock:
            # Fall back to the key itself when no mapping exists.
            return cls._case_mapping.get(key, key)

    @classmethod
    def clear(cls) -> None:
        """Forget every stored mapping."""
        with cls._lock:
            cls._case_mapping.clear()
|
@@ -0,0 +1,184 @@
|
|
1
|
+
"""Error definitions for the security package.
|
2
|
+
|
3
|
+
This module defines custom exceptions and error reason constants used throughout
|
4
|
+
the security modules.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Any, Dict, List, Optional
|
8
|
+
|
9
|
+
|
10
|
+
class PathSecurityError(Exception):
    """Base exception for security-related errors.

    Besides the message, an instance carries the offending path, a free-form
    context dictionary, and two bookkeeping flags: whether the error has
    already been logged and whether it wraps another error.
    """

    def __init__(
        self,
        message: str,
        path: str = "",
        context: Optional[Dict[str, Any]] = None,
        error_logged: bool = False,
    ) -> None:
        """Initialize the error.

        Args:
            message: The error message.
            path: The path that caused the error.
            context: Additional context about the error. ``None`` (or an
                empty mapping) results in a fresh empty dict.
            error_logged: Whether this error has already been logged.
        """
        super().__init__(message)
        self.path = path
        self.context = context or {}
        self._error_logged = error_logged
        self._wrapped = False

    def __str__(self) -> str:
        """Return the message followed by any displayable context.

        The keys ``original_path``/``expanded_path`` (only when both are
        present), ``base_dir`` and ``allowed_dirs`` are appended on their
        own lines; other context keys are not rendered.
        """
        msg = super().__str__()

        # Add expanded path information if available
        if self.context:
            if (
                "original_path" in self.context
                and "expanded_path" in self.context
            ):
                msg = (
                    f"{msg}\n"
                    f"Original path: {self.context['original_path']}\n"
                    f"Expanded path: {self.context['expanded_path']}"
                )
            if "base_dir" in self.context:
                msg = f"{msg}\nBase directory: {self.context['base_dir']}"
            if "allowed_dirs" in self.context:
                msg = f"{msg}\nAllowed directories: {self.context['allowed_dirs']!r}"

        return msg

    @property
    def has_been_logged(self) -> bool:
        """Whether this error has been logged."""
        return self._error_logged

    @has_been_logged.setter
    def has_been_logged(self, value: bool) -> None:
        """Set whether this error has been logged."""
        self._error_logged = value

    @property
    def wrapped(self) -> bool:
        """Whether this error is wrapping another error."""
        return self._wrapped

    def format_with_context(
        self,
        original_path: str,
        expanded_path: str,
        base_dir: str,
        allowed_dirs: List[str],
    ) -> str:
        """Format the error message with explicitly supplied context.

        Args:
            original_path: The original path that caused the error
            expanded_path: The expanded/absolute path
            base_dir: The base directory for security checks
            allowed_dirs: List of allowed directories

        Returns:
            The bare message followed by one line per supplied value and a
            hint about ``--allowed-dir``.
        """
        lines = [
            # Start from the bare message: str(self) would already contain
            # the context lines stored on the instance and duplicate the
            # ones appended below.
            Exception.__str__(self),
            f"Original path: {original_path}",
            f"Expanded path: {expanded_path}",
            f"Base directory: {base_dir}",
            f"Allowed directories: {allowed_dirs}",
            "Use --allowed-dir to add more allowed directories",
        ]
        return "\n".join(lines)

    @classmethod
    def wrap_error(
        cls, message: str, original: "PathSecurityError"
    ) -> "PathSecurityError":
        """Wrap an existing error with an additional leading message.

        Args:
            message: The new error message
            original: The original error to wrap

        Returns:
            A new PathSecurityError instance whose message is
            ``"<message>: <original message>"`` and which carries the
            original's path, a copy of its context and its logged flag.
        """
        wrapped = cls(
            # Use the original's bare message; its context is carried over
            # below and rendered once by __str__ of the wrapper, so embedding
            # str(original) here would print the context block twice.
            f"{message}: {Exception.__str__(original)}",
            path=original.path,
            # Copy so later mutation of one error's context cannot leak
            # into the other.
            context=dict(original.context),
            error_logged=original.has_been_logged,
        )
        wrapped._wrapped = True
        return wrapped

    @classmethod
    def from_expanded_paths(
        cls,
        original_path: str,
        expanded_path: str,
        base_dir: str,
        allowed_dirs: List[str],
        error_logged: bool = False,
    ) -> "PathSecurityError":
        """Create an error instance with expanded path information.

        Args:
            original_path: The original path that caused the error
            expanded_path: The expanded/absolute path
            base_dir: The base directory for security checks
            allowed_dirs: List of allowed directories
            error_logged: Whether this error has already been logged

        Returns:
            A new PathSecurityError instance with expanded path context
        """
        message = f"Path '{original_path}' is outside the base directory and not in allowed directories"
        context = {
            "original_path": original_path,
            "expanded_path": expanded_path,
            "base_dir": base_dir,
            "allowed_dirs": allowed_dirs,
        }
        return cls(
            message,
            path=original_path,
            context=context,
            error_logged=error_logged,
        )
|
159
|
+
|
160
|
+
|
161
|
+
class DirectoryNotFoundError(PathSecurityError):
    """Signals that a directory expected to exist could not be found.

    Adds no behaviour of its own; it exists so callers can catch this
    condition separately from other ``PathSecurityError`` cases.
    """
|
163
|
+
|
164
|
+
|
165
|
+
class SecurityErrorReasons:
    """String identifiers naming why a security check failed.

    The values are plain strings so they can be compared directly against
    whatever is stored under a context dictionary's ``"reason"`` key.
    """

    # --- Path validation ---
    PATH_TRAVERSAL = "path_traversal"
    UNSAFE_UNICODE = "unsafe_unicode"
    NORMALIZATION_ERROR = "normalization_error"
    CASE_MISMATCH = "case_mismatch"

    # --- Symlink handling ---
    SYMLINK_LOOP = "symlink_loop"
    SYMLINK_ERROR = "symlink_error"
    SYMLINK_TARGET_NOT_ALLOWED = "symlink_target_not_allowed"
    SYMLINK_MAX_DEPTH = "symlink_max_depth"
    SYMLINK_BROKEN = "symlink_broken"

    # --- Directory access ---
    PATH_NOT_IN_BASE = "path_not_in_base"
    PATH_OUTSIDE_ALLOWED = "path_outside_allowed"
    TEMP_PATHS_NOT_ALLOWED = "temp_paths_not_allowed"
|
@@ -0,0 +1,161 @@
|
|
1
|
+
"""Path normalization module.
|
2
|
+
|
3
|
+
This module provides functions to normalize file paths by:
|
4
|
+
- Performing Unicode normalization (NFKC)
|
5
|
+
- Normalizing path separators and redundant parts
|
6
|
+
- Converting relative paths to absolute paths
|
7
|
+
- Validating Unicode safety
|
8
|
+
|
9
|
+
Security Design Choices:
|
10
|
+
1. Unicode Normalization:
|
11
|
+
- Uses NFKC form for maximum compatibility
|
12
|
+
- Blocks known unsafe Unicode characters
|
13
|
+
- Basic protection against homograph attacks
|
14
|
+
- Does not handle all possible confusable sequences
|
15
|
+
|
16
|
+
2. Path Separators:
|
17
|
+
- Normalizes to forward slashes
|
18
|
+
- Collapses multiple slashes
|
19
|
+
- Converts backslashes on all platforms
|
20
|
+
- Note: This breaks Windows UNC and device paths
|
21
|
+
|
22
|
+
3. Parent Directory References:
|
23
|
+
- Rejects any run of two or more dots (including "..") in the raw input
|
24
|
+
- Security checks done after path resolution
|
25
|
+
- Directory traversal prevented by final path validation
|
26
|
+
|
27
|
+
4. Absolute Paths:
|
28
|
+
- Converts relative to absolute using CWD
|
29
|
+
- No environment variable expansion
|
30
|
+
- No home directory (~) expansion
|
31
|
+
- Thread-safety warning for CWD operations
|
32
|
+
|
33
|
+
Known Limitations:
|
34
|
+
1. Windows-Specific:
|
35
|
+
- UNC paths (r"\\\\server\\share") break when normalized
|
36
|
+
- Device paths (r"\\\\?\\", r"\\\\.") become invalid
|
37
|
+
- Drive-relative paths may resolve incorrectly
|
38
|
+
- Reserved names (CON, NUL, etc.) not handled
|
39
|
+
- ADS (:stream) not detected
|
40
|
+
- Case sensitivity not handled (delegated to CaseManager)
|
41
|
+
|
42
|
+
2. Unicode Handling:
|
43
|
+
- Some confusable characters may pass checks
|
44
|
+
- Zero-width characters not fully covered
|
45
|
+
- Advanced homograph attacks possible
|
46
|
+
- Duplicate entries in safety pattern need review
|
47
|
+
|
48
|
+
3. Threading:
|
49
|
+
- CWD operations not thread-safe
|
50
|
+
- Race conditions possible during path resolution
|
51
|
+
"""
|
52
|
+
|
53
|
+
import os
|
54
|
+
import re
|
55
|
+
import unicodedata
|
56
|
+
from pathlib import Path
|
57
|
+
from typing import Union
|
58
|
+
|
59
|
+
from .errors import PathSecurityError, SecurityErrorReasons
|
60
|
+
|
61
|
+
# Patterns for path normalization and validation
|
62
|
+
# Patterns for path normalization and validation.
#
# _UNICODE_SAFETY_PATTERN is the alternation of three sub-patterns; a match
# on any of them marks the path as unsafe.
_UNICODE_SAFETY_PATTERN = re.compile(
    "|".join(
        (
            # Control chars and line separators
            r"[\u0000-\u001F\u007F-\u009F\u2028-\u2029\u0085]",
            # Directory traversal attempts (any run of two or more dots)
            r"\.{2,}",
            # Alternative dots and separators
            r"[\u2024\u2025\uFE52\u2024\u2025\u2026\uFE19\uFE30\uFE52\uFF0E\uFF61]",
        )
    )
)
# A single literal backslash
_BACKSLASH_PATTERN = re.compile(r"\\")
# One or more consecutive forward slashes
_MULTIPLE_SLASH_PATTERN = re.compile(r"/+")
|
69
|
+
|
70
|
+
|
71
|
+
def normalize_path(path: Union[str, Path]) -> Path:
    """Turn a path into a checked, absolute ``pathlib.Path``.

    Processing order:
        1. Stringify the input and apply Unicode NFKC normalization.
        2. Search the result with ``_UNICODE_SAFETY_PATTERN`` and reject the
           path on the first match.
        3. Replace every backslash with a forward slash, then collapse runs
           of forward slashes into one.
        4. If the result is not absolute, make it absolute with
           ``os.path.abspath`` (which resolves against the current working
           directory).
        5. Wrap the result in ``pathlib.Path``.

    What this function does not do:
        - No environment variable or home directory (~) expansion.
        - No symlink resolution.
        - No case folding.

    Args:
        path: A string or Path object representing a file path.

    Returns:
        A pathlib.Path object for the normalized absolute path.

    Raises:
        TypeError: If path is None.
        PathSecurityError: With reason ``PATH_TRAVERSAL`` when the first
            unsafe match contains ".." (any run of two or more dots), or
            with reason ``UNSAFE_UNICODE`` when the first match is another
            blocked character or when Unicode normalization itself fails.

    Note:
        Because backslashes are rewritten on every platform, Windows UNC
        and device paths are not preserved by this function.
    """
    if path is None:
        raise TypeError("Path cannot be None")

    raw = str(path)

    # Step 1: Unicode normalization; any failure is reported as a
    # security error rather than leaking the underlying exception type.
    try:
        canonical = unicodedata.normalize("NFKC", raw)
    except Exception as e:
        raise PathSecurityError(
            "Unicode normalization failed",
            path=raw,
            context={
                "reason": SecurityErrorReasons.UNSAFE_UNICODE,
                "error": str(e),
            },
        ) from e

    # Step 2: only the first (leftmost) offending span is inspected and
    # reported.
    hit = _UNICODE_SAFETY_PATTERN.search(canonical)
    if hit is not None:
        offending = hit.group(0)
        if ".." in offending:
            problem = "Directory traversal not allowed"
            reason = SecurityErrorReasons.PATH_TRAVERSAL
        else:
            problem = "Path contains unsafe characters"
            reason = SecurityErrorReasons.UNSAFE_UNICODE
        raise PathSecurityError(
            problem,
            path=raw,
            context={
                "reason": reason,
                "matched": offending,
            },
        )

    # Step 3: unify separators, backslashes first so that mixed runs such
    # as "\\/" also collapse to a single slash.
    canonical = _MULTIPLE_SLASH_PATTERN.sub(
        "/", _BACKSLASH_PATTERN.sub("/", canonical)
    )

    # Steps 4-5: absolute inputs are wrapped as they are.
    if os.path.isabs(canonical):
        return Path(canonical)
    return Path(os.path.abspath(canonical))
|