jentic-openapi-common 1.0.0a30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jentic/apitools/openapi/common/path_security.py +176 -0
- jentic/apitools/openapi/common/py.typed +0 -0
- jentic/apitools/openapi/common/subproc.py +145 -0
- jentic/apitools/openapi/common/uri.py +341 -0
- jentic/apitools/openapi/common/version_detection.py +223 -0
- jentic_openapi_common-1.0.0a30.dist-info/METADATA +322 -0
- jentic_openapi_common-1.0.0a30.dist-info/RECORD +10 -0
- jentic_openapi_common-1.0.0a30.dist-info/WHEEL +4 -0
- jentic_openapi_common-1.0.0a30.dist-info/licenses/LICENSE +202 -0
- jentic_openapi_common-1.0.0a30.dist-info/licenses/NOTICE +4 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""Path security utilities for safe filesystem access."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Literal, overload
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Names exported by ``from ... import *``: the exception hierarchy plus the
# single validation entry point.
__all__ = [
    "PathSecurityError",
    "PathTraversalError",
    "InvalidExtensionError",
    "SymlinkSecurityError",
    "validate_path",
]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PathSecurityError(Exception):
    """Root of the exception hierarchy for path security violations.

    The more specific errors in this module derive from this class, so
    catching it handles every failure reported by path validation.
    """
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PathTraversalError(PathSecurityError):
    """Signals that a path lies outside the directory it was confined to."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class InvalidExtensionError(PathSecurityError):
    """Signals that a file's extension is missing or not in the allowed set."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class SymlinkSecurityError(PathSecurityError):
    """Signals a symlink-related violation.

    Intended for paths that contain symlinks where none are allowed, or whose
    symlink target leaves the permitted boundary.
    """
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@overload
|
|
41
|
+
def validate_path(
|
|
42
|
+
path: str | Path,
|
|
43
|
+
*,
|
|
44
|
+
allowed_base: str | Path | None = None,
|
|
45
|
+
allowed_extensions: tuple[str, ...] | None = None,
|
|
46
|
+
resolve_symlinks: bool = True,
|
|
47
|
+
as_string: Literal[True] = True,
|
|
48
|
+
) -> str: ...
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@overload
|
|
52
|
+
def validate_path(
|
|
53
|
+
path: str | Path,
|
|
54
|
+
*,
|
|
55
|
+
allowed_base: str | Path | None = None,
|
|
56
|
+
allowed_extensions: tuple[str, ...] | None = None,
|
|
57
|
+
resolve_symlinks: bool = True,
|
|
58
|
+
as_string: Literal[False],
|
|
59
|
+
) -> Path: ...
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def validate_path(
|
|
63
|
+
path: str | Path,
|
|
64
|
+
*,
|
|
65
|
+
allowed_base: str | Path | None = None,
|
|
66
|
+
allowed_extensions: tuple[str, ...] | None = None,
|
|
67
|
+
resolve_symlinks: bool = True,
|
|
68
|
+
as_string: bool = True,
|
|
69
|
+
) -> str | Path:
|
|
70
|
+
"""
|
|
71
|
+
Validate and canonicalize a filesystem path with security checks.
|
|
72
|
+
|
|
73
|
+
This function provides defense-in-depth security for filesystem access by:
|
|
74
|
+
1. Converting to absolute path and resolving `.` and `..` components
|
|
75
|
+
2. Optionally resolving symlinks and checking they don't escape boundaries
|
|
76
|
+
3. Enforcing boundary restrictions (path must be within allowed_base)
|
|
77
|
+
4. Validating file extensions against a whitelist
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
path: The filesystem path to validate (string or Path object)
|
|
81
|
+
allowed_base: Optional base directory that path must be within.
|
|
82
|
+
If None, no boundary checking is performed.
|
|
83
|
+
allowed_extensions: Optional tuple of allowed file extensions (e.g., ('.yaml', '.json')).
|
|
84
|
+
Extensions are case-sensitive. If None, no extension checking is performed.
|
|
85
|
+
resolve_symlinks: If True (default), resolve symlinks using Path.resolve().
|
|
86
|
+
If False, use Path.absolute() to preserve symlinks.
|
|
87
|
+
as_string: If True (default), return str. If False, return Path object.
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
Canonicalized path (str by default, or Path if as_string=False) that has passed all security checks.
|
|
91
|
+
|
|
92
|
+
Raises:
|
|
93
|
+
PathTraversalError: If the path attempts to escape the allowed_base directory
|
|
94
|
+
InvalidExtensionError: If the file extension is not in allowed_extensions
|
|
95
|
+
SymlinkSecurityError: If symlink resolution reveals a security issue
|
|
96
|
+
|
|
97
|
+
Examples:
|
|
98
|
+
>>> # Basic validation with boundary enforcement (returns str by default)
|
|
99
|
+
>>> validate_path("/var/app/data/file.yaml", allowed_base="/var/app")
|
|
100
|
+
'/var/app/data/file.yaml'
|
|
101
|
+
|
|
102
|
+
>>> # Return Path object when needed
|
|
103
|
+
>>> validate_path("/var/app/data/file.yaml", allowed_base="/var/app", as_string=False)
|
|
104
|
+
Path('/var/app/data/file.yaml')
|
|
105
|
+
|
|
106
|
+
>>> # Prevent directory traversal
|
|
107
|
+
>>> validate_path("/var/app/../etc/passwd", allowed_base="/var/app")
|
|
108
|
+
PathTraversalError: Path '/etc/passwd' is outside allowed base '/var/app'
|
|
109
|
+
|
|
110
|
+
>>> # Extension validation
|
|
111
|
+
>>> validate_path("file.txt", allowed_extensions=('.yaml', '.json'))
|
|
112
|
+
InvalidExtensionError: Path 'file.txt' has disallowed extension '.txt'
|
|
113
|
+
"""
|
|
114
|
+
if not path:
|
|
115
|
+
raise PathSecurityError("Path cannot be empty or None")
|
|
116
|
+
|
|
117
|
+
# Convert to Path object
|
|
118
|
+
path_obj = Path(path)
|
|
119
|
+
|
|
120
|
+
# Canonicalize path (resolve . and ..)
|
|
121
|
+
if resolve_symlinks:
|
|
122
|
+
# Fully resolve including symlinks
|
|
123
|
+
try:
|
|
124
|
+
canonical_path = path_obj.resolve(strict=False)
|
|
125
|
+
except (OSError, RuntimeError) as e:
|
|
126
|
+
raise PathSecurityError(f"Failed to resolve path '{path}': {e}") from e
|
|
127
|
+
else:
|
|
128
|
+
# Convert to absolute but preserve symlinks
|
|
129
|
+
canonical_path = path_obj.absolute()
|
|
130
|
+
|
|
131
|
+
# Boundary enforcement
|
|
132
|
+
if allowed_base is not None:
|
|
133
|
+
allowed_base_path = Path(allowed_base)
|
|
134
|
+
if resolve_symlinks:
|
|
135
|
+
try:
|
|
136
|
+
canonical_base = allowed_base_path.resolve(strict=False)
|
|
137
|
+
except (OSError, RuntimeError) as e:
|
|
138
|
+
raise PathSecurityError(
|
|
139
|
+
f"Failed to resolve allowed_base '{allowed_base}': {e}"
|
|
140
|
+
) from e
|
|
141
|
+
else:
|
|
142
|
+
canonical_base = allowed_base_path.absolute()
|
|
143
|
+
|
|
144
|
+
# Check if canonical_path is within canonical_base
|
|
145
|
+
try:
|
|
146
|
+
canonical_path.relative_to(canonical_base)
|
|
147
|
+
except ValueError:
|
|
148
|
+
raise PathTraversalError(
|
|
149
|
+
f"Path '{canonical_path}' is outside allowed base '{canonical_base}'"
|
|
150
|
+
) from None
|
|
151
|
+
|
|
152
|
+
# Additional check: if resolve_symlinks is True, verify that no symlink in the path
|
|
153
|
+
# escapes the boundary. This is already handled by resolve() above, but we add
|
|
154
|
+
# an explicit check for symlinks that might have been followed
|
|
155
|
+
if resolve_symlinks and path_obj.is_symlink():
|
|
156
|
+
# If the original path was a symlink, verify the resolved target is still in bounds
|
|
157
|
+
try:
|
|
158
|
+
canonical_path.relative_to(canonical_base)
|
|
159
|
+
except ValueError:
|
|
160
|
+
raise SymlinkSecurityError(
|
|
161
|
+
f"Symlink '{path}' resolves to '{canonical_path}' which is outside allowed base '{canonical_base}'"
|
|
162
|
+
) from None
|
|
163
|
+
|
|
164
|
+
# Extension validation
|
|
165
|
+
if allowed_extensions is not None:
|
|
166
|
+
if not canonical_path.suffix:
|
|
167
|
+
raise InvalidExtensionError(
|
|
168
|
+
f"Path '{canonical_path}' has no file extension. Allowed extensions: {allowed_extensions}"
|
|
169
|
+
)
|
|
170
|
+
if canonical_path.suffix not in allowed_extensions:
|
|
171
|
+
raise InvalidExtensionError(
|
|
172
|
+
f"Path '{canonical_path}' has disallowed extension '{canonical_path.suffix}'. "
|
|
173
|
+
f"Allowed extensions: {allowed_extensions}"
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
return str(canonical_path) if as_string else canonical_path
|
|
File without changes
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Subprocess execution utilities for OpenAPI tools."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import IO, Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Public API of the subprocess helpers.
__all__ = [
    "run_subprocess",
    "SubprocessExecutionResult",
    "SubprocessExecutionError",
]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
|
|
13
|
+
class SubprocessExecutionResult:
|
|
14
|
+
"""Returned by a subprocess."""
|
|
15
|
+
|
|
16
|
+
returncode: int
|
|
17
|
+
stdout: str = ""
|
|
18
|
+
stderr: str = ""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
returncode: int,
|
|
23
|
+
stdout: str | None = None,
|
|
24
|
+
stderr: str | None = None,
|
|
25
|
+
):
|
|
26
|
+
self.returncode = returncode
|
|
27
|
+
self.stdout = stdout if isinstance(stdout, str) else ""
|
|
28
|
+
self.stderr = stderr if isinstance(stderr, str) else ""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SubprocessExecutionError(RuntimeError):
|
|
32
|
+
"""Raised when a subprocess exits with non-zero return code."""
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
cmd: Sequence[str],
|
|
37
|
+
returncode: int,
|
|
38
|
+
stdout: str | None = None,
|
|
39
|
+
stderr: str | None = None,
|
|
40
|
+
):
|
|
41
|
+
self.cmd = list(cmd)
|
|
42
|
+
self.returncode = returncode
|
|
43
|
+
self.stdout = stdout if isinstance(stdout, str) else ""
|
|
44
|
+
self.stderr = stderr if isinstance(stderr, str) else ""
|
|
45
|
+
message = (
|
|
46
|
+
f"Command {self.cmd!r} failed with exit code {self.returncode}\n"
|
|
47
|
+
f"--- stdout ---\n{self.stdout}\n"
|
|
48
|
+
f"--- stderr ---\n{self.stderr}"
|
|
49
|
+
)
|
|
50
|
+
super().__init__(message)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def run_subprocess(
    cmd: Sequence[str],
    *,
    fail_on_error: bool = False,
    timeout: float | None = None,
    encoding: str = "utf-8",
    errors: str = "strict",
    cwd: str | None = None,
    stdout: int | IO[Any] | None = None,
    stderr: int | IO[Any] | None = None,
) -> SubprocessExecutionResult:
    """
    Run a command without a shell and collect its exit status and text output.

    Parameters
    ----------
    cmd : sequence of str
        The program and its arguments (executed with ``shell=False``).
    fail_on_error : bool
        If True, a non-zero exit status raises SubprocessExecutionError
        instead of being returned.
    timeout : float | None
        Seconds to wait before the run is abandoned.
    encoding : str
        Encoding used to decode the child's output as text.
    errors : str
        Error handler for text decoding.
    cwd : str | None
        Working directory for the subprocess.
    stdout, stderr : int | IO[Any] | None
        Destination for each stream, e.g. ``subprocess.DEVNULL`` or an open
        file object. ``None`` (the default) captures the stream through a
        pipe. A stream that is redirected elsewhere is reported as an empty
        string in the result.

    Returns
    -------
    SubprocessExecutionResult
        Return code plus captured stdout/stderr text.

    Raises
    ------
    SubprocessExecutionError
        With return code -1 when the command times out or cannot be started
        (OSError); with the real exit status when it is non-zero and
        ``fail_on_error`` is True.
    """

    def _as_text(stream: Any) -> Any:
        # Output attached to a timeout may still be raw bytes.
        return stream.decode(encoding, errors) if isinstance(stream, bytes) else stream

    # A stream the caller did not direct anywhere is captured through a pipe.
    out_target = subprocess.PIPE if stdout is None else stdout
    err_target = subprocess.PIPE if stderr is None else stderr

    try:
        finished = subprocess.run(
            cmd,
            check=False,
            stdout=out_target,
            stderr=err_target,
            text=True,
            shell=False,
            encoding=encoding,
            errors=errors,
            timeout=timeout,
            cwd=cwd,
        )
    except subprocess.TimeoutExpired as e:
        raise SubprocessExecutionError(cmd, -1, _as_text(e.stdout), _as_text(e.stderr)) from e
    except OSError as e:  # e.g., executable not found, permission denied
        raise SubprocessExecutionError(cmd, -1, None, str(e)) from e

    exit_status = finished.returncode
    if fail_on_error and exit_status != 0:
        raise SubprocessExecutionError(cmd, exit_status, finished.stdout, finished.stderr)

    return SubprocessExecutionResult(exit_status, finished.stdout, finished.stderr)
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from urllib.parse import urljoin, urlparse, urlsplit, urlunsplit
|
|
5
|
+
from urllib.request import url2pathname
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Public API of the URI helpers: the error type, the classification
# predicates, and the two resolution functions.
__all__ = [
    "URIResolutionError",
    "is_uri_like",
    "is_http_https_url",
    "is_file_uri",
    "is_scheme_relative_uri",
    "is_absolute_uri",
    "is_fragment_only_uri",
    "is_path",
    "resolve_to_absolute",
    "file_uri_to_path",
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
_WINDOWS_DRIVE_RE = re.compile(r"^[A-Za-z]:[\\/]")
|
|
23
|
+
_WINDOWS_UNC_RE = re.compile(r"^(?:\\\\|//)[^\\/]+[\\/][^\\/]+")
|
|
24
|
+
|
|
25
|
+
# Matches:
|
|
26
|
+
# - http://... or https://...
|
|
27
|
+
# - file://...
|
|
28
|
+
# - POSIX absolute: /path or just "/"
|
|
29
|
+
# - Windows UNC: \\server\share\...
|
|
30
|
+
# - Windows root-relative: \path\to (current drive root)
|
|
31
|
+
# - Windows drive-absolute: C:\path\to or C:/path/to
|
|
32
|
+
# - Relative paths: ./path, ../path, .\path, ..\path, or plain relative paths
|
|
33
|
+
_URI_LIKE_RE = re.compile(
|
|
34
|
+
r"""^(?:
|
|
35
|
+
https?://[^\r\n]+ |
|
|
36
|
+
file://[^\r\n]+ |
|
|
37
|
+
/[^\r\n]* |
|
|
38
|
+
\\\\[^\r\n]+ |
|
|
39
|
+
\\[^\r\n]+ |
|
|
40
|
+
[A-Za-z]:\\[^\r\n]+ |
|
|
41
|
+
[A-Za-z]:/[^\r\n]+ |
|
|
42
|
+
\./[^\r\n]* |
|
|
43
|
+
\.\\/[^\r\n]* |
|
|
44
|
+
\.\.[/\\][^\r\n]* |
|
|
45
|
+
\.\.\\[^\r\n]* |
|
|
46
|
+
[a-zA-Z_][a-zA-Z0-9_.-]*(?:[/\\][a-zA-Z0-9_.-]+)+ |
|
|
47
|
+
[a-zA-Z_][a-zA-Z0-9_.-]*\.[a-zA-Z0-9]+(?![}\])])
|
|
48
|
+
)$""",
|
|
49
|
+
re.VERBOSE,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class URIResolutionError(ValueError):
    """Raised when a URI or path reference cannot be resolved."""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def is_uri_like(uri: str | None) -> bool:
|
|
58
|
+
r"""
|
|
59
|
+
Heuristic check: is `s` a URI-like reference or absolute/relative path?
|
|
60
|
+
- Accepts http(s)://, file://
|
|
61
|
+
- Accepts absolute POSIX (/...) and Windows (\\..., \..., C:\..., C:/...) paths
|
|
62
|
+
- Accepts relative paths (./..., ../..., .\..., ..\...)
|
|
63
|
+
- Must be a single line (no '\\n' or '\\r').
|
|
64
|
+
Leading/trailing whitespace is ignored.
|
|
65
|
+
"""
|
|
66
|
+
if not uri:
|
|
67
|
+
return False
|
|
68
|
+
uri = uri.strip()
|
|
69
|
+
# Enforce single line
|
|
70
|
+
if "\n" in uri or "\r" in uri:
|
|
71
|
+
return False
|
|
72
|
+
return bool(_URI_LIKE_RE.match(uri))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def is_path(s: str | None) -> bool:
|
|
76
|
+
"""
|
|
77
|
+
Check if `s` is a filesystem path (not a URL or URI).
|
|
78
|
+
|
|
79
|
+
Returns True for:
|
|
80
|
+
- Absolute POSIX paths: /home/file.txt
|
|
81
|
+
- Absolute Windows paths: C:\\Windows\\file.txt, \\\\server\\share\\path
|
|
82
|
+
- Relative paths: ./config.yaml, ../parent/file.txt
|
|
83
|
+
|
|
84
|
+
Returns False for:
|
|
85
|
+
- HTTP(S) URLs: http://example.com
|
|
86
|
+
- File URIs: file:///home/file.txt
|
|
87
|
+
- Other URIs: mailto:test@example.com, data:text/plain, ftp://ftp.example.com
|
|
88
|
+
- Empty or None strings
|
|
89
|
+
"""
|
|
90
|
+
if not s:
|
|
91
|
+
return False
|
|
92
|
+
|
|
93
|
+
s = s.strip()
|
|
94
|
+
|
|
95
|
+
# Must match the URI-like pattern first
|
|
96
|
+
if not is_uri_like(s):
|
|
97
|
+
return False
|
|
98
|
+
|
|
99
|
+
# Exclude HTTP(S) URLs
|
|
100
|
+
if is_http_https_url(s):
|
|
101
|
+
return False
|
|
102
|
+
|
|
103
|
+
# Exclude file:// URIs
|
|
104
|
+
if is_file_uri(s):
|
|
105
|
+
return False
|
|
106
|
+
|
|
107
|
+
# Exclude any other URI schemes (mailto:, data:, ftp:, etc.)
|
|
108
|
+
parsed = urlparse(s)
|
|
109
|
+
if parsed.scheme: # Has a scheme
|
|
110
|
+
return False
|
|
111
|
+
|
|
112
|
+
# It's a path!
|
|
113
|
+
return True
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def is_http_https_url(url: str) -> bool:
    """Return True when `url` has an http or https scheme and a network location."""
    parts = urlparse(url)
    if parts.scheme not in ("http", "https"):
        return False
    # A URL such as "http:///path" has the right scheme but no host.
    return len(parts.netloc) > 0
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def is_file_uri(uri: str) -> bool:
    """Return True when `uri` parses with the scheme "file"."""
    scheme = urlparse(uri).scheme
    return scheme == "file"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def is_scheme_relative_uri(uri: str) -> bool:
    """
    Tell whether `uri` is a scheme-relative (protocol-relative) reference.

    Such a reference begins with "//" and names an authority (netloc); the
    scheme comes from the context, as in "//cdn.example.com/path". RFC 3986
    section 4.2 calls this a network-path reference and requires the
    authority component.

    Examples:
        - "//cdn.example.com/x.yaml" -> True
        - "//example.com/api" -> True
        - "http://example.com" -> False (has scheme)
        - "/path/to/file" -> False (single slash)
        - "./relative" -> False (relative path)
        - "//" -> False (no authority component)
        - "///path" -> False (no authority component)

    Args:
        uri: The string to check

    Returns:
        True if `uri` starts with "//" and has a non-empty authority
    """
    return uri.startswith("//") and bool(urlparse(uri).netloc)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def is_absolute_uri(uri: str) -> bool:
    """
    Tell whether `uri` is an absolute URI, i.e. whether it carries a scheme.

    The decision is based solely on the scheme reported by `urlparse`
    (e.g. "http:", "https:", "ftp:", "file:").

    Scheme-relative references (starting with "//") are NOT absolute URIs:
    RFC 3986 section 4.2 classifies them as relative references
    ("network-path references"). Check those with `is_scheme_relative_uri()`.

    Examples:
        - "http://example.com" -> True
        - "https://example.com/path" -> True
        - "ftp://ftp.example.com" -> True
        - "file:///path/to/file" -> True
        - "//cdn.example.com/x.yaml" -> False (scheme-relative)
        - "/path/to/file" -> False (absolute path, not URI)
        - "./relative" -> False
        - "#fragment" -> False

    Args:
        uri: The string to check

    Returns:
        True if the string has a scheme, False otherwise
    """
    scheme = urlparse(uri).scheme
    return len(scheme) > 0
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def is_fragment_only_uri(uri: str) -> bool:
    """
    Tell whether `uri` consists solely of a fragment identifier.

    A fragment-only reference starts with "#". JSON References and OpenAPI
    use them to point at parts of the same document.

    The test is about the whole string, not about whether a URI carries a
    fragment: "http://example.com#section" is a full URI with a fragment and
    therefore yields False.

    Examples:
        - "#/definitions/User" -> True
        - "#fragment" -> True
        - "#" -> True (empty fragment identifier)
        - "##" -> True (fragment identifier is "#")
        - "http://example.com#section" -> False
        - "/path/to/file" -> False
        - "./relative" -> False
        - "" -> False

    Args:
        uri: The string to check

    Returns:
        True if the string begins with "#", False otherwise
    """
    begins_with_hash = uri.startswith("#")
    return begins_with_hash
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def resolve_to_absolute(value: str, base_uri: str | None = None) -> str:
    """
    Resolve `value` to either:
      - an absolute URL (with scheme), OR
      - an absolute filesystem path (no scheme)

    Rules, in the order they are applied:
      • Absolute http(s) URLs ⇒ normalized absolute URL.
      • file:// URIs ⇒ absolute filesystem path.
      • Scheme-relative ("//host/path") with an http(s) `base_uri` ⇒ absolute
        URL that inherits the base's scheme.
      • Windows drive or UNC-style paths ⇒ absolute filesystem path; an
        http(s) `base_uri` raises (a local path cannot be joined to a URL).
      • Any other "//..." value ⇒ raises, because a scheme-relative reference
        needs a URL base.
      • http:/https: without a host ⇒ raises (malformed URL).
      • Any other explicit scheme (mailto:, data:, ftp:, ...) ⇒ returned as-is.
      • No scheme: resolved against `base_uri`: to an absolute URL when the
        base is http(s), otherwise to an absolute path. Without a base the
        path is resolved against the current working directory.

    Raises:
        URIResolutionError: For multi-line or non-string input and for the
            unresolvable combinations listed above.
    """
    _guard_single_line(value)

    if is_http_https_url(value):
        return _normalize_url(value)

    if is_file_uri(value):
        return file_uri_to_path(value)

    base_is_url = bool(base_uri and is_http_https_url(base_uri))

    # "//host/path" also matches the UNC pattern used for Windows paths, so a
    # network-path reference has to be claimed here, before the path check,
    # whenever the base tells us we are resolving URLs.
    if base_is_url and value.startswith("//"):
        return _normalize_url(urljoin(base_uri, value))

    if _looks_like_windows_path(value):
        return _resolve_path_like(value, base_uri)

    # Scheme-relative without URL base is ambiguous
    if value.startswith("//"):
        raise URIResolutionError("Scheme-relative URLs require a URL base_uri.")

    scheme = urlparse(value).scheme
    if scheme in ("http", "https"):
        # Well-formed http(s) URLs returned above, so the host is missing.
        raise URIResolutionError(f"Malformed URL (missing host): {value!r}")
    if scheme:
        return value  # leave non-file, non-http schemes untouched

    # --- No scheme: relative URI or path ---
    if base_is_url:
        # Relative URI against URL base → absolute URL
        return _normalize_url(urljoin(base_uri, value))

    # Base is a file path, a file:// URI, or absent (→ current directory).
    return _resolve_path_like(value, base_uri)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def file_uri_to_path(file_uri: str) -> str:
    """
    Turn a file:// URI into an absolute filesystem path.

    Args:
        file_uri: A file:// URI string (e.g., "file:///path/to/file" or
            "file://server/share/path")

    Returns:
        The path as a string, after `Path.resolve()`. An empty or
        "localhost" authority is dropped; any other authority is kept as the
        server part of a "//server/..." style path.

    Raises:
        URIResolutionError: If the scheme of `file_uri` is not "file"

    Examples:
        >>> file_uri_to_path("file:///home/user/doc.yaml")
        '/home/user/doc.yaml'
        >>> file_uri_to_path("file://localhost/etc/config.yaml")
        '/etc/config.yaml'
    """
    parts = urlparse(file_uri)
    if parts.scheme != "file":
        raise URIResolutionError(f"Not a file URI: {file_uri!r}")

    host = parts.netloc
    if host in ("", "localhost"):
        # Local file: only the path component matters.
        local = url2pathname(parts.path)
    else:
        # Remote authority: rebuild the //server/share/path form first.
        local = url2pathname(f"//{host}{parts.path}")
    return str(Path(local).resolve())
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _guard_single_line(s: str) -> None:
|
|
306
|
+
if not isinstance(s, str) or ("\n" in s or "\r" in s):
|
|
307
|
+
raise URIResolutionError("Input must be a single-line string.")
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _looks_like_windows_path(s: str) -> bool:
    """Return True when `s` starts like a Windows drive path or a UNC-style path."""
    if _WINDOWS_DRIVE_RE.match(s):
        return True
    return _WINDOWS_UNC_RE.match(s) is not None
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _normalize_url(s: str) -> str:
|
|
315
|
+
import posixpath
|
|
316
|
+
|
|
317
|
+
parts = urlsplit(s)
|
|
318
|
+
# Normalize the path component using posixpath (URLs always use forward slashes)
|
|
319
|
+
normalized_path = posixpath.normpath(parts.path) if parts.path else "/"
|
|
320
|
+
# Ensure root path is "/"
|
|
321
|
+
if normalized_path == ".":
|
|
322
|
+
normalized_path = "/"
|
|
323
|
+
return urlunsplit((parts.scheme, parts.netloc, normalized_path, parts.query, parts.fragment))
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _resolve_path_like(value: str, base_uri: str | None) -> str:
|
|
327
|
+
value = os.path.expandvars(os.path.expanduser(value))
|
|
328
|
+
|
|
329
|
+
if base_uri:
|
|
330
|
+
if is_file_uri(base_uri):
|
|
331
|
+
base_path = Path(url2pathname(urlparse(base_uri).path))
|
|
332
|
+
elif is_http_https_url(base_uri):
|
|
333
|
+
# Don't silently combine a local path with a URL base
|
|
334
|
+
raise URIResolutionError("Cannot resolve a local path against an HTTP(S) base_uri.")
|
|
335
|
+
else:
|
|
336
|
+
base_path = Path(os.path.expandvars(os.path.expanduser(base_uri)))
|
|
337
|
+
else:
|
|
338
|
+
base_path = Path.cwd()
|
|
339
|
+
|
|
340
|
+
p = Path(value)
|
|
341
|
+
return str(p.resolve() if p.is_absolute() else (base_path / p).resolve())
|