pathlibutil 0.3.1__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pathlibutil-0.3.1/README.md → pathlibutil-0.3.3/PKG-INFO +116 -2
- pathlibutil-0.3.1/PKG-INFO → pathlibutil-0.3.3/README.md +90 -25
- pathlibutil-0.3.3/pathlibutil/urlpath.py +717 -0
- {pathlibutil-0.3.1 → pathlibutil-0.3.3}/pyproject.toml +12 -5
- pathlibutil-0.3.1/pathlibutil/urlpath.py +0 -331
- {pathlibutil-0.3.1 → pathlibutil-0.3.3}/LICENSE +0 -0
- {pathlibutil-0.3.1 → pathlibutil-0.3.3}/pathlibutil/__init__.py +0 -0
- {pathlibutil-0.3.1 → pathlibutil-0.3.3}/pathlibutil/base.py +0 -0
- {pathlibutil-0.3.1 → pathlibutil-0.3.3}/pathlibutil/json.py +0 -0
- {pathlibutil-0.3.1 → pathlibutil-0.3.3}/pathlibutil/path.py +0 -0
- {pathlibutil-0.3.1 → pathlibutil-0.3.3}/pathlibutil/types.py +0 -0
|
@@ -1,3 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pathlibutil
|
|
3
|
+
Version: 0.3.3
|
|
4
|
+
Summary: inherits from pathlib.Path with methods for hashing, copying, deleting and more
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: pathlib,hashlib,shutil,urllib.parse,json,urlpath
|
|
7
|
+
Author: Christoph Dörrer
|
|
8
|
+
Author-email: d-chris@web.de
|
|
9
|
+
Requires-Python: >=3.8.1,<4.0.0
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
19
|
+
Provides-Extra: 7z
|
|
20
|
+
Requires-Dist: py7zr (>=0.20.2,<0.21.0) ; extra == "7z"
|
|
21
|
+
Project-URL: documentation, https://d-chris.github.io/pathlibutil
|
|
22
|
+
Project-URL: repository, https://github.com/d-chris/pathlibutil
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
1
25
|
<!--
|
|
2
26
|
filename: ./README.md
|
|
3
27
|
-->
|
|
@@ -49,7 +73,8 @@ Parse and modify URLs with `pathlibutil.urlpath`.
|
|
|
49
73
|
|
|
50
74
|
- `pathlibutil.urlpath.UrlPath()` to modify a URL and easily access the `path` of the URL like a `pathlib.PurePosixPath` object.
|
|
51
75
|
- `pathlibutil.urlpath.UrlNetloc()` to parse and modify the `netloc` part of a URL.
|
|
52
|
-
- `pathlibutil.urlpath.
|
|
76
|
+
- `pathlibutil.urlpath.normalize()` to normalize a URL string.
|
|
77
|
+
- `pathlibutil.urlpath.url_from()` to create a URL from a UNC path object.
|
|
53
78
|
|
|
54
79
|
|
|
55
80
|
## Installation
|
|
@@ -296,4 +321,93 @@ os.getcwd is K:/pathlibutil
|
|
|
296
321
|
Path.cwd(frozen=True) is K:/pathlibutil/examples
|
|
297
322
|
Path.cwd(frozen=False) is K:/pathlibutil
|
|
298
323
|
Path.cwd(frozen=_MEIPASS) is C:/Users/CHRIST~1.DOE/AppData/Local/Temp/_MEI106042
|
|
299
|
-
```
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
## Example 7
|
|
327
|
+
|
|
328
|
+
Console application to convert UNC paths to intranet URLs.
|
|
329
|
+
|
|
330
|
+
By default, it checks if the filename and URL are available and copies the
|
|
331
|
+
normalized URL to the clipboard.
|
|
332
|
+
|
|
333
|
+
> `pathlibutil.urlpath.url_from()`
|
|
334
|
+
|
|
335
|
+
```python
|
|
336
|
+
import argparse
|
|
337
|
+
import sys
|
|
338
|
+
|
|
339
|
+
try:
|
|
340
|
+
import pyperclip
|
|
341
|
+
|
|
342
|
+
import pathlibutil.urlpath as up
|
|
343
|
+
except ModuleNotFoundError as e:
|
|
344
|
+
raise ModuleNotFoundError(f"pip install {e.name.split('.')[0]}") from e
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def intranet_from(uncpath: str, check: bool = True) -> str:
|
|
348
|
+
"""
|
|
349
|
+
Return the intranet URL for the given UNC path.
|
|
350
|
+
"""
|
|
351
|
+
|
|
352
|
+
url = up.url_from(
|
|
353
|
+
uncpath,
|
|
354
|
+
hostname="http://intranet.example.de",
|
|
355
|
+
strict=check,
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
return url.normalize()
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def cli():
|
|
362
|
+
|
|
363
|
+
parser = argparse.ArgumentParser(
|
|
364
|
+
description=intranet_from.__doc__,
|
|
365
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
parser.add_argument(
|
|
369
|
+
"filename",
|
|
370
|
+
nargs="*",
|
|
371
|
+
help="The UNC path to the file.",
|
|
372
|
+
)
|
|
373
|
+
parser.add_argument(
|
|
374
|
+
"-c",
|
|
375
|
+
"--no-check",
|
|
376
|
+
action="store_false",
|
|
377
|
+
dest="check",
|
|
378
|
+
help="Don't check if filename and url is available.",
|
|
379
|
+
)
|
|
380
|
+
parser.add_argument(
|
|
381
|
+
"-s",
|
|
382
|
+
"--silent",
|
|
383
|
+
action="store_true",
|
|
384
|
+
help="Do not print the url to stdout.",
|
|
385
|
+
)
|
|
386
|
+
parser.add_argument(
|
|
387
|
+
"-n",
|
|
388
|
+
"--no-clip",
|
|
389
|
+
action="store_false",
|
|
390
|
+
dest="clip",
|
|
391
|
+
help="Don't copy the url to the clipboard.",
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
args = parser.parse_args()
|
|
395
|
+
filename = " ".join(args.filename)
|
|
396
|
+
|
|
397
|
+
url = intranet_from(filename, check=args.check)
|
|
398
|
+
|
|
399
|
+
if not args.silent:
|
|
400
|
+
print(url)
|
|
401
|
+
|
|
402
|
+
if args.clip:
|
|
403
|
+
pyperclip.copy(url)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
if __name__ == "__main__":
|
|
407
|
+
try:
|
|
408
|
+
cli()
|
|
409
|
+
except Exception as e:
|
|
410
|
+
print(e, file=sys.stderr)
|
|
411
|
+
sys.exit(1)
|
|
412
|
+
```
|
|
413
|
+
|
|
@@ -1,27 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: pathlibutil
|
|
3
|
-
Version: 0.3.1
|
|
4
|
-
Summary: inherits from pathlib.Path with methods for hashing, copying, deleting and more
|
|
5
|
-
Home-page: https://d-chris.github.io
|
|
6
|
-
License: MIT
|
|
7
|
-
Keywords: pathlib,hashlib,shutil,urllib.parse
|
|
8
|
-
Author: Christoph Dörrer
|
|
9
|
-
Author-email: d-chris@web.de
|
|
10
|
-
Requires-Python: >=3.8.1,<4.0.0
|
|
11
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
-
Classifier: Operating System :: OS Independent
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
19
|
-
Provides-Extra: 7z
|
|
20
|
-
Requires-Dist: py7zr (>=0.20.2,<0.21.0) ; extra == "7z"
|
|
21
|
-
Project-URL: Documentation, https://d-chris.github.io/pathlibutil
|
|
22
|
-
Project-URL: Repository, https://github.com/d-chris/pathlibutil
|
|
23
|
-
Description-Content-Type: text/markdown
|
|
24
|
-
|
|
25
1
|
<!--
|
|
26
2
|
filename: ./README.md
|
|
27
3
|
-->
|
|
@@ -73,7 +49,8 @@ Parse and modify URLs with `pathlibutil.urlpath`.
|
|
|
73
49
|
|
|
74
50
|
- `pathlibutil.urlpath.UrlPath()` to modify a URL and easily access the `path` of the URL like a `pathlib.PurePosixPath` object.
|
|
75
51
|
- `pathlibutil.urlpath.UrlNetloc()` to parse and modify the `netloc` part of a URL.
|
|
76
|
-
- `pathlibutil.urlpath.
|
|
52
|
+
- `pathlibutil.urlpath.normalize()` to normalize a URL string.
|
|
53
|
+
- `pathlibutil.urlpath.url_from()` to create a URL from a UNC path object.
|
|
77
54
|
|
|
78
55
|
|
|
79
56
|
## Installation
|
|
@@ -321,3 +298,91 @@ Path.cwd(frozen=True) is K:/pathlibutil/examples
|
|
|
321
298
|
Path.cwd(frozen=False) is K:/pathlibutil
|
|
322
299
|
Path.cwd(frozen=_MEIPASS) is C:/Users/CHRIST~1.DOE/AppData/Local/Temp/_MEI106042
|
|
323
300
|
```
|
|
301
|
+
|
|
302
|
+
## Example 7
|
|
303
|
+
|
|
304
|
+
Console application to convert UNC paths to intranet URLs.
|
|
305
|
+
|
|
306
|
+
By default, it checks if the filename and URL are available and copies the
|
|
307
|
+
normalized URL to the clipboard.
|
|
308
|
+
|
|
309
|
+
> `pathlibutil.urlpath.url_from()`
|
|
310
|
+
|
|
311
|
+
```python
|
|
312
|
+
import argparse
|
|
313
|
+
import sys
|
|
314
|
+
|
|
315
|
+
try:
|
|
316
|
+
import pyperclip
|
|
317
|
+
|
|
318
|
+
import pathlibutil.urlpath as up
|
|
319
|
+
except ModuleNotFoundError as e:
|
|
320
|
+
raise ModuleNotFoundError(f"pip install {e.name.split('.')[0]}") from e
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def intranet_from(uncpath: str, check: bool = True) -> str:
|
|
324
|
+
"""
|
|
325
|
+
Return the intranet URL for the given UNC path.
|
|
326
|
+
"""
|
|
327
|
+
|
|
328
|
+
url = up.url_from(
|
|
329
|
+
uncpath,
|
|
330
|
+
hostname="http://intranet.example.de",
|
|
331
|
+
strict=check,
|
|
332
|
+
)
|
|
333
|
+
|
|
334
|
+
return url.normalize()
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def cli():
|
|
338
|
+
|
|
339
|
+
parser = argparse.ArgumentParser(
|
|
340
|
+
description=intranet_from.__doc__,
|
|
341
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
parser.add_argument(
|
|
345
|
+
"filename",
|
|
346
|
+
nargs="*",
|
|
347
|
+
help="The UNC path to the file.",
|
|
348
|
+
)
|
|
349
|
+
parser.add_argument(
|
|
350
|
+
"-c",
|
|
351
|
+
"--no-check",
|
|
352
|
+
action="store_false",
|
|
353
|
+
dest="check",
|
|
354
|
+
help="Don't check if filename and url is available.",
|
|
355
|
+
)
|
|
356
|
+
parser.add_argument(
|
|
357
|
+
"-s",
|
|
358
|
+
"--silent",
|
|
359
|
+
action="store_true",
|
|
360
|
+
help="Do not print the url to stdout.",
|
|
361
|
+
)
|
|
362
|
+
parser.add_argument(
|
|
363
|
+
"-n",
|
|
364
|
+
"--no-clip",
|
|
365
|
+
action="store_false",
|
|
366
|
+
dest="clip",
|
|
367
|
+
help="Don't copy the url to the clipboard.",
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
args = parser.parse_args()
|
|
371
|
+
filename = " ".join(args.filename)
|
|
372
|
+
|
|
373
|
+
url = intranet_from(filename, check=args.check)
|
|
374
|
+
|
|
375
|
+
if not args.silent:
|
|
376
|
+
print(url)
|
|
377
|
+
|
|
378
|
+
if args.clip:
|
|
379
|
+
pyperclip.copy(url)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
if __name__ == "__main__":
|
|
383
|
+
try:
|
|
384
|
+
cli()
|
|
385
|
+
except Exception as e:
|
|
386
|
+
print(e, file=sys.stderr)
|
|
387
|
+
sys.exit(1)
|
|
388
|
+
```
|
|
@@ -0,0 +1,717 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
import pathlib
|
|
3
|
+
import re
|
|
4
|
+
import urllib.parse as up
|
|
5
|
+
import urllib.request
|
|
6
|
+
from dataclasses import asdict, dataclass, field
|
|
7
|
+
from functools import cached_property, wraps
|
|
8
|
+
from typing import Any, Dict, Optional, Tuple, TypeVar, Union
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class UrlNetloc:
    """
    A dataclass to represent the netloc part of a URL.

    Attributes:
        hostname (str): The hostname of the URL.
        port (Optional[int]): The port number of the URL. Defaults to None.
        username (Optional[str]): The username for authentication. Defaults to None.
        password (Optional[str]): The password for authentication. Defaults to None.

    Examples:
        >>> url = UrlNetloc.from_netloc("www.example.com:443")
        >>> url.port = None
        >>> str(url)
        'www.example.com'
    """

    hostname: str
    """
    The hostname of the URL.

    Examples:
        'www.example.com'
    """
    port: Optional[int] = field(default=None)
    """
    The port number of the URL. Defaults to None.
    """
    username: Optional[str] = field(default=None)
    """
    The username for authentication. Defaults to None.
    """
    password: Optional[str] = field(default=None)
    """
    The password for authentication. Defaults to None.
    """

    def __str__(self) -> str:
        return self.netloc

    @property
    def netloc(self) -> str:
        """
        Return the netloc string representation of the `dataclass`.

        A hostname containing ``:`` (an IPv6 literal) is wrapped in brackets.
        A missing hostname (``None``, as produced by `from_netloc` for input
        without a host) is rendered as an empty string instead of raising.

        Returns:
            str: The netloc string representation.

        Examples:
            >>> UrlNetloc("www.example.de", 433, "user", "pass").netloc
            'user:pass@www.example.de:433'
        """

        netloc = ""

        # the password is only meaningful together with a username
        if self.username:
            netloc += self.username

            if self.password:
                netloc += f":{self.password}"

            netloc += "@"

        # `from_netloc` yields `hostname=None` when the input has no host
        hostname = self.hostname or ""

        if ":" in hostname:
            netloc += f"[{hostname}]"
        else:
            netloc += hostname

        if self.port:
            netloc += f":{self.port:d}"

        return netloc

    @classmethod
    def from_netloc(cls, netloc: str, normalize: bool = False) -> "UrlNetloc":
        """
        Parse a netloc string into a `UrlNetloc` object.

        Args:
            netloc (str): The netloc string to parse, with or without a
                leading ``//``.
            normalize (bool): If False, the hostname keeps the spelling
                (upper/lower case) used in `netloc`; otherwise the lowercased
                hostname reported by `urllib.parse.urlparse` is used.
                Defaults to False.

        Returns:
            `UrlNetloc`: An instance of `UrlNetloc` with the parsed components.

        Raises:
            ValueError: If `urllib.parse` rejects the netloc, e.g. a
                non-numeric port.

        Examples:
            >>> UrlNetloc.from_netloc("user:pass@example.de:433")
            UrlNetloc(hostname='example.de', port=433, username='user', password='pass')
        """

        if not netloc.startswith("//"):
            netloc = f"//{netloc}"

        url = up.urlparse(netloc)

        hostname = url.hostname

        if normalize is False and hostname is not None:
            # urlparse lowercases the hostname; recover the original spelling.
            # Only the part after the last "@" is searched so that a username
            # which happens to equal the hostname is never picked up.
            host_part = url.netloc.rpartition("@")[2]
            match = re.search(re.escape(hostname), host_part, re.IGNORECASE)

            if match is not None:
                hostname = match.group()

        return cls(
            hostname=hostname,
            port=url.port,
            username=url.username,
            password=url.password,
        )

    def to_dict(self, prune: bool = False) -> Dict[str, Any]:
        """
        Convert the `UrlNetloc` object to a dictionary.

        Args:
            prune (bool): If True, removes key-value pairs where the value is `None`.
                Defaults to False.

        Returns:
            dict: A dictionary representation of the `UrlNetloc` object.

        Examples:
            >>> loc = UrlNetloc("example.de", 80, "user")

            >>> loc.to_dict()
            {'hostname': 'example.de', 'port': 80, 'username': 'user', 'password': None}

            >>> loc.to_dict(prune=True)
            {'hostname': 'example.de', 'port': 80, 'username': 'user'}
        """

        data = asdict(self)

        if not prune:
            return data

        return {k: v for k, v in data.items() if v is not None}
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
_UrlPath = TypeVar("_UrlPath", bound="UrlPath")


def urlpath(func):
    """
    Decorate a method so that its result is re-wrapped as the caller's class.

    The decorated method must return an object with a `geturl()` method (such
    as a `urllib.parse.ParseResult`). The resulting URL string is passed to
    `self.__class__` together with the keyword arguments stored in
    `self._kwargs`.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs) -> _UrlPath:
        # re-parse the recombined URL with the owner's stored constructor options
        recombined = func(self, *args, **kwargs).geturl()
        factory = self.__class__

        return factory(recombined, **self._kwargs)

    return wrapper
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class UrlPath(up.ParseResult):
    """
    Class to manipulate URLs to change the scheme, netloc, path, query, and fragment.

    This class wraps `pathlib.PurePosixPath` methods to return a new `UrlPath` object.
    Attributes and methods from `PurePosixPath`, such as `name` and `with_suffix`,
    are available.

    Examples:
        >>> url = UrlPath("https://www.example.com/path/file.txt")

        >>> url.name
        'file.txt'

        >>> url.with_suffix(".html")
        UrlPath('https://www.example.com/path/file.html')
    """

    # scheme -> port that `normalize()` drops from the netloc
    _default_ports = {
        "http": 80,
        "https": 443,
    }

    def __new__(cls, url, **kwargs) -> _UrlPath:
        # a single leading "/" is promoted to "//" so that the first component
        # is parsed as the netloc instead of as part of the path
        url = (
            f"//{url[1:]}" if url.startswith("/") and not url.startswith("//") else url
        )

        parsed_url = up.urlparse(url, **kwargs)
        return super().__new__(cls, *parsed_url)

    def __init__(
        self,
        url: str,
        scheme: str = "",
        allow_fragments: bool = True,
    ) -> None:
        """
        Initialize the `UrlPath` object with a URL string.

        Args:
            url (str): The URL string to initialize the `UrlPath` object.
            scheme (str, optional): The scheme to use if not present in the URL.
                Defaults to an empty string.
            allow_fragments (bool, optional): Whether to allow fragments in the URL.
                Defaults to True.

        Raises:
            ValueError: If the URL is not valid.

        Examples:
            >>> UrlPath("http://example.com/path/file.txt")
            UrlPath('http://example.com/path/file.txt')
        """
        self._url = url
        # remembered so derived URLs are parsed with the same options
        self._kwargs = {
            "scheme": scheme,
            "allow_fragments": allow_fragments,
        }
        self._path = pathlib.PurePosixPath(up.unquote(self.path))

    def __str__(self) -> str:
        return self.normalize()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.geturl()!r})"

    def geturl(self, normalize: bool = False) -> str:
        """
        Return a re-combined version of the URL.

        If `normalize` is `True`, the scheme and netloc are converted to lowercase,
        default ports are removed, and query parameters are sorted.

        Args:
            normalize (bool): If True, normalizes the URL. Defaults to False.

        Returns:
            str: The re-combined URL.

        Examples:
            >>> url = UrlPath("HTTP://Example.COM:80/path/file name.txt?b=2&a=1")

            >>> url.geturl(normalize=True)
            'http://example.com/path/file%20name.txt?a=1&b=2'

            >>> url.geturl()
            'http://Example.COM:80/path/file name.txt?b=2&a=1'
        """
        if normalize:
            return self.normalize()

        return super().geturl()

    def normalize(self, sort: bool = True, **kwargs) -> str:
        """
        Normalize the URL by converting the scheme and host to lowercase, removing the
        default port if present, and sorting the query parameters.

        URLs without a netloc (relative references, ``mailto:`` ...) are
        supported; their netloc simply stays empty.

        Args:
            sort (bool): If True, sorts the query parameters. Defaults to True.
            **kwargs: Additional arguments, such as custom port mappings
                (``ports``: mapping of scheme to default port).

        Returns:
            str: The normalized URL.

        Examples:
            >>> url = UrlPath("HTTP://Example.COM:80/path/file name.txt?b=2&a=1")
            >>> url.normalize()
            'http://example.com/path/file%20name.txt?a=1&b=2'
        """

        ports = kwargs.get("ports", self._default_ports)

        scheme = self.scheme.lower()

        # without an authority there is no host or port to normalize
        netloc = ""

        if self.netloc:
            parsed = UrlNetloc.from_netloc(self.netloc, normalize=True)

            try:
                if ports[scheme] == parsed.port:
                    parsed.port = None
            except KeyError:
                # no default port known for this scheme, keep the port as is
                pass

            # a netloc without hostname (e.g. ":80") cannot be rebuilt from its
            # components, so it is kept verbatim
            netloc = str(parsed) if parsed.hostname else self.netloc

        # unquote first so already-quoted paths are not quoted twice
        path = up.quote(up.unquote(self.path))
        query = up.urlencode(sorted(up.parse_qsl(self.query))) if sort else self.query

        return up.urlunparse(
            (
                scheme,
                netloc,
                path,
                self.params,
                query,
                self.fragment,
            )
        )

    def __getattr__(self, attr: str) -> Any:
        """
        Delegate unknown attributes to the wrapped `pathlib.PurePosixPath`.

        Non-callable attributes are returned as they are. Callables are wrapped:
        a `str` or path result replaces the path of this URL and a new `UrlPath`
        is returned, any other result (e.g. the `bool` of `is_absolute()`) is
        returned unchanged.

        Raises:
            AttributeError: If neither the URL nor its path has the attribute.
        """

        # `__getattr__` only runs after normal lookup failed. `_path` is read
        # from the instance dict so that an instance without it (one built by
        # `_replace`/`_make`, which do not run `__init__`) raises AttributeError
        # instead of recursing forever.
        try:
            value = getattr(self.__dict__["_path"], attr)
        except (KeyError, AttributeError) as e:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{attr}'"
            ) from e

        if not callable(value):
            return value

        @wraps(value)
        def wrapper(*args, **kwargs):
            result = value(*args, **kwargs)

            if isinstance(result, (str, pathlib.PurePath)):
                return self.with_path(result)

            return result

        return wrapper

    @urlpath
    def with_scheme(self, scheme: str) -> _UrlPath:
        """
        Add or Change the `UrlPath.scheme` of the URL.

        Args:
            scheme (str): The new scheme to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated scheme.

        Examples:
            >>> url = UrlPath("http://example.com/path/file.txt")
            >>> url.with_port(990).with_scheme("ftp")
            UrlPath('ftp://example.com:990/path/file.txt')
        """
        return self._replace(scheme=scheme)

    @urlpath
    def with_netloc(self, netloc: Union[str, UrlNetloc]) -> _UrlPath:
        """
        Add or Change the `UrlPath.netloc` of the URL.

        Args:
            netloc (Union[str, UrlNetloc]): The new netloc to set in the URL. It can be
                a string or an instance of `UrlNetloc`.

        Returns:
            `UrlPath`: A new URL with the updated netloc.

        Examples:
            >>> url = UrlPath("http://www.oldhost.com/path/file.txt")
            >>> url.with_netloc("example.com")
            UrlPath('http://example.com/path/file.txt')
        """
        return self._replace(netloc=str(netloc))

    @urlpath
    def with_path(self, path: Union[str, pathlib.PurePosixPath]) -> _UrlPath:
        """
        Add or Change the `UrlPath.path` of the URL.

        Args:
            path (Union[str, pathlib.PurePosixPath]): The new path to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated path.

        Raises:
            TypeError: If the provided path is of the wrong type

        Examples:
            >>> url = UrlPath("http://example.com/oldpath")
            >>> url.with_path("/path/file.txt")
            UrlPath('http://example.com/path/file.txt')
        """

        try:
            path = path.as_posix()
        except AttributeError as e:
            # plain strings have no `as_posix()` and are used unchanged
            if not isinstance(path, str):
                raise TypeError(
                    f"Expected str or PurePosixPath, got {type(path)}"
                ) from e

        return self._replace(path=path)

    @urlpath
    def with_params(self, params: str) -> _UrlPath:
        """
        Change the `UrlPath.params` of the URL.

        Args:
            params (str): The new parameters to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated parameters.

        Examples:
            >>> url = UrlPath("http://example.com/path")
            >>> url.with_params("param1=value1;param2=value2")
            UrlPath('http://example.com/path;param1=value1;param2=value2')
        """
        return self._replace(params=params)

    @urlpath
    def with_query(self, query: str) -> _UrlPath:
        """
        Add or Change the `UrlPath.query` of the URL.

        Args:
            query (str): The new query string to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated query string.

        Examples:
            >>> url = UrlPath("http://example.com/path")
            >>> url.with_query("key=value")
            UrlPath('http://example.com/path?key=value')
        """
        return self._replace(query=query)

    @urlpath
    def with_fragment(self, fragment: str) -> _UrlPath:
        """
        Add or Change the `UrlPath.fragment` of the URL.

        Args:
            fragment (str): The new fragment to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated fragment.

        Examples:
            >>> url = UrlPath("http://example.com/path")
            >>> url.with_fragment("section1")
            UrlPath('http://example.com/path#section1')
        """
        return self._replace(fragment=fragment)

    def with_port(self, port: Optional[int]) -> _UrlPath:
        """
        Add or Change the `UrlPath.port` in the netloc of the URL.

        If `port` is `None`, the port is removed.

        Args:
            port (Optional[int]): The new port to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated port.

        Examples:
            >>> url = UrlPath("http://example.de/path/file.txt")
            >>> url.with_port(8080)
            UrlPath('http://example.de:8080/path/file.txt')
        """

        netloc = UrlNetloc.from_netloc(self.netloc)
        netloc.port = port

        return self.with_netloc(netloc)

    def with_hostname(self, hostname: str) -> _UrlPath:
        """
        Change the `UrlPath.hostname` in the netloc of the URL.

        Args:
            hostname (str): The new hostname to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated hostname.

        Examples:
            >>> url = UrlPath("http://example.de/path/file.txt")
            >>> url.with_hostname("www.server.com")
            UrlPath('http://www.server.com/path/file.txt')
        """

        netloc = UrlNetloc.from_netloc(self.netloc)
        netloc.hostname = hostname

        return self.with_netloc(netloc)

    def with_credentials(
        self,
        username: Optional[str],
        password: Optional[str] = None,
    ) -> _UrlPath:
        """
        Add or change the username and password in the netloc of the URL.

        To change only `username`, the `password` must also be provided.
        If `username` is `None`, the credentials are removed.

        Args:
            username (Optional[str]): The new username to set in the URL.
            password (Optional[str]): The new password to set in the URL.
                Defaults to None.

        Returns:
            `UrlPath`: A new URL with the updated credentials.

        Examples:
            >>> url = UrlPath("ftp://example.com/path")
            >>> url.with_credentials("user", "pass")
            UrlPath('ftp://user:pass@example.com/path')
        """

        netloc = UrlNetloc.from_netloc(self.netloc)
        netloc.username = username
        netloc.password = password

        return self.with_netloc(netloc)

    @cached_property
    def parts(self) -> Tuple[str, ...]:
        """
        Returns the parts of the path without any leading '/'.

        Returns:
            Tuple[str, ...]: A tuple containing the parts of the path.

        Examples:
            >>> UrlPath("//server/root/path/file.txt").parts
            ('root', 'path', 'file.txt')
        """
        return tuple(part for part in self._path.parts if not part.startswith("/"))

    @property
    def anchor(self) -> str:
        """
        Concatenates the netloc and root of the path.

        Returns:
            str: The combined netloc and root of the path.

        Examples:
            >>> UrlPath("//server/root/path/file.txt").anchor
            '//server/root'
        """
        try:
            root = self.parts[0]
        except IndexError:
            # the URL has no path components
            root = ""

        return f"//{self.netloc}/{root}"

    def with_anchor(self, anchor: str, root: bool = False, **kwargs) -> _UrlPath:
        """
        Change the `UrlPath.anchor` of the URL.

        If `root` is `True`, the root of the path will not be removed.

        Args:
            anchor (str): The new anchor to set for the URL.
            root (bool): If `True`, the root of the path will not be removed.
                Defaults to `False`.
            **kwargs: Additional arguments to pass to the UrlPath class constructor.

        Returns:
            `UrlPath`: A new URL with the updated anchor.

        Examples:
            >>> url = UrlPath("//server/root/path/file.txt")

            >>> url.with_anchor("https://www.server.com")
            UrlPath('https://www.server.com/path/file.txt')

            >>> url.with_anchor("https://www.server.com", root=True)
            UrlPath('https://www.server.com/root/path/file.txt')
        """
        anchor = self.__class__(anchor, **kwargs)

        url = self.with_netloc(anchor.netloc)

        if anchor.scheme != url.scheme:
            url = url.with_scheme(anchor.scheme)

        if root is False:
            parts = url.parts[1:]
        else:
            parts = url.parts

        # if anchor has a path, anchor and url path are concatenated
        if any(anchor.parts):
            return url.with_path("/".join(itertools.chain(anchor.parts, parts)))

        # if root is False, the root of the path is removed
        if root is False:
            return url.with_path("/".join(parts))

        return url

    def exists(self, errors: bool = False, **kwargs) -> bool:
        """
        Check if the URL exists by making an HTTP request.

        Args:
            errors (bool): If True, raises a FileNotFoundError when the URL does
                not exist. Defaults to False.
            **kwargs: Additional arguments to pass to `urllib.request.urlopen`.

        Returns:
            bool: True if the URL exists (HTTP status 200), False otherwise.

        Raises:
            FileNotFoundError: If `errors` is True and the URL does not exist.
        """
        url = self.normalize()

        try:
            with urllib.request.urlopen(url, **kwargs) as response:
                return response.status == 200
        except Exception as e:
            # deliberately broad: any failure to open the URL counts as
            # "does not exist"; the original error stays attached as the cause
            if errors is not False:
                raise FileNotFoundError(url) from e

        return False
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def url_from(
    uncpath: str,
    hostname: str,
    *,
    strict: bool = False,
    **kwargs,
) -> UrlPath:
    """
    Build a URL from a UNC path.

    The UNC path is resolved with `pathlib.Path.resolve`, turned into a POSIX
    style path and re-anchored on `hostname` via `UrlPath.with_anchor()`.

    Args:
        uncpath (str): The UNC path to convert.
        hostname (str): The hostname to replace server and root from the UNC path.
        strict (bool, optional): Check that both the uncpath and the resulting
            URL exist. Defaults to False.
        **kwargs: Additional keyword arguments for `UrlPath.with_anchor()`.

    Returns:
        UrlPath: The converted URL.

    Raises:
        FileNotFoundError: If uncpath or URL does not exist and strict is True.

    Examples:
        >>> url_from(r"\\\\server\\root\\path\\readme.pdf", "https://www.server.com")
        UrlPath('https://www.server.com/path/readme.pdf')
    """
    resolved = pathlib.Path(uncpath).resolve(strict=strict)
    posix_path = resolved.as_posix()

    converted: UrlPath = UrlPath(posix_path).with_anchor(hostname, **kwargs)

    if not strict:
        return converted

    # strict mode: the URL itself has to be reachable as well
    converted.exists(errors=True)
    return converted
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def normalize(
    url: str,
    port: bool = False,
    sort: bool = True,
) -> str:
    """
    Return the normalized string form of `url`.

    Scheme and host are converted to lowercase, the port is optionally
    removed, and the query parameters are optionally sorted.

    Args:
        url (str): The URL to normalize.
        port (bool, optional): If False, remove the port from the URL.
            Defaults to False.
        sort (bool, optional): If True, sort the query parameters. Defaults to True.

    Returns:
        str: The normalized URL.

    Examples:
        >>> normalize("https://www.ExamplE.com:443/Path?b=2&a=1")
        'https://www.example.com/Path?a=1&b=2'
    """
    parsed: UrlPath = UrlPath(url)

    # With port=False the URL's own scheme/port pair is handed over as the
    # `ports` table; otherwise the table is empty.
    ports = {parsed.scheme.lower(): parsed.port} if port is False else {}

    return parsed.normalize(sort=sort, ports=ports)
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def normalize_url(*args, **kwargs) -> str:
    """
    Deprecated alias of `pathlibutil.urlpath.normalize()`.

    Emits a `DeprecationWarning` attributed to the caller and then forwards
    all arguments to `normalize()`.

    Will be removed in the future.
    """

    import warnings

    message = (
        "normalize_url() is deprecated, use normalize() instead.\n"
        "Will be removed in the future."
    )
    # stacklevel=2 points the warning at the code that called normalize_url()
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    return normalize(*args, **kwargs)
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
__all__ = [
|
|
713
|
+
"UrlNetloc",
|
|
714
|
+
"UrlPath",
|
|
715
|
+
"normalize",
|
|
716
|
+
"url_from",
|
|
717
|
+
]
|
|
@@ -5,7 +5,7 @@ requires = [ "poetry-core" ]
|
|
|
5
5
|
|
|
6
6
|
[tool.poetry]
|
|
7
7
|
name = "pathlibutil"
|
|
8
|
-
version = "v0.3.1"
|
|
8
|
+
version = "v0.3.3"
|
|
9
9
|
description = "inherits from pathlib.Path with methods for hashing, copying, deleting and more"
|
|
10
10
|
authors = [ "Christoph Dörrer <d-chris@web.de>" ]
|
|
11
11
|
readme = "README.md"
|
|
@@ -16,11 +16,13 @@ classifiers = [
|
|
|
16
16
|
"Programming Language :: Python :: 3.10",
|
|
17
17
|
"Programming Language :: Python :: 3.11",
|
|
18
18
|
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
19
20
|
"License :: OSI Approved :: MIT License",
|
|
20
21
|
"Operating System :: OS Independent",
|
|
21
22
|
]
|
|
22
|
-
keywords = [ "pathlib", "hashlib", "shutil", "urllib.parse" ]
|
|
23
|
-
|
|
23
|
+
keywords = [ "pathlib", "hashlib", "shutil", "urllib.parse", "json", "urlpath" ]
|
|
24
|
+
|
|
25
|
+
[tool.poetry.urls]
|
|
24
26
|
repository = "https://github.com/d-chris/pathlibutil"
|
|
25
27
|
documentation = "https://d-chris.github.io/pathlibutil"
|
|
26
28
|
|
|
@@ -36,15 +38,15 @@ tox = "^4.11.4"
|
|
|
36
38
|
pyinstaller = { version = "^6.10.0", python = "<3.14" }
|
|
37
39
|
|
|
38
40
|
[tool.poetry.group.test.dependencies]
|
|
39
|
-
pytest = "^
|
|
41
|
+
pytest = "^8.3.3"
|
|
40
42
|
pytest-random-order = "^1.1.0"
|
|
41
43
|
pytest-cov = "^4.1.0"
|
|
42
44
|
pytest-mock = "^3.12.0"
|
|
43
45
|
exrex = { git = "https://github.com/asciimoo/exrex", rev = "1c22c70" }
|
|
44
46
|
|
|
45
47
|
[tool.poetry.group.docs.dependencies]
|
|
46
|
-
pdoc = "^14.3.0"
|
|
47
48
|
jinja2-pdoc = "^1.1.0"
|
|
49
|
+
pyperclip = "^1.9.0"
|
|
48
50
|
|
|
49
51
|
[[tool.poetry.source]]
|
|
50
52
|
name = "PyPI"
|
|
@@ -69,3 +71,8 @@ addopts = [
|
|
|
69
71
|
"--cov-report=term-missing:skip-covered",
|
|
70
72
|
"--cov-report=xml",
|
|
71
73
|
]
|
|
74
|
+
|
|
75
|
+
[tool.coverage.report]
|
|
76
|
+
exclude_lines = [
|
|
77
|
+
"^def normalize_url",
|
|
78
|
+
]
|
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
import pathlib
|
|
2
|
-
import re
|
|
3
|
-
import urllib.parse as up
|
|
4
|
-
from dataclasses import asdict, dataclass, field
|
|
5
|
-
from functools import wraps
|
|
6
|
-
from typing import Any, Dict, Optional, TypeVar, Union
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
@dataclass
class UrlNetloc:
    """
    A dataclass to represent the netloc part of a URL.

    The netloc has the form ``[username[:password]@]hostname[:port]``;
    hostnames containing a colon (IPv6 literals) are rendered in brackets.

    >>> url = UrlNetloc.from_netloc("www.example.com:443")
    >>> url.port = None
    >>> str(url)
    'www.example.com'
    """

    # Host without brackets; None when a parsed netloc carries no host.
    hostname: Optional[str]
    port: Optional[int] = field(default=None)
    username: Optional[str] = field(default=None)
    password: Optional[str] = field(default=None)

    def __str__(self) -> str:
        return self.netloc

    @property
    def netloc(self) -> str:
        """netloc string representation of the `dataclass`"""

        netloc = ""

        if self.username:
            netloc += self.username

            # a password is only emitted together with a username
            if self.password:
                netloc += f":{self.password}"

            netloc += "@"

        # A missing hostname renders as an empty host instead of raising
        # TypeError on the membership test below.
        hostname = self.hostname or ""

        if ":" in hostname:
            netloc += f"[{hostname}]"
        else:
            netloc += hostname

        if self.port:
            netloc += f":{self.port:d}"

        return netloc

    @classmethod
    def from_netloc(cls, netloc: str, normalize: bool = False) -> "UrlNetloc":
        """
        Parse a netloc string into a `UrlNetloc` object

        Args:
            netloc: The netloc, with or without a leading ``//``.
            normalize: If False (default) the hostname keeps the letter case
                used in `netloc`; otherwise the lowercased hostname reported
                by `urllib.parse.urlparse` is stored.

        Returns:
            UrlNetloc: The parsed components. `hostname` is None when the
            netloc contains no host.
        """

        if not netloc.startswith("//"):
            netloc = f"//{netloc}"

        url = up.urlparse(netloc)

        hostname = url.hostname

        if hostname and normalize is False:
            # urlparse lowercases the hostname; recover the original spelling.
            # Search only behind the userinfo so that a username which happens
            # to contain the host text is never picked up by mistake.
            hostinfo = url.netloc.rpartition("@")[2]
            match = re.search(re.escape(hostname), hostinfo, re.IGNORECASE)

            if match is not None:
                hostname = match.group()

        return cls(
            hostname=hostname,
            port=url.port,
            username=url.username,
            password=url.password,
        )

    def to_dict(self, prune: bool = False) -> Dict[str, Any]:
        """
        Convert the `UrlNetloc` object to a dictionary

        If `prune` is `True`, remove all key-value pairs from the dict where the value
        is `None`.
        """

        data = asdict(self)

        if not prune:
            return data

        return {k: v for k, v in data.items() if v is not None}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
_UrlPath = TypeVar("_UrlPath", bound="UrlPath")
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def normalize_url(
    url: str,
    port: bool = False,
    sort: bool = True,
) -> str:
    """
    Normalize a URL: scheme and host are converted to lowercase, the port is
    removed if present, and the query parameters are sorted.

    >>> normalize_url("https://www.ExamplE.com:443/Path?b=2&a=1")
    'https://www.example.com/Path?a=1&b=2'
    """

    parsed = UrlPath(url)

    if port is not False:
        # keep whatever port the URL carries: no scheme has a "default" port
        return parsed.normalize(sort=sort, ports={})

    # Declaring the URL's own port as the default for its scheme makes
    # UrlPath.normalize() strip it.
    own_port = {parsed.scheme.lower(): parsed.port}

    return parsed.normalize(sort=sort, ports=own_port)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def urlpath(func):
    """
    Decorator to return a `UrlPath` object from a `urllib.parse.ParseResult` object.

    The decorated method must return an object with a `geturl()` method. The
    wrapper builds a new object of the *same class as the instance* from that
    URL, passing along the instance's stored `_kwargs`, so subclasses are not
    demoted to the base class by the `with_*` methods.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        result = func(self, *args, **kwargs)

        # Go through the regular constructor so the new object is fully
        # initialized with the same parsing options as the instance.
        return type(self)(result.geturl(), **self._kwargs)

    return wrapper
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
class UrlPath(up.ParseResult):
    """
    Class to manipulate URLs to change the scheme, netloc, path, query, and fragment.

    Wrap the `pathlib.PurePosixPath` methods to return a new `UrlPath` object

    >>> url = UrlPath("https://www.example.com/path/to/file").with_suffix(".txt")
    >>> str(url)
    'https://www.example.com/path/to/file.txt'

    """

    # ports that `normalize()` removes for the given (lowercase) scheme
    _default_ports = {
        "http": 80,
        "https": 443,
    }

    def __new__(cls, url, **kwargs) -> _UrlPath:
        # the tuple contents come from urlparse; kwargs are its parsing options
        parsed_url = up.urlparse(url, **kwargs)
        return super().__new__(cls, *parsed_url)

    def __init__(
        self,
        url: str,
        scheme: str = "",
        allow_fragments: bool = True,
    ) -> None:
        """
        Initialize the `UrlPath` object with a URL string.

        A `ValueError` is raised if the URL is not valid.
        """
        self._url = url
        # remembered so that derived URLs are parsed with the same options
        self._kwargs = {
            "scheme": scheme,
            "allow_fragments": allow_fragments,
        }
        # unquoted path used to delegate to the PurePosixPath API
        self._path = pathlib.PurePosixPath(up.unquote(self.path))

    def __str__(self) -> str:
        return self.normalize()

    def geturl(self, normalize: bool = False) -> str:
        """
        Return a re-combined version of the URL.

        If `normalize` is `True` scheme and netloc is converted to lowercase,
        default ports are removed and query parameters are sorted.
        """
        if normalize:
            return self.normalize()

        return super().geturl()

    def normalize(self, sort: bool = True, **kwargs) -> str:
        """
        Normalize the URL by converting the scheme and host to lowercase, removing the
        default port if present, and sorting the query parameters.

        An optional keyword argument `ports` (mapping of lowercase scheme to
        port) replaces the built-in table of default ports.
        """

        ports = kwargs.get("ports", self._default_ports)

        scheme = self.scheme.lower()
        netloc = UrlNetloc.from_netloc(self.netloc, normalize=True)

        if netloc.hostname is None:
            # URL without a host (e.g. a bare path): render an empty netloc
            # instead of failing while formatting a missing hostname.
            netloc.hostname = ""

        try:
            if ports[scheme] == netloc.port:
                netloc.port = None
        except KeyError:
            # no default port known for this scheme: keep the port as it is
            pass

        # unquote first so already-quoted paths are not quoted twice
        path = up.quote(up.unquote(self.path))
        query = up.urlencode(sorted(up.parse_qsl(self.query))) if sort else self.query

        return up.urlunparse(
            (
                scheme,
                str(netloc),
                path,
                self.params,
                query,
                self.fragment,
            )
        )

    def __getattr__(self, attr: str) -> Any:
        """
        Delegate unknown attributes to the wrapped `pathlib.PurePosixPath`.

        Plain attributes are returned as they are. Methods are wrapped so that
        a path-like result (`str` or `pathlib.PurePath`) becomes a new URL with
        that path; any other result (e.g. the `bool` of `is_absolute()`) is
        returned unchanged.
        """

        # Read `_path` from the instance dict directly: objects created without
        # `__init__` (namedtuple `_replace`/`_make`, copy, pickle) do not have
        # it, and `self._path` would re-enter `__getattr__` recursively.
        try:
            path = self.__dict__["_path"]
        except KeyError:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{attr}'"
            ) from None

        try:
            value = getattr(path, attr)
        except AttributeError as e:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{attr}'"
            ) from e

        if not callable(value):
            return value

        @wraps(value)
        def wrapper(*args, **kwargs) -> Any:
            result = value(*args, **kwargs)

            if isinstance(result, (str, pathlib.PurePath)):
                return self.with_path(result)

            return result

        return wrapper

    @urlpath
    def with_scheme(self, scheme: str) -> _UrlPath:
        """
        Change the scheme of the URL.
        """
        return self._replace(scheme=scheme)

    @urlpath
    def with_netloc(self, netloc: Union[str, UrlNetloc]) -> _UrlPath:
        """
        Change the netloc of the URL.
        """
        return self._replace(netloc=str(netloc))

    @urlpath
    def with_path(self, path: Union[str, pathlib.PurePosixPath]) -> _UrlPath:
        """
        Change the path of the URL.

        A `TypeError` is raised if `path` is neither a `str` nor an object
        providing `as_posix()`.
        """

        try:
            path = path.as_posix()
        except AttributeError as e:
            if not isinstance(path, str):
                raise TypeError(
                    f"Expected str or PurePosixPath, got {type(path)}"
                ) from e

        return self._replace(path=path)

    @urlpath
    def with_params(self, params: str) -> _UrlPath:
        """
        Change the parameters of the URL.
        """
        return self._replace(params=params)

    @urlpath
    def with_query(self, query: str) -> _UrlPath:
        """
        Change the query of the URL.
        """
        return self._replace(query=query)

    @urlpath
    def with_fragment(self, fragment: str) -> _UrlPath:
        """
        Change the fragment of the URL.
        """
        return self._replace(fragment=fragment)

    def with_port(self, port: Optional[int]) -> _UrlPath:
        """
        change the port in the netloc of the URL.

        If `port` is `None`, the port is removed.
        """

        netloc = UrlNetloc.from_netloc(self.netloc)
        netloc.port = port

        return self.with_netloc(netloc)

    def with_hostname(self, hostname: str) -> _UrlPath:
        """
        change the hostname in the netloc of the URL
        """

        netloc = UrlNetloc.from_netloc(self.netloc)
        netloc.hostname = hostname

        return self.with_netloc(netloc)

    def with_credentials(
        self, username: Optional[str], password: Optional[str] = None
    ) -> _UrlPath:
        """
        change the username and password in the netloc of the URL

        to change only `username` the `password` must also be provided.

        If `username` is `None`, the credentials are removed.
        """

        netloc = UrlNetloc.from_netloc(self.netloc)
        netloc.username = username
        netloc.password = password

        return self.with_netloc(netloc)
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
__all__ = [
|
|
328
|
-
"UrlNetloc",
|
|
329
|
-
"UrlPath",
|
|
330
|
-
"normalize_url",
|
|
331
|
-
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|