pathlibutil 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,27 @@
1
+ Metadata-Version: 2.1
2
+ Name: pathlibutil
3
+ Version: 0.3.3
4
+ Summary: inherits from pathlib.Path with methods for hashing, copying, deleting and more
5
+ License: MIT
6
+ Keywords: pathlib,hashlib,shutil,urllib.parse,json,urlpath
7
+ Author: Christoph Dörrer
8
+ Author-email: d-chris@web.de
9
+ Requires-Python: >=3.8.1,<4.0.0
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.8
19
+ Provides-Extra: 7z
20
+ Requires-Dist: py7zr (>=0.20.2,<0.21.0) ; extra == "7z"
21
+ Project-URL: documentation, https://d-chris.github.io/pathlibutil
22
+ Project-URL: repository, https://github.com/d-chris/pathlibutil
23
+ Description-Content-Type: text/markdown
24
+
1
25
  <!--
2
26
  filename: ./README.md
3
27
  -->
@@ -49,7 +73,8 @@ Parse and modify URLs with `pathlibutil.urlpath`.
49
73
 
50
74
  - `pathlibutil.urlpath.UrlPath()` to modify a URL and easily access the `path` of the URL like a `pathlib.PurePosixPath` object.
51
75
  - `pathlibutil.urlpath.UrlNetloc()` to parse and modify the `netloc` part of a URL.
52
- - `pathlibutil.urlpath.normalize_url()` to normalize a URL string.
76
+ - `pathlibutil.urlpath.normalize()` to normalize a URL string.
77
+ - `pathlibutil.urlpath.url_from()` to create a URL from a UNC path object.
53
78
 
54
79
 
55
80
  ## Installation
@@ -296,4 +321,93 @@ os.getcwd is K:/pathlibutil
296
321
  Path.cwd(frozen=True) is K:/pathlibutil/examples
297
322
  Path.cwd(frozen=False) is K:/pathlibutil
298
323
  Path.cwd(frozen=_MEIPASS) is C:/Users/CHRIST~1.DOE/AppData/Local/Temp/_MEI106042
299
- ```
324
+ ```
325
+
326
+ ## Example 7
327
+
328
+ Console application to convert UNC paths to intranet URLs.
329
+
330
+ By default, it checks if the filename and URL are available and copies the
331
+ normalized URL to the clipboard.
332
+
333
+ > `pathlibutil.urlpath.url_from()`
334
+
335
+ ```python
336
+ import argparse
337
+ import sys
338
+
339
+ try:
340
+ import pyperclip
341
+
342
+ import pathlibutil.urlpath as up
343
+ except ModuleNotFoundError as e:
344
+ raise ModuleNotFoundError(f"pip install {e.name.split('.')[0]}") from e
345
+
346
+
347
+ def intranet_from(uncpath: str, check: bool = True) -> str:
348
+ """
349
+ Return the intranet URL for the given UNC path.
350
+ """
351
+
352
+ url = up.url_from(
353
+ uncpath,
354
+ hostname="http://intranet.example.de",
355
+ strict=check,
356
+ )
357
+
358
+ return url.normalize()
359
+
360
+
361
+ def cli():
362
+
363
+ parser = argparse.ArgumentParser(
364
+ description=intranet_from.__doc__,
365
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
366
+ )
367
+
368
+ parser.add_argument(
369
+ "filename",
370
+ nargs="*",
371
+ help="The UNC path to the file.",
372
+ )
373
+ parser.add_argument(
374
+ "-c",
375
+ "--no-check",
376
+ action="store_false",
377
+ dest="check",
378
+ help="Don't check if filename and url is available.",
379
+ )
380
+ parser.add_argument(
381
+ "-s",
382
+ "--silent",
383
+ action="store_true",
384
+ help="Do not print the url to stdout.",
385
+ )
386
+ parser.add_argument(
387
+ "-n",
388
+ "--no-clip",
389
+ action="store_false",
390
+ dest="clip",
391
+ help="Don't copy the url to the clipboard.",
392
+ )
393
+
394
+ args = parser.parse_args()
395
+ filename = " ".join(args.filename)
396
+
397
+ url = intranet_from(filename, check=args.check)
398
+
399
+ if not args.silent:
400
+ print(url)
401
+
402
+ if args.clip:
403
+ pyperclip.copy(url)
404
+
405
+
406
+ if __name__ == "__main__":
407
+ try:
408
+ cli()
409
+ except Exception as e:
410
+ print(e, file=sys.stderr)
411
+ sys.exit(1)
412
+ ```
413
+
@@ -1,27 +1,3 @@
1
- Metadata-Version: 2.1
2
- Name: pathlibutil
3
- Version: 0.3.1
4
- Summary: inherits from pathlib.Path with methods for hashing, copying, deleting and more
5
- Home-page: https://d-chris.github.io
6
- License: MIT
7
- Keywords: pathlib,hashlib,shutil,urllib.parse
8
- Author: Christoph Dörrer
9
- Author-email: d-chris@web.de
10
- Requires-Python: >=3.8.1,<4.0.0
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Operating System :: OS Independent
13
- Classifier: Programming Language :: Python :: 3
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Classifier: Programming Language :: Python :: 3.12
18
- Classifier: Programming Language :: Python :: 3.8
19
- Provides-Extra: 7z
20
- Requires-Dist: py7zr (>=0.20.2,<0.21.0) ; extra == "7z"
21
- Project-URL: Documentation, https://d-chris.github.io/pathlibutil
22
- Project-URL: Repository, https://github.com/d-chris/pathlibutil
23
- Description-Content-Type: text/markdown
24
-
25
1
  <!--
26
2
  filename: ./README.md
27
3
  -->
@@ -73,7 +49,8 @@ Parse and modify URLs with `pathlibutil.urlpath`.
73
49
 
74
50
  - `pathlibutil.urlpath.UrlPath()` to modify a URL and easily access the `path` of the URL like a `pathlib.PurePosixPath` object.
75
51
  - `pathlibutil.urlpath.UrlNetloc()` to parse and modify the `netloc` part of a URL.
76
- - `pathlibutil.urlpath.normalize_url()` to normalize a URL string.
52
+ - `pathlibutil.urlpath.normalize()` to normalize a URL string.
53
+ - `pathlibutil.urlpath.url_from()` to create a URL from a UNC path object.
77
54
 
78
55
 
79
56
  ## Installation
@@ -321,3 +298,91 @@ Path.cwd(frozen=True) is K:/pathlibutil/examples
321
298
  Path.cwd(frozen=False) is K:/pathlibutil
322
299
  Path.cwd(frozen=_MEIPASS) is C:/Users/CHRIST~1.DOE/AppData/Local/Temp/_MEI106042
323
300
  ```
301
+
302
+ ## Example 7
303
+
304
+ Console application to convert UNC paths to intranet URLs.
305
+
306
+ By default, it checks if the filename and URL are available and copies the
307
+ normalized URL to the clipboard.
308
+
309
+ > `pathlibutil.urlpath.url_from()`
310
+
311
+ ```python
312
+ import argparse
313
+ import sys
314
+
315
+ try:
316
+ import pyperclip
317
+
318
+ import pathlibutil.urlpath as up
319
+ except ModuleNotFoundError as e:
320
+ raise ModuleNotFoundError(f"pip install {e.name.split('.')[0]}") from e
321
+
322
+
323
+ def intranet_from(uncpath: str, check: bool = True) -> str:
324
+ """
325
+ Return the intranet URL for the given UNC path.
326
+ """
327
+
328
+ url = up.url_from(
329
+ uncpath,
330
+ hostname="http://intranet.example.de",
331
+ strict=check,
332
+ )
333
+
334
+ return url.normalize()
335
+
336
+
337
+ def cli():
338
+
339
+ parser = argparse.ArgumentParser(
340
+ description=intranet_from.__doc__,
341
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
342
+ )
343
+
344
+ parser.add_argument(
345
+ "filename",
346
+ nargs="*",
347
+ help="The UNC path to the file.",
348
+ )
349
+ parser.add_argument(
350
+ "-c",
351
+ "--no-check",
352
+ action="store_false",
353
+ dest="check",
354
+ help="Don't check if filename and url is available.",
355
+ )
356
+ parser.add_argument(
357
+ "-s",
358
+ "--silent",
359
+ action="store_true",
360
+ help="Do not print the url to stdout.",
361
+ )
362
+ parser.add_argument(
363
+ "-n",
364
+ "--no-clip",
365
+ action="store_false",
366
+ dest="clip",
367
+ help="Don't copy the url to the clipboard.",
368
+ )
369
+
370
+ args = parser.parse_args()
371
+ filename = " ".join(args.filename)
372
+
373
+ url = intranet_from(filename, check=args.check)
374
+
375
+ if not args.silent:
376
+ print(url)
377
+
378
+ if args.clip:
379
+ pyperclip.copy(url)
380
+
381
+
382
+ if __name__ == "__main__":
383
+ try:
384
+ cli()
385
+ except Exception as e:
386
+ print(e, file=sys.stderr)
387
+ sys.exit(1)
388
+ ```
@@ -0,0 +1,717 @@
1
+ import itertools
2
+ import pathlib
3
+ import re
4
+ import urllib.parse as up
5
+ import urllib.request
6
+ from dataclasses import asdict, dataclass, field
7
+ from functools import cached_property, wraps
8
+ from typing import Any, Dict, Optional, Tuple, TypeVar, Union
9
+
10
+
11
@dataclass
class UrlNetloc:
    """
    A dataclass to represent the netloc part of a URL.

    Attributes:
        hostname (str): The hostname of the URL. An empty string means "no host".
        port (Optional[int]): The port number of the URL. Defaults to None.
        username (Optional[str]): The username for authentication. Defaults to None.
        password (Optional[str]): The password for authentication. Defaults to None.

    Examples:
        >>> url = UrlNetloc.from_netloc("www.example.com:443")
        >>> url.port = None
        >>> str(url)
        'www.example.com'
    """

    hostname: str
    """
    The hostname of the URL.

    Examples:
        'www.example.com'
    """
    port: Optional[int] = field(default=None)
    """
    The port number of the URL. Defaults to None.
    """
    username: Optional[str] = field(default=None)
    """
    The username for authentication. Defaults to None.
    """
    password: Optional[str] = field(default=None)
    """
    The password for authentication. Defaults to None.
    """

    def __str__(self) -> str:
        return self.netloc

    @property
    def netloc(self) -> str:
        """
        Return the netloc string representation of the `dataclass`.

        The password is only emitted together with a username, a hostname
        containing ``:`` (IPv6 literal) is wrapped in brackets, and a falsy
        port is omitted.

        Returns:
            str: The netloc string representation.

        Examples:
            >>> UrlNetloc("www.example.de", 433, "user", "pass").netloc
            'user:pass@www.example.de:433'
        """

        # A missing hostname (None or "") renders as an empty host instead of
        # failing on the `":" in hostname` membership test.
        host = self.hostname or ""

        pieces = []

        if self.username:
            pieces.append(self.username)

            if self.password:
                pieces.append(f":{self.password}")

            pieces.append("@")

        pieces.append(f"[{host}]" if ":" in host else host)

        if self.port:
            pieces.append(f":{self.port:d}")

        return "".join(pieces)

    @classmethod
    def from_netloc(cls, netloc: str, normalize: bool = False) -> "UrlNetloc":
        """
        Parse a netloc string into a `UrlNetloc` object.

        Args:
            netloc (str): The netloc string to parse, with or without a
                leading ``//``.
            normalize (bool): If True, the hostname is stored lowercased as
                returned by `urllib.parse`. If False, the original spelling of
                the hostname is preserved. Defaults to False.

        Returns:
            `UrlNetloc`: An instance of `UrlNetloc` with the parsed components.
            A netloc without a host yields an empty `hostname`.

        Raises:
            ValueError: If `urllib.parse` rejects the port of the netloc.

        Examples:
            >>> UrlNetloc.from_netloc("user:pass@example.de:433")
            UrlNetloc(hostname='example.de', port=433, username='user', password='pass')
        """

        if not netloc.startswith("//"):
            netloc = f"//{netloc}"

        url = up.urlparse(netloc)

        # urlparse reports a missing host as None; keep the declared `str` type.
        hostname = url.hostname or ""

        if hostname and normalize is False:
            # urlparse lowercases the hostname. Recover the original spelling,
            # but only look at the part after the last "@" so a username or
            # password that happens to contain the hostname is never matched.
            hostinfo = url.netloc.rpartition("@")[2]
            match = re.search(re.escape(hostname), hostinfo, re.IGNORECASE)

            if match is not None:
                hostname = match.group()

        return cls(
            hostname=hostname,
            port=url.port,
            username=url.username,
            password=url.password,
        )

    def to_dict(self, prune: bool = False) -> Dict[str, Any]:
        """
        Convert the `UrlNetloc` object to a dictionary.

        Args:
            prune (bool): If True, removes key-value pairs where the value is `None`.
                Defaults to False.

        Returns:
            dict: A dictionary representation of the `UrlNetloc` object.

        Examples:
            >>> loc = UrlNetloc("example.de", 80, "user")

            >>> loc.to_dict()
            {'hostname': 'example.de', 'port': 80, 'username': 'user', 'password': None}

            >>> loc.to_dict(prune=True)
            {'hostname': 'example.de', 'port': 80, 'username': 'user'}
        """

        data = asdict(self)

        if not prune:
            return data

        return {k: v for k, v in data.items() if v is not None}
149
+
150
+
151
+ _UrlPath = TypeVar("_UrlPath", bound="UrlPath")
152
+
153
+
154
def urlpath(func):
    """
    Decorator that turns the object returned by a method into a new instance of
    the caller's class.

    The wrapped method is expected to return something with a `geturl()` method
    (such as a `urllib.parse.ParseResult`). That URL is fed back into
    `self.__class__` together with the keyword arguments stored in
    `self._kwargs`.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs) -> _UrlPath:
        parsed = func(self, *args, **kwargs)
        rebuild = self.__class__

        return rebuild(parsed.geturl(), **self._kwargs)

    return wrapper
166
+
167
+
168
class UrlPath(up.ParseResult):
    """
    Class to manipulate URLs to change the scheme, netloc, path, query, and fragment.

    This class wraps `pathlib.PurePosixPath` methods to return a new `UrlPath` object.
    Attributes and methods from `PurePosixPath`, such as `name` and `with_suffix`,
    are available.

    Examples:
        >>> url = UrlPath("https://www.example.com/path/file.txt")

        >>> url.name
        'file.txt'

        >>> url.with_suffix(".html")
        UrlPath('https://www.example.com/path/file.html')
    """

    _default_ports = {
        "http": 80,
        "https": 443,
    }

    def __new__(cls, url, **kwargs) -> _UrlPath:
        # A single leading slash is promoted to "//" so that urlparse treats
        # the first segment as the netloc (e.g. "/server/root" -> "//server/root").
        if url.startswith("/") and not url.startswith("//"):
            url = f"//{url[1:]}"

        parsed_url = up.urlparse(url, **kwargs)
        return super().__new__(cls, *parsed_url)

    def __init__(
        self,
        url: str,
        scheme: str = "",
        allow_fragments: bool = True,
    ) -> None:
        """
        Initialize the `UrlPath` object with a URL string.

        Args:
            url (str): The URL string to initialize the `UrlPath` object.
            scheme (str, optional): The scheme to use if not present in the URL.
                Defaults to an empty string.
            allow_fragments (bool, optional): Whether to allow fragments in the URL.
                Defaults to True.

        Raises:
            ValueError: If the URL is not valid.

        Examples:
            >>> UrlPath("http://example.com/path/file.txt")
            UrlPath('http://example.com/path/file.txt')
        """
        self._url = url
        # remembered so that derived URLs are re-parsed with the same options
        self._kwargs = {
            "scheme": scheme,
            "allow_fragments": allow_fragments,
        }
        # unquoted path, used as the target for forwarded PurePosixPath access
        self._path = pathlib.PurePosixPath(up.unquote(self.path))

    def __str__(self) -> str:
        return self.normalize()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.geturl()!r})"

    def _netloc_parts(self, normalize: bool = False) -> UrlNetloc:
        """Return the netloc as a `UrlNetloc`; a URL without host gets hostname ''."""
        if self.hostname is None:
            # `UrlNetloc.from_netloc` cannot be relied on for a missing host,
            # so the object is built from the already parsed components.
            return UrlNetloc(
                hostname="",
                port=self.port,
                username=self.username,
                password=self.password,
            )

        return UrlNetloc.from_netloc(self.netloc, normalize=normalize)

    def geturl(self, normalize: bool = False) -> str:
        """
        Return a re-combined version of the URL.

        If `normalize` is `True`, the scheme and netloc are converted to lowercase,
        default ports are removed, and query parameters are sorted.

        Args:
            normalize (bool): If True, normalizes the URL. Defaults to False.

        Returns:
            str: The re-combined URL.

        Examples:
            >>> url = UrlPath("HTTP://Example.COM:80/path/file name.txt?b=2&a=1")

            >>> url.geturl(normalize=True)
            'http://example.com/path/file%20name.txt?a=1&b=2'

            >>> url.geturl()
            'http://Example.COM:80/path/file name.txt?b=2&a=1'
        """
        if normalize:
            return self.normalize()

        return super().geturl()

    def normalize(self, sort: bool = True, **kwargs) -> str:
        """
        Normalize the URL by converting the scheme and host to lowercase, removing the
        default port if present, and sorting the query parameters.

        URLs without a host (for example relative paths) are supported; their
        netloc stays empty.

        Args:
            sort (bool): If True, sorts the query parameters. Query parameters
                with a blank value are dropped in that case, because
                `urllib.parse.parse_qsl` is called with its defaults.
                Defaults to True.
            **kwargs: Additional arguments. `ports` may be a mapping of scheme
                to the port that should be removed for that scheme; it replaces
                the built-in http/https mapping.

        Returns:
            str: The normalized URL.

        Examples:
            >>> url = UrlPath("HTTP://Example.COM:80/path/file name.txt?b=2&a=1")
            >>> url.normalize()
            'http://example.com/path/file%20name.txt?a=1&b=2'
        """

        ports = kwargs.get("ports", self._default_ports)

        scheme = self.scheme.lower()
        netloc = self._netloc_parts(normalize=True)

        try:
            if ports[scheme] == netloc.port:
                netloc.port = None
        except KeyError:
            # no default port known for this scheme, keep the port as it is
            pass

        # unquote first so already-quoted paths are not quoted twice
        path = up.quote(up.unquote(self.path))
        query = up.urlencode(sorted(up.parse_qsl(self.query))) if sort else self.query

        return up.urlunparse(
            (
                scheme,
                str(netloc),
                path,
                self.params,
                query,
                self.fragment,
            )
        )

    def __getattr__(self, attr: str) -> Any:
        """
        Forward unknown attributes to the internal `pathlib.PurePosixPath`.

        Non-callable attributes are returned unchanged. Callables are wrapped:
        when the call returns a string or a pure path, a new `UrlPath` with that
        path is returned, any other result is passed through as it is.

        Raises:
            AttributeError: If neither the URL nor the path has the attribute.
        """

        # `__getattr__` only runs after normal lookup failed. `_path` is read
        # from the instance dict directly: `self._path` would re-enter this
        # method endlessly on an instance that never ran `__init__`
        # (for example the intermediate result of `_replace`).
        try:
            value = getattr(self.__dict__["_path"], attr)
        except (KeyError, AttributeError) as e:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{attr}'"
            ) from e

        if not callable(value):
            return value

        @wraps(value)
        def wrapper(*args, **kwargs):
            result = value(*args, **kwargs)

            # only path-like results can become the path of a new URL;
            # booleans etc. (e.g. from `is_absolute()`) are returned directly
            if isinstance(result, (str, pathlib.PurePath)):
                return self.with_path(result)

            return result

        return wrapper

    @urlpath
    def with_scheme(self, scheme: str) -> _UrlPath:
        """
        Add or Change the `UrlPath.scheme` of the URL.

        Args:
            scheme (str): The new scheme to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated scheme.

        Examples:
            >>> url = UrlPath("http://example.com/path/file.txt")
            >>> url.with_port(990).with_scheme("ftp")
            UrlPath('ftp://example.com:990/path/file.txt')
        """
        return self._replace(scheme=scheme)

    @urlpath
    def with_netloc(self, netloc: Union[str, UrlNetloc]) -> _UrlPath:
        """
        Add or Change the `UrlPath.netloc` of the URL.

        Args:
            netloc (Union[str, UrlNetloc]): The new netloc to set in the URL. It can be
                a string or an instance of `UrlNetloc`.

        Returns:
            `UrlPath`: A new URL with the updated netloc.

        Examples:
            >>> url = UrlPath("http://www.oldhost.com/path/file.txt")
            >>> url.with_netloc("example.com")
            UrlPath('http://example.com/path/file.txt')
        """
        return self._replace(netloc=str(netloc))

    @urlpath
    def with_path(self, path: Union[str, pathlib.PurePosixPath]) -> _UrlPath:
        """
        Add or Change the `UrlPath.path` of the URL.

        Args:
            path (Union[str, pathlib.PurePosixPath]): The new path to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated path.

        Raises:
            TypeError: If the provided path is of the wrong type

        Examples:
            >>> url = UrlPath("http://example.com/oldpath")
            >>> url.with_path("/path/file.txt")
            UrlPath('http://example.com/path/file.txt')
        """

        try:
            path = path.as_posix()
        except AttributeError as e:
            # plain strings have no `as_posix()` and are used unchanged
            if not isinstance(path, str):
                raise TypeError(
                    f"Expected str or PurePosixPath, got {type(path)}"
                ) from e

        return self._replace(path=path)

    @urlpath
    def with_params(self, params: str) -> _UrlPath:
        """
        Change the `UrlPath.params` of the URL.

        Args:
            params (str): The new parameters to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated parameters.

        Examples:
            >>> url = UrlPath("http://example.com/path")
            >>> url.with_params("param1=value1;param2=value2")
            UrlPath('http://example.com/path;param1=value1;param2=value2')
        """
        return self._replace(params=params)

    @urlpath
    def with_query(self, query: str) -> _UrlPath:
        """
        Add or Change the `UrlPath.query` of the URL.

        Args:
            query (str): The new query string to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated query string.

        Examples:
            >>> url = UrlPath("http://example.com/path")
            >>> url.with_query("key=value")
            UrlPath('http://example.com/path?key=value')
        """
        return self._replace(query=query)

    @urlpath
    def with_fragment(self, fragment: str) -> _UrlPath:
        """
        Add or Change the `UrlPath.fragment` of the URL.

        Args:
            fragment (str): The new fragment to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated fragment.

        Examples:
            >>> url = UrlPath("http://example.com/path")
            >>> url.with_fragment("section1")
            UrlPath('http://example.com/path#section1')
        """
        return self._replace(fragment=fragment)

    def with_port(self, port: int) -> _UrlPath:
        """
        Add or Change the `UrlPath.port` in the netloc of the URL.

        If `port` is `None`, the port is removed.

        Args:
            port (int): The new port to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated port.

        Examples:
            >>> url = UrlPath("http://example.de/path/file.txt")
            >>> url.with_port(8080)
            UrlPath('http://example.de:8080/path/file.txt')
        """

        netloc = self._netloc_parts()
        netloc.port = port

        return self.with_netloc(netloc)

    def with_hostname(self, hostname: str) -> _UrlPath:
        """
        Change the `UrlPath.hostname` in the netloc of the URL.

        Args:
            hostname (str): The new hostname to set in the URL.

        Returns:
            `UrlPath`: A new URL with the updated hostname.

        Examples:
            >>> url = UrlPath("http://example.de/path/file.txt")
            >>> url.with_hostname("www.server.com")
            UrlPath('http://www.server.com/path/file.txt')
        """

        netloc = self._netloc_parts()
        netloc.hostname = hostname

        return self.with_netloc(netloc)

    def with_credentials(
        self, username: str, password: Optional[str] = None
    ) -> _UrlPath:
        """
        Add or change the username and password in the netloc of the URL.

        To change only `username`, the `password` must also be provided.
        If `username` is `None`, the credentials are removed.

        Args:
            username (str): The new username to set in the URL.
            password (str, optional): The new password to set in the URL.
                Defaults to None.

        Returns:
            `UrlPath`: A new URL with the updated credentials.

        Examples:
            >>> url = UrlPath("ftp://example.com/path")
            >>> url.with_credentials("user", "pass")
            UrlPath('ftp://user:pass@example.com/path')
        """

        netloc = self._netloc_parts()
        netloc.username = username
        netloc.password = password

        return self.with_netloc(netloc)

    @cached_property
    def parts(self) -> Tuple[str, ...]:
        """
        Returns the parts of the path without any leading '/'.

        Returns:
            Tuple[str, ...]: A tuple containing the parts of the path.

        Examples:
            >>> UrlPath("//server/root/path/file.txt").parts
            ('root', 'path', 'file.txt')
        """
        return tuple(part for part in self._path.parts if not part.startswith("/"))

    @property
    def anchor(self) -> str:
        """
        Concatenates the netloc and root of the path.

        Returns:
            str: The combined netloc and root of the path.

        Examples:
            >>> UrlPath("//server/root/path/file.txt").anchor
            '//server/root'
        """
        try:
            root = self.parts[0]
        except IndexError:
            # the URL has no path segments at all
            root = ""

        return f"//{self.netloc}/{root}"

    def with_anchor(self, anchor: str, root: bool = False, **kwargs) -> _UrlPath:
        """
        Change the `UrlPath.anchor` of the URL.

        If `root` is `True`, the root of the path will not be removed.

        Args:
            anchor (str): The new anchor to set for the URL.
            root (bool): If `True`, the root of the path will not be removed.
                Defaults to `False`.
            **kwargs: Additional arguments to pass to the UrlPath class constructor.

        Returns:
            `UrlPath`: A new URL with the updated anchor.

        Examples:
            >>> url = UrlPath("//server/root/path/file.txt")

            >>> url.with_anchor("https://www.server.com")
            UrlPath('https://www.server.com/path/file.txt')

            >>> url.with_anchor("https://www.server.com", root=True)
            UrlPath('https://www.server.com/root/path/file.txt')
        """
        anchor = self.__class__(anchor, **kwargs)

        url = self.with_netloc(anchor.netloc)

        if anchor.scheme != url.scheme:
            url = url.with_scheme(anchor.scheme)

        if root is False:
            parts = url.parts[1:]
        else:
            parts = url.parts

        # if anchor has a path, anchor and url path are concatenated
        if any(anchor.parts):
            return url.with_path("/".join(itertools.chain(anchor.parts, parts)))

        # if root is False, the root of the path is removed
        if root is False:
            return url.with_path("/".join(parts))

        return url

    def exists(self, errors: bool = False, **kwargs) -> bool:
        """
        Check if the URL exists by making an HTTP request.

        Args:
            errors (bool): If True, raises a FileNotFoundError when the request
                fails. Defaults to False.
            **kwargs: Additional arguments to pass to `urllib.request.urlopen`.

        Returns:
            bool: True if the URL exists (HTTP status 200), False otherwise.

        Raises:
            FileNotFoundError: If `errors` is True and opening the URL raised
                an exception.
        """
        url = self.normalize()

        # NOTE(review): `urlopen` accepts any scheme it has a handler for
        # (including `file:`); callers passing untrusted URLs should check the
        # scheme first.
        try:
            with urllib.request.urlopen(url, **kwargs) as response:
                return response.status == 200
        except Exception as e:
            if errors is not False:
                raise FileNotFoundError(url) from e

        return False
621
+
622
+
623
def url_from(
    uncpath: str,
    hostname: str,
    *,
    strict: bool = False,
    **kwargs,
) -> UrlPath:
    """
    Convert a UNC path to a URL.

    The path is resolved with `pathlib.Path.resolve()`, turned into a `UrlPath`
    from its POSIX form, and its anchor is then replaced by `hostname`.

    Args:
        uncpath (str): The UNC path to convert.
        hostname (str): The hostname to replace server and root from the UNC path.
        strict (bool, optional): Check if the uncpath and URL exist.
            Defaults to False.
        **kwargs: Additional keyword arguments for `UrlPath.with_anchor()`.

    Returns:
        UrlPath: The converted URL.

    Raises:
        FileNotFoundError: If uncpath or URL does not exist and strict is True.

    Examples:
        >>> url_from(r"\\\\server\\root\\path\\readme.pdf", "https://www.server.com")
        UrlPath('https://www.server.com/path/readme.pdf')
    """
    resolved = pathlib.Path(uncpath).resolve(strict=strict)
    source = UrlPath(resolved.as_posix())

    url: UrlPath = source.with_anchor(hostname, **kwargs)

    if not strict:
        return url

    # the return value is ignored on purpose: with errors=True a failing
    # request is reported as an exception
    url.exists(errors=True)

    return url
658
+
659
+
660
def normalize(
    url: str,
    port: bool = False,
    sort: bool = True,
) -> str:
    """
    Normalize a URL by converting the scheme and host to lowercase, optionally removing
    the port, and sorting the query parameters.

    Args:
        url (str): The URL to normalize.
        port (bool, optional): If False, remove the port from the URL, whatever
            its value. Any other value keeps the port. Defaults to False.
        sort (bool, optional): If True, sort the query parameters. Defaults to True.

    Returns:
        str: The normalized URL.

    Examples:
        >>> normalize("https://www.ExamplE.com:443/Path?b=2&a=1")
        'https://www.example.com/Path?a=1&b=2'
    """

    parsed: UrlPath = UrlPath(url)

    # Declaring the URL's own port as the "default" port of its scheme makes
    # `UrlPath.normalize()` strip it; an empty mapping strips nothing.
    ports = {} if port is not False else {parsed.scheme.lower(): parsed.port}

    return parsed.normalize(sort=sort, ports=ports)
691
+
692
+
693
def normalize_url(*args, **kwargs) -> str:
    """
    Deprecated alias of `pathlibutil.urlpath.normalize()`.

    Emits a `DeprecationWarning` attributed to the caller and forwards all
    arguments unchanged. Will be removed in the future.
    """

    import warnings

    message = (
        "normalize_url() is deprecated, use normalize() instead.\n"
        "Will be removed in the future."
    )

    # stacklevel=2 points the warning at the code calling normalize_url()
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    return normalize(*args, **kwargs)
710
+
711
+
712
+ __all__ = [
713
+ "UrlNetloc",
714
+ "UrlPath",
715
+ "normalize",
716
+ "url_from",
717
+ ]
@@ -5,7 +5,7 @@ requires = [ "poetry-core" ]
5
5
 
6
6
  [tool.poetry]
7
7
  name = "pathlibutil"
8
- version = "v0.3.1"
8
+ version = "v0.3.3"
9
9
  description = "inherits from pathlib.Path with methods for hashing, copying, deleting and more"
10
10
  authors = [ "Christoph Dörrer <d-chris@web.de>" ]
11
11
  readme = "README.md"
@@ -16,11 +16,13 @@ classifiers = [
16
16
  "Programming Language :: Python :: 3.10",
17
17
  "Programming Language :: Python :: 3.11",
18
18
  "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
19
20
  "License :: OSI Approved :: MIT License",
20
21
  "Operating System :: OS Independent",
21
22
  ]
22
- keywords = [ "pathlib", "hashlib", "shutil", "urllib.parse" ]
23
- homepage = "https://d-chris.github.io"
23
+ keywords = [ "pathlib", "hashlib", "shutil", "urllib.parse", "json", "urlpath" ]
24
+
25
+ [tool.poetry.urls]
24
26
  repository = "https://github.com/d-chris/pathlibutil"
25
27
  documentation = "https://d-chris.github.io/pathlibutil"
26
28
 
@@ -36,15 +38,15 @@ tox = "^4.11.4"
36
38
  pyinstaller = { version = "^6.10.0", python = "<3.14" }
37
39
 
38
40
  [tool.poetry.group.test.dependencies]
39
- pytest = "^7.4.3"
41
+ pytest = "^8.3.3"
40
42
  pytest-random-order = "^1.1.0"
41
43
  pytest-cov = "^4.1.0"
42
44
  pytest-mock = "^3.12.0"
43
45
  exrex = { git = "https://github.com/asciimoo/exrex", rev = "1c22c70" }
44
46
 
45
47
  [tool.poetry.group.docs.dependencies]
46
- pdoc = "^14.3.0"
47
48
  jinja2-pdoc = "^1.1.0"
49
+ pyperclip = "^1.9.0"
48
50
 
49
51
  [[tool.poetry.source]]
50
52
  name = "PyPI"
@@ -69,3 +71,8 @@ addopts = [
69
71
  "--cov-report=term-missing:skip-covered",
70
72
  "--cov-report=xml",
71
73
  ]
74
+
75
+ [tool.coverage.report]
76
+ exclude_lines = [
77
+ "^def normalize_url",
78
+ ]
@@ -1,331 +0,0 @@
1
- import pathlib
2
- import re
3
- import urllib.parse as up
4
- from dataclasses import asdict, dataclass, field
5
- from functools import wraps
6
- from typing import Any, Dict, Optional, TypeVar, Union
7
-
8
-
9
- @dataclass
10
- class UrlNetloc:
11
- """
12
- A dataclass to represent the netloc part of a URL.
13
-
14
- >>> url = UrlNetloc.from_netloc("www.example.com:443")
15
- >>> url.port = None
16
- >>> str(url)
17
- 'www.example.com'
18
- """
19
-
20
- hostname: str
21
- port: Optional[int] = field(default=None)
22
- username: Optional[str] = field(default=None)
23
- password: Optional[str] = field(default=None)
24
-
25
- def __str__(self) -> str:
26
- return self.netloc
27
-
28
- @property
29
- def netloc(self) -> str:
30
- """netloc string representation of the `dataclass`"""
31
-
32
- netloc = ""
33
-
34
- if self.username:
35
- netloc += self.username
36
-
37
- if self.password:
38
- netloc += f":{self.password}"
39
-
40
- netloc += "@"
41
-
42
- if ":" in self.hostname:
43
- netloc += f"[{self.hostname}]"
44
- else:
45
- netloc += self.hostname
46
-
47
- if self.port:
48
- netloc += f":{self.port:d}"
49
-
50
- return netloc
51
-
52
- @classmethod
53
- def from_netloc(cls, netloc: str, normalize: bool = False) -> "UrlNetloc":
54
- """Parse a netloc string into a `UrlNetloc` object"""
55
-
56
- if not netloc.startswith("//"):
57
- netloc = f"//{netloc}"
58
-
59
- url = up.urlparse(netloc)
60
-
61
- hostname = url.hostname
62
-
63
- if normalize is False:
64
- try:
65
- pattern = re.escape(url.hostname)
66
- hostname = re.search(pattern, netloc, re.IGNORECASE).group()
67
- except AttributeError:
68
- pass
69
-
70
- return cls(
71
- hostname=hostname,
72
- port=url.port,
73
- username=url.username,
74
- password=url.password,
75
- )
76
-
77
- def to_dict(self, prune: bool = False) -> Dict[str, Any]:
78
- """
79
- Convert the `UrlNetloc` object to a dictionary
80
-
81
- If `prune` is `True`, remove all key-value pairs from the dict where the value
82
- is `None`.
83
- """
84
-
85
- data = asdict(self)
86
-
87
- if not prune:
88
- return data
89
-
90
- return {k: v for k, v in data.items() if v is not None}
91
-
92
-
93
- _UrlPath = TypeVar("_UrlPath", bound="UrlPath")
94
-
95
-
96
- def normalize_url(
97
- url: str,
98
- port: bool = False,
99
- sort: bool = True,
100
- ) -> str:
101
- """
102
- Function to normalize a URL by converting the scheme and host to lowercase, removing
103
- port if present, and sorting the query parameters.
104
-
105
- >>> normalize_url("https://www.ExamplE.com:443/Path?b=2&a=1")
106
- 'https://www.example.com/Path?a=1&b=2'
107
- """
108
-
109
- url = UrlPath(url)
110
-
111
- if port is False:
112
- ports = {url.scheme.lower(): url.port}
113
- else:
114
- ports = {}
115
-
116
- return url.normalize(sort=sort, ports=ports)
117
-
118
-
119
- def urlpath(func):
120
- """
121
- decorator to return a `UrlPath` object from a `urllib.parse.ParseResult` object.
122
- """
123
-
124
- @wraps(func)
125
- def wrapper(self, *args, **kwargs):
126
- result = func(self, *args, **kwargs)
127
-
128
- return UrlPath(result.geturl(), **self._kwargs)
129
-
130
- return wrapper
131
-
132
-
133
- class UrlPath(up.ParseResult):
134
- """
135
- Class to manipulate URLs to change the scheme, netloc, path, query, and fragment.
136
-
137
- Wrap the `pathlib.PurePosixPath` methods to return a new `UrlPath` object
138
-
139
- >>> url = UrlPath("https://www.example.com/path/to/file").with_suffix(".txt")
140
- >>> str(url)
141
- 'https://www.example.com/path/to/file.txt'
142
-
143
- """
144
-
145
- _default_ports = {
146
- "http": 80,
147
- "https": 443,
148
- }
149
-
150
- def __new__(cls, url, **kwargs) -> _UrlPath:
151
- parsed_url = up.urlparse(url, **kwargs)
152
- return super().__new__(cls, *parsed_url)
153
-
154
- def __init__(
155
- self,
156
- url: str,
157
- scheme: str = "",
158
- allow_fragments: bool = True,
159
- ) -> None:
160
- """
161
- Initialize the `UrlPath` object with a URL string.
162
-
163
- A `ValueError` is raised if the URL is not valid.
164
- """
165
- self._url = url
166
- self._kwargs = {
167
- "scheme": scheme,
168
- "allow_fragments": allow_fragments,
169
- }
170
- self._path = pathlib.PurePosixPath(up.unquote(self.path))
171
-
172
- def __str__(self) -> str:
173
- return self.normalize()
174
-
175
- def geturl(self, normalize: bool = False) -> str:
176
- """
177
- Return a re-combined version of the URL.
178
-
179
- If `normalize` is `True` scheme and netloc is converted to lowercase,
180
- default ports are removed and query parameters are sorted.
181
- """
182
- if normalize:
183
- return self.normalize()
184
-
185
- return super().geturl()
186
-
187
- def normalize(self, sort: bool = True, **kwargs) -> str:
188
- """
189
- Normalize the URL by converting the scheme and host to lowercase, removing the
190
- default port if present, and sorting the query parameters.
191
- """
192
-
193
- ports = kwargs.get("ports", self._default_ports)
194
-
195
- scheme = self.scheme.lower()
196
- netloc = UrlNetloc.from_netloc(self.netloc, normalize=True)
197
-
198
- try:
199
- if ports[scheme] == netloc.port:
200
- netloc.port = None
201
- except KeyError:
202
- pass
203
-
204
- path = up.quote(up.unquote(self.path))
205
- query = up.urlencode(sorted(up.parse_qsl(self.query))) if sort else self.query
206
-
207
- return up.urlunparse(
208
- (
209
- scheme,
210
- str(netloc),
211
- path,
212
- self.params,
213
- query,
214
- self.fragment,
215
- )
216
- )
217
-
218
- def __getattr__(self, attr: str) -> Any:
219
-
220
- try:
221
- attr = getattr(self._path, attr)
222
- except AttributeError as e:
223
- raise AttributeError(
224
- f"'{self.__class__.__name__}' object has no attribute '{attr}'"
225
- ) from e
226
-
227
- if not callable(attr):
228
- return attr
229
-
230
- @wraps(attr)
231
- def wrapper(*args, **kwargs) -> _UrlPath:
232
- result = attr(*args, **kwargs)
233
-
234
- return self.with_path(result)
235
-
236
- return wrapper
237
-
238
- @urlpath
239
- def with_scheme(self, scheme: str) -> _UrlPath:
240
- """
241
- Change the scheme of the URL.
242
- """
243
- return self._replace(scheme=scheme)
244
-
245
- @urlpath
246
- def with_netloc(self, netloc: Union[str, UrlNetloc]) -> _UrlPath:
247
- """
248
- Change the netloc of the URL.
249
- """
250
- return self._replace(netloc=str(netloc))
251
-
252
- @urlpath
253
- def with_path(self, path: Union[str, pathlib.PurePosixPath]) -> _UrlPath:
254
- """
255
- Change the path of the URL.
256
- """
257
-
258
- try:
259
- path = path.as_posix()
260
- except AttributeError as e:
261
- if not isinstance(path, str):
262
- raise TypeError(
263
- f"Expected str or PurePosixPath, got {type(path)}"
264
- ) from e
265
-
266
- return self._replace(path=path)
267
-
268
- @urlpath
269
- def with_params(self, params: str) -> _UrlPath:
270
- """
271
- Change the parameters of the URL.
272
- """
273
- return self._replace(params=params)
274
-
275
- @urlpath
276
- def with_query(self, query: str) -> _UrlPath:
277
- """
278
- Change the query of the URL.
279
- """
280
- return self._replace(query=query)
281
-
282
- @urlpath
283
- def with_fragment(self, fragment: str) -> _UrlPath:
284
- """
285
- Change the fragment of the URL.
286
- """
287
- return self._replace(fragment=fragment)
288
-
289
- def with_port(self, port: int) -> _UrlPath:
290
- """
291
- change the port in the netloc of the URL.
292
-
293
- If `port` is `None`, the port is removed.
294
- """
295
-
296
- netloc = UrlNetloc.from_netloc(self.netloc)
297
- netloc.port = port
298
-
299
- return self.with_netloc(netloc)
300
-
301
- def with_hostname(self, hostname: str) -> _UrlPath:
302
- """
303
- change the hostname in the netloc of the URL
304
- """
305
-
306
- netloc = UrlNetloc.from_netloc(self.netloc)
307
- netloc.hostname = hostname
308
-
309
- return self.with_netloc(netloc)
310
-
311
- def with_credentials(self, username: str, password: str = None) -> _UrlPath:
312
- """
313
- change the username and password in the netloc of the URL
314
-
315
- to change only `username` the `password` must also be provided.
316
-
317
- If `username` is `None`, the credentials are removed.
318
- """
319
-
320
- netloc = UrlNetloc.from_netloc(self.netloc)
321
- netloc.username = username
322
- netloc.password = password
323
-
324
- return self.with_netloc(netloc)
325
-
326
-
327
- __all__ = [
328
- "UrlNetloc",
329
- "UrlPath",
330
- "normalize_url",
331
- ]
File without changes