python-urlopen 0.0.9__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,19 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-urlopen
3
- Version: 0.0.9
3
+ Version: 0.1.1
4
4
  Summary: Python urlopen wrapper.
5
- Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
5
+ Home-page: https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
6
6
  License: MIT
7
7
  Keywords: urlopen
8
8
  Author: ChenyangGao
9
9
  Author-email: wosiwujm@gmail.com
10
- Requires-Python: >=3.10,<4.0
10
+ Requires-Python: >=3.12,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: OS Independent
15
15
  Classifier: Programming Language :: Python
16
16
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.10
18
- Classifier: Programming Language :: Python :: 3.11
19
17
  Classifier: Programming Language :: Python :: 3.12
20
18
  Classifier: Programming Language :: Python :: 3.13
21
19
  Classifier: Programming Language :: Python :: 3 :: Only
@@ -23,12 +21,17 @@ Classifier: Topic :: Software Development
23
21
  Classifier: Topic :: Software Development :: Libraries
24
22
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
23
  Requires-Dist: brotli
26
- Requires-Dist: http_response
27
- Requires-Dist: orjson
28
- Requires-Dist: python-argtools
29
- Requires-Dist: python-filewrap
24
+ Requires-Dist: http_response (>=0.0.5)
25
+ Requires-Dist: python-argtools (>=0.0.2)
26
+ Requires-Dist: python-cookietools (>=0.0.6)
27
+ Requires-Dist: python-dicttools (>=0.0.1)
28
+ Requires-Dist: python-ensure (>=0.0.1)
29
+ Requires-Dist: python-filewrap (>=0.2.8)
30
+ Requires-Dist: python-http_request (>=0.0.9)
31
+ Requires-Dist: python-undefined (>=0.0.3)
32
+ Requires-Dist: yarl
30
33
  Requires-Dist: zstandard
31
- Project-URL: Repository, https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
34
+ Project-URL: Repository, https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
32
35
  Description-Content-Type: text/markdown
33
36
 
34
37
  # Python urlopen wrapper.
@@ -1,19 +1,19 @@
1
1
  [tool.poetry]
2
2
  name = "python-urlopen"
3
- version = "0.0.9"
3
+ version = "0.1.1"
4
4
  description = "Python urlopen wrapper."
5
5
  authors = ["ChenyangGao <wosiwujm@gmail.com>"]
6
6
  license = "MIT"
7
7
  readme = "readme.md"
8
- homepage = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen"
9
- repository = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen"
8
+ homepage = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
9
+ repository = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
10
10
  keywords = ["urlopen"]
11
11
  classifiers = [
12
12
  "License :: OSI Approved :: MIT License",
13
13
  "Development Status :: 5 - Production/Stable",
14
14
  "Programming Language :: Python",
15
15
  "Programming Language :: Python :: 3",
16
- "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.12",
17
17
  "Programming Language :: Python :: 3 :: Only",
18
18
  "Operating System :: OS Independent",
19
19
  "Intended Audience :: Developers",
@@ -26,12 +26,17 @@ include = [
26
26
  ]
27
27
 
28
28
  [tool.poetry.dependencies]
29
- python = "^3.10"
29
+ python = "^3.12"
30
30
  brotli = "*"
31
- http_response = "*"
32
- orjson = "*"
33
- python-argtools = "*"
34
- python-filewrap = "*"
31
+ http_response = ">=0.0.5"
32
+ python-argtools = ">=0.0.2"
33
+ python-cookietools = ">=0.0.6"
34
+ python-dicttools = ">=0.0.1"
35
+ python-ensure = ">=0.0.1"
36
+ python-filewrap = ">=0.2.8"
37
+ python-http_request = ">=0.0.9"
38
+ python-undefined = ">=0.0.3"
39
+ yarl = "*"
35
40
  zstandard = "*"
36
41
 
37
42
  [tool.poetry.scripts]
@@ -0,0 +1,408 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+
4
+ __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
+ __version__ = (0, 1, 1)
6
+ __all__ = ["urlopen", "request", "download"]
7
+
8
+ import errno
9
+
10
+ from collections import UserString
11
+ from collections.abc import Buffer, Callable, Generator, Iterable, Mapping
12
+ from copy import copy
13
+ from gzip import decompress as decompress_gzip
14
+ from http.client import HTTPResponse
15
+ from http.cookiejar import CookieJar
16
+ from inspect import isgenerator
17
+ from os import fsdecode, fstat, makedirs, PathLike
18
+ from os.path import abspath, dirname, isdir, join as joinpath
19
+ from shutil import COPY_BUFSIZE # type: ignore
20
+ from socket import getdefaulttimeout, setdefaulttimeout
21
+ from ssl import SSLContext, _create_unverified_context
22
+ from types import EllipsisType
23
+ from typing import cast, overload, Any, Literal
24
+ from urllib.error import HTTPError
25
+ from urllib.request import (
26
+ build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
27
+ HTTPRedirectHandler, OpenerDirector, Request,
28
+ )
29
+ from zlib import compressobj, DEF_MEM_LEVEL, DEFLATED, MAX_WBITS
30
+
31
+ from argtools import argcount
32
+ from cookietools import cookies_dict_to_str
33
+ from dicttools import iter_items
34
+ from ensure import ensure_buffer
35
+ from filewrap import bio_skip_iter, bio_chunk_iter, SupportsRead, SupportsWrite
36
+ from http_request import normalize_request_args, SupportsGeturl
37
+ from http_response import (
38
+ get_filename, get_length, is_chunked, is_range_request,
39
+ parse_response,
40
+ )
41
+ from yarl import URL
42
+ from undefined import undefined, Undefined
43
+
44
+
45
+ type string = Buffer | str | UserString
46
+
47
+ if "__del__" not in HTTPResponse.__dict__:
48
+ setattr(HTTPResponse, "__del__", HTTPResponse.close)
49
+ if "__del__" not in OpenerDirector.__dict__:
50
+ setattr(OpenerDirector, "__del__", OpenerDirector.close)
51
+
52
+ _cookies = CookieJar()
53
+ _opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()), HTTPCookieProcessor(_cookies))
54
+ setattr(_opener, "cookies", _cookies)
55
+
56
+
57
+ if getdefaulttimeout() is None:
58
+ setdefaulttimeout(60)
59
+
60
+
61
class NoRedirectHandler(HTTPRedirectHandler):
    """Redirect handler that never produces a follow-up request."""

    def redirect_request(self, /, *args, **kwds) -> None:
        """Ignore every argument and return ``None`` (no new request)."""
        return
64
+
65
+
66
def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
    """Inflate a body that was sent with ``Content-Encoding: deflate``.

    The previous implementation built a ``compressobj`` and therefore
    *compressed* its input instead of decompressing it.

    Servers disagree on what "deflate" means, so two framings are accepted:

    1. zlib-wrapped data (RFC 1950), which is what the HTTP specification asks for;
    2. a raw deflate stream (RFC 1951) without header and checksum.

    :param data: the compressed payload; empty input is returned unchanged
    :param compresslevel: unused, kept only so that existing callers which
        pass it keep working

    :return: the decompressed bytes

    :raise zlib.error: if `data` is valid in neither framing
    """
    from zlib import decompress, error as ZlibError

    if not data:
        return data
    try:
        # Default wbits: expects the zlib header and the trailing checksum.
        return decompress(data)
    except ZlibError:
        # Negative wbits: raw deflate stream, no header and no checksum.
        return decompress(data, -MAX_WBITS)
86
+
87
+
88
+ def decompress_response(response: HTTPResponse, /) -> bytes:
89
+ data = response.read()
90
+ content_encoding = response.headers.get("content-encoding")
91
+ match content_encoding:
92
+ case "gzip":
93
+ data = decompress_gzip(data)
94
+ case "deflate":
95
+ data = decompress_deflate(data)
96
+ case "br":
97
+ from brotli import decompress as decompress_br # type: ignore
98
+ data = decompress_br(data)
99
+ case "zstd":
100
+ from zstandard import decompress as decompress_zstd
101
+ data = decompress_zstd(data)
102
+ return data
103
+
104
+
105
def _iter_file_chunks(file, /):
    """Yield the chunks of the binary `file` and close it once iteration ends."""
    try:
        yield from bio_chunk_iter(file)
    finally:
        file.close()


def urlopen(
    url: string | SupportsGeturl | URL | Request,
    method: string = "GET",
    params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
    data: Any = None,
    json: Any = None,
    headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
    follow_redirects: bool = True,
    proxies: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
    context: None | SSLContext = None,
    cookies: None | CookieJar = None,
    timeout: None | Undefined | float = undefined,
    opener: None | OpenerDirector = _opener,
    **_,
) -> HTTPResponse:
    """Send an HTTP request through a freshly built opener.

    :param url: the target; a ready-made ``Request`` is used as is, in which
        case `method`, `params`, `data`, `json` and `headers` are not applied
    :param method: HTTP method, forwarded to ``normalize_request_args``
    :param params: query parameters, forwarded to ``normalize_request_args``
    :param data: request body; a ``PathLike`` is opened and streamed in
        chunks, a readable object is streamed in chunks, anything else is
        forwarded to ``normalize_request_args``
    :param json: JSON body, forwarded to ``normalize_request_args``
    :param headers: request headers, forwarded to ``normalize_request_args``
    :param follow_redirects: if false, a ``NoRedirectHandler`` is installed
    :param proxies: items are passed pairwise to ``Request.set_proxy(host, type)``
    :param context: SSL context for an extra ``HTTPSHandler``; without it and
        without `opener`, an unverified context is used
    :param cookies: cookie jar; defaults to the ``cookies`` attribute of `opener`
    :param timeout: passed to ``opener.open`` unless left as ``undefined``
    :param opener: template whose handlers are copied into the new opener
    :param _: extra keyword arguments are accepted and ignored

    :return: the response, with two extra attributes: ``opener`` (the opener
        that served it) and ``cookies`` (a jar dedicated to this call)

    :raise HTTPError: re-raised from ``opener.open``; its ``file``, when
        present, gets the same two extra attributes
    """
    if isinstance(url, Request):
        request = url
    else:
        if isinstance(data, PathLike):
            # The wrapper closes the file when the body has been consumed,
            # instead of leaving the handle to the garbage collector.
            data = _iter_file_chunks(open(data, "rb"))
        elif isinstance(data, SupportsRead):
            data = map(ensure_buffer, bio_chunk_iter(data))
        request = Request(**normalize_request_args( # type: ignore
            method=method,
            url=url,
            params=params,
            data=data,
            json=json,
            headers=headers,
            ensure_ascii=True,
        ))
    if proxies:
        for host, proxy_type in iter_items(proxies):
            request.set_proxy(host, proxy_type)
    if opener is None:
        handlers: list[BaseHandler] = []
    else:
        handlers = list(map(copy, getattr(opener, "handlers")))
        if cookies is None:
            cookies = getattr(opener, "cookies", None)
    # `Request.add_header` stores names capitalised ("Cookie"), so a lookup
    # of the literal key "cookie" never finds a caller supplied header.
    # Compare case-insensitively and only fall back to the jar when the
    # caller did not set the header.
    has_cookie_header = any(
        name.lower() == "cookie" for name, _value in request.header_items()
    )
    if cookies and not has_cookie_header:
        request.add_header("Cookie", cookies_dict_to_str(cookies))
    if context is not None:
        handlers.append(HTTPSHandler(context=context))
    elif opener is None:
        handlers.append(HTTPSHandler(context=_create_unverified_context()))
    # Attach a processor for `cookies` unless the template opener already
    # has one bound to that very jar.
    if cookies is not None and (opener is None or all(
        h.cookiejar is not cookies
        for h in getattr(opener, "handlers") if isinstance(h, HTTPCookieProcessor)
    )):
        handlers.append(HTTPCookieProcessor(cookies))
    # Separate jar, handed back to the caller as `response.cookies`.
    response_cookies = CookieJar()
    if cookies is None:
        cookies = response_cookies
    handlers.append(HTTPCookieProcessor(response_cookies))
    if not follow_redirects:
        handlers.append(NoRedirectHandler())
    opener = build_opener(*handlers)
    setattr(opener, "cookies", cookies)
    try:
        if timeout is undefined:
            response = opener.open(request)
        else:
            response = opener.open(request, timeout=cast(None|float, timeout))
        setattr(response, "opener", opener)
        setattr(response, "cookies", response_cookies)
        return response
    except HTTPError as e:
        # Decorate the error response too, so that callers which go on to
        # use `e.file` see the same attributes.
        if response := getattr(e, "file", None):
            setattr(response, "opener", opener)
            setattr(response, "cookies", response_cookies)
        raise
178
+
179
+
180
+ @overload
181
+ def request(
182
+ url: string | SupportsGeturl | URL | Request,
183
+ method: string = "GET",
184
+ params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
185
+ data: Any = None,
186
+ json: Any = None,
187
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
188
+ follow_redirects: bool = True,
189
+ raise_for_status: bool = True,
190
+ *,
191
+ parse: None | EllipsisType = None,
192
+ **request_kwargs,
193
+ ) -> HTTPResponse:
194
+ ...
195
+ @overload
196
+ def request(
197
+ url: string | SupportsGeturl | URL | Request,
198
+ method: string = "GET",
199
+ params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
200
+ data: Any = None,
201
+ json: Any = None,
202
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
203
+ follow_redirects: bool = True,
204
+ raise_for_status: bool = True,
205
+ *,
206
+ parse: Literal[False],
207
+ **request_kwargs,
208
+ ) -> bytes:
209
+ ...
210
+ @overload
211
+ def request(
212
+ url: string | SupportsGeturl | URL | Request,
213
+ method: string = "GET",
214
+ params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
215
+ data: Any = None,
216
+ json: Any = None,
217
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
218
+ follow_redirects: bool = True,
219
+ raise_for_status: bool = True,
220
+ *,
221
+ parse: Literal[True],
222
+ **request_kwargs,
223
+ ) -> bytes | str | dict | list | int | float | bool | None:
224
+ ...
225
+ @overload
226
+ def request[T](
227
+ url: string | SupportsGeturl | URL | Request,
228
+ method: string = "GET",
229
+ params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
230
+ data: Any = None,
231
+ json: Any = None,
232
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
233
+ follow_redirects: bool = True,
234
+ raise_for_status: bool = True,
235
+ *,
236
+ parse: Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T],
237
+ **request_kwargs,
238
+ ) -> T:
239
+ ...
240
+ def request[T](
241
+ url: string | SupportsGeturl | URL | Request,
242
+ method: string = "GET",
243
+ params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
244
+ data: Any = None,
245
+ json: Any = None,
246
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
247
+ follow_redirects: bool = True,
248
+ raise_for_status: bool = True,
249
+ *,
250
+ parse: None | EllipsisType| bool | Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T] = None,
251
+ **request_kwargs,
252
+ ) -> HTTPResponse | bytes | str | dict | list | int | float | bool | None | T:
253
+ try:
254
+ response = urlopen(
255
+ url=url,
256
+ method=method,
257
+ params=params,
258
+ data=data,
259
+ json=json,
260
+ headers=headers,
261
+ follow_redirects=follow_redirects,
262
+ **request_kwargs,
263
+ )
264
+ except HTTPError as e:
265
+ if raise_for_status:
266
+ raise
267
+ response = getattr(e, "file")
268
+ if parse is None:
269
+ return response
270
+ elif parse is ...:
271
+ response.close()
272
+ return response
273
+ with response:
274
+ if isinstance(parse, bool):
275
+ data = decompress_response(response)
276
+ if parse:
277
+ return parse_response(response, data)
278
+ return data
279
+ ac = argcount(parse)
280
+ if ac == 1:
281
+ return cast(Callable[[HTTPResponse], T], parse)(response)
282
+ else:
283
+ return cast(Callable[[HTTPResponse, bytes], T], parse)(
284
+ response, decompress_response(response))
285
+
286
+
287
def download(
    url: string | SupportsGeturl | URL | Request,
    file: bytes | str | PathLike | SupportsWrite[bytes] = "",
    resume: bool = False,
    chunksize: int = COPY_BUFSIZE,
    headers: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
    make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any]] = None,
    **request_kwargs,
) -> str | SupportsWrite[bytes]:
    """Download a URL into a file.

    :param url: what to download, passed to `urlopen`
    :param file: destination; an object with a ``write`` method is written to
        directly (and never closed here), anything else is taken as a path.
        If the path is a directory, the file name is taken from the response
    :param resume: append to the destination instead of truncating it, and
        skip the part that is already there
    :param chunksize: read size in bytes; non-positive values fall back to
        ``COPY_BUFSIZE``
    :param headers: extra request headers; ``accept-encoding`` is always
        forced to ``identity``
    :param make_reporthook: called with the total length (or ``None`` when
        unknown); must return either a callable that receives the number of
        bytes of every step, or a generator that is sent those numbers
    :param request_kwargs: forwarded to `urlopen`

    :return: the absolute path that was written, or `file` itself when a
        writable object was passed

    :raise OSError: with ``errno.EIO`` when the existing file is larger than
        the remote content, or when the ranged follow-up request is not
        answered as a range request

    Example::

        1. use `make_reporthook` to show progress:

            You can use the following function to show progress for the download task

            .. code:: python

                from time import perf_counter

                def progress(total=None):
                    read_num = 0
                    start_t = perf_counter()
                    while True:
                        read_num += yield
                        speed = read_num / 1024 / 1024 / (perf_counter() - start_t)
                        print(f"\\r\\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)

            Or use the following function for more real-time speed

            .. code:: python

                from collections import deque
                from time import perf_counter

                def progress(total=None):
                    dq = deque(maxlen=64)
                    read_num = 0
                    dq.append((read_num, perf_counter()))
                    while True:
                        read_num += yield
                        cur_t = perf_counter()
                        speed = (read_num - dq[0][0]) / 1024 / 1024 / (cur_t - dq[0][1])
                        print(f"\\r\\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
                        dq.append((read_num, cur_t))
    """
    if chunksize <= 0:
        chunksize = COPY_BUFSIZE
    headers = request_kwargs["headers"] = dict(headers or ())
    headers["accept-encoding"] = "identity"
    response: HTTPResponse = urlopen(url, **request_kwargs)
    fdst: SupportsWrite[bytes]
    # Set only when this function opened the destination itself; such a file
    # is closed on every exit path. Caller supplied objects are left open.
    opened_file = None
    reporthook_close: None | Callable = None
    try:
        content_length = get_length(response)
        if content_length == 0 and is_chunked(response):
            # A chunked body has no meaningful length up front.
            content_length = None
        if hasattr(file, "write"):
            file = fdst = cast(SupportsWrite[bytes], file)
        else:
            file = abspath(fsdecode(file))
            if isdir(file):
                file = joinpath(file, get_filename(response, "download"))
            mode = "ab" if resume else "wb"
            try:
                opened_file = open(file, mode)
            except FileNotFoundError:
                # Parent directory is missing: create it and retry once.
                makedirs(dirname(file), exist_ok=True)
                opened_file = open(file, mode)
            fdst = opened_file
        filesize = 0
        if resume:
            try:
                filesize = fstat(getattr(fdst, "fileno")()).st_size
            except (AttributeError, OSError):
                # No usable file descriptor: treat as nothing downloaded yet.
                pass
            else:
                if filesize == content_length:
                    # Already complete.
                    return file
        if (
            filesize
            and is_range_request(response)
            and content_length is not None
            and filesize > content_length
        ):
            raise OSError(
                errno.EIO,
                f"file {file!r} is larger than url {url!r}: {filesize} > {content_length} (in bytes)",
            )
        if callable(make_reporthook):
            reporthook = make_reporthook(content_length)
            if isgenerator(reporthook):
                reporthook_close = reporthook.close
                # Prime the generator so that it can receive values.
                next(reporthook)
                reporthook = reporthook.send
            else:
                reporthook_close = getattr(reporthook, "close", None)
                reporthook = cast(Callable[[int], Any], reporthook)
        else:
            reporthook = None
        if filesize:
            if is_range_request(response):
                response.close()
                # `request_kwargs` already carries "headers"; passing
                # `headers=` next to it would raise TypeError, so override
                # the entry instead.
                response = urlopen(url, **{
                    **request_kwargs,
                    "headers": {**headers, "Range": "bytes=%d-" % filesize},
                })
                if not is_range_request(response):
                    raise OSError(errno.EIO, f"range request failed: {url!r}")
                if reporthook is not None:
                    reporthook(filesize)
            elif resume:
                # No range support: read and discard what is already on disk.
                for _ in bio_skip_iter(response, filesize, callback=reporthook):
                    pass
        fsrc_read = response.read
        fdst_write = fdst.write
        while (chunk := fsrc_read(chunksize)):
            fdst_write(chunk)
            if reporthook is not None:
                reporthook(len(chunk))
    finally:
        response.close()
        if callable(reporthook_close):
            reporthook_close()
        if opened_file is not None:
            opened_file.close()
    return file
408
+
@@ -1,372 +0,0 @@
1
- #!/usr/bin/env python3
2
- # coding: utf-8
3
-
4
- __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 0, 9)
6
- __all__ = ["urlopen", "request", "download"]
7
-
8
- import errno
9
-
10
- from collections.abc import Callable, Generator, Iterable, Mapping, Sequence
11
- from gzip import decompress as decompress_gzip
12
- from http.client import HTTPResponse
13
- from http.cookiejar import CookieJar
14
- from inspect import isgenerator
15
- from os import fsdecode, fstat, makedirs, PathLike
16
- from os.path import abspath, dirname, isdir, join as joinpath
17
- from re import compile as re_compile
18
- from shutil import COPY_BUFSIZE # type: ignore
19
- from socket import getdefaulttimeout, setdefaulttimeout
20
- from ssl import SSLContext, _create_unverified_context
21
- from string import punctuation
22
- from types import EllipsisType
23
- from typing import cast, Any
24
- from urllib.error import HTTPError
25
- from urllib.parse import quote, urlencode, urlsplit
26
- from urllib.request import (
27
- build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
28
- HTTPRedirectHandler, OpenerDirector, Request,
29
- )
30
- from zlib import compressobj, DEF_MEM_LEVEL, DEFLATED, MAX_WBITS
31
-
32
- from argtools import argcount
33
- from filewrap import bio_skip_iter, SupportsWrite
34
- from http_response import get_filename, get_length, is_chunked, is_range_request
35
- from orjson import dumps, loads
36
-
37
-
38
- if "__del__" not in HTTPResponse.__dict__:
39
- setattr(HTTPResponse, "__del__", HTTPResponse.close)
40
- if "__del__" not in OpenerDirector.__dict__:
41
- setattr(OpenerDirector, "__del__", OpenerDirector.close)
42
-
43
- _opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()))
44
- CRE_search_charset = re_compile(r"\bcharset=(?P<charset>[^ ;]+)").search
45
-
46
- if getdefaulttimeout() is None:
47
- setdefaulttimeout(60)
48
-
49
-
50
- class NoRedirectHandler(HTTPRedirectHandler):
51
-
52
- def redirect_request(self, req, fp, code, msg, headers, newurl):
53
- return None
54
-
55
-
56
- def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
57
- # Fork from: https://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations#answer-1089787
58
- compress = compressobj(
59
- compresslevel, # level: 0-9
60
- DEFLATED, # method: must be DEFLATED
61
- -MAX_WBITS, # window size in bits:
62
- # -15..-8: negate, suppress header
63
- # 8..15: normal
64
- # 16..30: subtract 16, gzip header
65
- DEF_MEM_LEVEL, # mem level: 1..8/9
66
- 0 # strategy:
67
- # 0 = Z_DEFAULT_STRATEGY
68
- # 1 = Z_FILTERED
69
- # 2 = Z_HUFFMAN_ONLY
70
- # 3 = Z_RLE
71
- # 4 = Z_FIXED
72
- )
73
- deflated = compress.compress(data)
74
- deflated += compress.flush()
75
- return deflated
76
-
77
-
78
- def get_charset(content_type: str, default="utf-8") -> str:
79
- match = CRE_search_charset(content_type)
80
- if match is None:
81
- return "utf-8"
82
- return match["charset"]
83
-
84
-
85
- def ensure_ascii_url(url: str, /) -> str:
86
- if url.isascii():
87
- return url
88
- return quote(url, safe=punctuation)
89
-
90
-
91
- def decompress_response(resp: HTTPResponse, /) -> bytes:
92
- data = resp.read()
93
- content_encoding = resp.headers.get("Content-Encoding")
94
- match content_encoding:
95
- case "gzip":
96
- data = decompress_gzip(data)
97
- case "deflate":
98
- data = decompress_deflate(data)
99
- case "br":
100
- from brotli import decompress as decompress_br # type: ignore
101
- data = decompress_br(data)
102
- case "zstd":
103
- from zstandard import decompress as decompress_zstd
104
- data = decompress_zstd(data)
105
- return data
106
-
107
-
108
- def urlopen(
109
- url: str | Request,
110
- method: str = "GET",
111
- params: None | str | Mapping | Sequence[tuple[Any, Any]] = None,
112
- data: None | bytes | str | Mapping | Sequence[tuple[Any, Any]] | Iterable[bytes] = None,
113
- json: Any = None,
114
- headers: None | Mapping[str, str] = None,
115
- timeout: None | int | float = None,
116
- cookies: None | CookieJar = None,
117
- proxy: None | tuple[str, str] = None,
118
- context: None | SSLContext = None,
119
- allow_redirects: bool = True,
120
- opener: None | OpenerDirector = None,
121
- origin: None | str = None,
122
- ) -> HTTPResponse:
123
- if isinstance(url, str) and not urlsplit(url).scheme:
124
- if origin:
125
- if not url.startswith("/"):
126
- url = "/" + url
127
- url = origin + url
128
- if params:
129
- if not isinstance(params, str):
130
- params = urlencode(params)
131
- params = cast(None | str, params)
132
- if json is not None:
133
- if isinstance(json, bytes):
134
- data = json
135
- else:
136
- data = dumps(json)
137
- if headers:
138
- headers = {**headers, "Content-type": "application/json; charset=UTF-8"}
139
- else:
140
- headers = {"Content-type": "application/json; charset=UTF-8"}
141
- elif data is not None:
142
- if isinstance(data, bytes):
143
- pass
144
- elif isinstance(data, str):
145
- data = data.encode("utf-8")
146
- elif isinstance(data, (Mapping, Sequence)):
147
- data = urlencode(cast(Mapping | Sequence, data)).encode("latin-1")
148
- if headers:
149
- headers = {**headers, "Content-type": "application/x-www-form-urlencoded"}
150
- else:
151
- headers = {"Content-type": "application/x-www-form-urlencoded"}
152
- data = cast(None | bytes | Iterable[bytes], data)
153
- if isinstance(url, Request):
154
- req = url
155
- if params:
156
- req.full_url += "?&"["?" in req.full_url] + params
157
- if headers:
158
- for key, val in headers.items():
159
- req.add_header(key, val)
160
- if data is not None:
161
- req.data = data
162
- req.method = method.upper()
163
- else:
164
- if headers:
165
- headers = dict(headers)
166
- else:
167
- headers = {}
168
- if params:
169
- url += "?&"["?" in url] + params
170
- req = Request(url, data=data, headers=headers, method=method.upper())
171
- if proxy:
172
- req.set_proxy(*proxy)
173
- if opener is None:
174
- handlers: list[BaseHandler] = []
175
- if context is not None:
176
- handlers.append(HTTPSHandler(context=context))
177
- if cookies is not None:
178
- handlers.append(HTTPCookieProcessor(cookies))
179
- if not allow_redirects:
180
- handlers.append(NoRedirectHandler())
181
- if handlers:
182
- if not isinstance(handlers[0], HTTPSHandler):
183
- handlers.insert(0, HTTPSHandler(context=_create_unverified_context()))
184
- opener = build_opener(*handlers)
185
- else:
186
- opener = _opener
187
- req.full_url = ensure_ascii_url(req.full_url)
188
- if timeout is None:
189
- return opener.open(req)
190
- else:
191
- return opener.open(req, timeout=timeout)
192
-
193
-
194
- def request(
195
- url: str | Request,
196
- method: str = "GET",
197
- parse: None | EllipsisType | bool | Callable = None,
198
- raise_for_status: bool = True,
199
- timeout: None | float = 60,
200
- allow_redirects: bool = True,
201
- **request_kwargs,
202
- ):
203
- request_kwargs.pop("stream", None)
204
- try:
205
- resp = urlopen(
206
- url=url,
207
- method=method,
208
- timeout=timeout,
209
- allow_redirects=allow_redirects,
210
- **request_kwargs,
211
- )
212
- except HTTPError as e:
213
- if raise_for_status:
214
- raise
215
- resp = getattr(e, "file")
216
- if parse is None:
217
- return resp
218
- elif parse is ...:
219
- resp.close()
220
- return resp
221
- with resp:
222
- if isinstance(parse, bool):
223
- data = decompress_response(resp)
224
- if parse:
225
- content_type = resp.headers.get("Content-Type", "")
226
- if content_type == "application/json":
227
- return loads(data)
228
- elif content_type.startswith("application/json;"):
229
- return loads(data.decode(get_charset(content_type)))
230
- elif content_type.startswith("text/"):
231
- return data.decode(get_charset(content_type))
232
- return data
233
- else:
234
- ac = argcount(parse)
235
- with resp:
236
- if ac == 1:
237
- return parse(resp)
238
- else:
239
- return parse(resp, decompress_response(resp))
240
-
241
-
242
- def download(
243
- url: str,
244
- file: bytes | str | PathLike | SupportsWrite[bytes] = "",
245
- resume: bool = False,
246
- chunksize: int = COPY_BUFSIZE,
247
- headers: None | Mapping[str, str] = None,
248
- make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any]] = None,
249
- **urlopen_kwargs,
250
- ) -> str | SupportsWrite[bytes]:
251
- """Download a URL into a file.
252
-
253
- Example::
254
-
255
- 1. use `make_reporthook` to show progress:
256
-
257
- You can use the following function to show progress for the download task
258
-
259
- .. code: python
260
-
261
- from time import perf_counter
262
-
263
- def progress(total=None):
264
- read_num = 0
265
- start_t = perf_counter()
266
- while True:
267
- read_num += yield
268
- speed = read_num / 1024 / 1024 / (perf_counter() - start_t)
269
- print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
270
-
271
- Or use the following function for more real-time speed
272
-
273
- .. code: python
274
-
275
- from collections import deque
276
- from time import perf_counter
277
-
278
- def progress(total=None):
279
- dq = deque(maxlen=64)
280
- read_num = 0
281
- dq.append((read_num, perf_counter()))
282
- while True:
283
- read_num += yield
284
- cur_t = perf_counter()
285
- speed = (read_num - dq[0][0]) / 1024 / 1024 / (cur_t - dq[0][1])
286
- print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
287
- dq.append((read_num, cur_t))
288
- """
289
- if headers:
290
- headers = {**headers, "Accept-encoding": "identity"}
291
- else:
292
- headers = {"Accept-encoding": "identity"}
293
-
294
- if chunksize <= 0:
295
- chunksize = COPY_BUFSIZE
296
-
297
- resp: HTTPResponse = urlopen(url, headers=headers, **urlopen_kwargs)
298
- content_length = get_length(resp)
299
- if content_length == 0 and is_chunked(resp):
300
- content_length = None
301
-
302
- fdst: SupportsWrite[bytes]
303
- if hasattr(file, "write"):
304
- file = fdst = cast(SupportsWrite[bytes], file)
305
- else:
306
- file = abspath(fsdecode(file))
307
- if isdir(file):
308
- file = joinpath(file, get_filename(resp, "download"))
309
- try:
310
- fdst = open(file, "ab" if resume else "wb")
311
- except FileNotFoundError:
312
- makedirs(dirname(file), exist_ok=True)
313
- fdst = open(file, "ab" if resume else "wb")
314
-
315
- filesize = 0
316
- if resume:
317
- try:
318
- fileno = getattr(fdst, "fileno")()
319
- filesize = fstat(fileno).st_size
320
- except (AttributeError, OSError):
321
- pass
322
- else:
323
- if filesize == content_length:
324
- return file
325
- if filesize and is_range_request(resp):
326
- if filesize == content_length:
327
- return file
328
- elif content_length is not None and filesize > content_length:
329
- raise OSError(
330
- errno.EIO,
331
- f"file {file!r} is larger than url {url!r}: {filesize} > {content_length} (in bytes)",
332
- )
333
-
334
- reporthook_close: None | Callable = None
335
- if callable(make_reporthook):
336
- reporthook = make_reporthook(content_length)
337
- if isgenerator(reporthook):
338
- reporthook_close = reporthook.close
339
- next(reporthook)
340
- reporthook = reporthook.send
341
- else:
342
- reporthook_close = getattr(reporthook, "close", None)
343
- reporthook = cast(Callable[[int], Any], reporthook)
344
- else:
345
- reporthook = None
346
-
347
- try:
348
- if filesize:
349
- if is_range_request(resp):
350
- resp.close()
351
- resp = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **urlopen_kwargs)
352
- if not is_range_request(resp):
353
- raise OSError(errno.EIO, f"range request failed: {url!r}")
354
- if reporthook is not None:
355
- reporthook(filesize)
356
- elif resume:
357
- for _ in bio_skip_iter(resp, filesize, callback=reporthook):
358
- pass
359
-
360
- fsrc_read = resp.read
361
- fdst_write = fdst.write
362
- while (chunk := fsrc_read(chunksize)):
363
- fdst_write(chunk)
364
- if reporthook is not None:
365
- reporthook(len(chunk))
366
- finally:
367
- resp.close()
368
- if callable(reporthook_close):
369
- reporthook_close()
370
-
371
- return file
372
-
File without changes
File without changes