python-urlopen 0.0.9__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_urlopen-0.0.9 → python_urlopen-0.1.1}/PKG-INFO +13 -10
- {python_urlopen-0.0.9 → python_urlopen-0.1.1}/pyproject.toml +14 -9
- python_urlopen-0.1.1/urlopen/__init__.py +408 -0
- python_urlopen-0.0.9/urlopen/__init__.py +0 -372
- {python_urlopen-0.0.9 → python_urlopen-0.1.1}/LICENSE +0 -0
- {python_urlopen-0.0.9 → python_urlopen-0.1.1}/readme.md +0 -0
- {python_urlopen-0.0.9 → python_urlopen-0.1.1}/urlopen/__main__.py +0 -0
- {python_urlopen-0.0.9 → python_urlopen-0.1.1}/urlopen/py.typed +0 -0
|
@@ -1,21 +1,19 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-urlopen
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Python urlopen wrapper.
|
|
5
|
-
Home-page: https://github.com/ChenyangGao/
|
|
5
|
+
Home-page: https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
|
|
6
6
|
License: MIT
|
|
7
7
|
Keywords: urlopen
|
|
8
8
|
Author: ChenyangGao
|
|
9
9
|
Author-email: wosiwujm@gmail.com
|
|
10
|
-
Requires-Python: >=3.
|
|
10
|
+
Requires-Python: >=3.12,<4.0
|
|
11
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Intended Audience :: Developers
|
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
|
14
14
|
Classifier: Operating System :: OS Independent
|
|
15
15
|
Classifier: Programming Language :: Python
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
19
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
18
|
Classifier: Programming Language :: Python :: 3.13
|
|
21
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
@@ -23,12 +21,17 @@ Classifier: Topic :: Software Development
|
|
|
23
21
|
Classifier: Topic :: Software Development :: Libraries
|
|
24
22
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
23
|
Requires-Dist: brotli
|
|
26
|
-
Requires-Dist: http_response
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist: python-
|
|
29
|
-
Requires-Dist: python-
|
|
24
|
+
Requires-Dist: http_response (>=0.0.5)
|
|
25
|
+
Requires-Dist: python-argtools (>=0.0.2)
|
|
26
|
+
Requires-Dist: python-cookietools (>=0.0.6)
|
|
27
|
+
Requires-Dist: python-dicttools (>=0.0.1)
|
|
28
|
+
Requires-Dist: python-ensure (>=0.0.1)
|
|
29
|
+
Requires-Dist: python-filewrap (>=0.2.8)
|
|
30
|
+
Requires-Dist: python-http_request (>=0.0.9)
|
|
31
|
+
Requires-Dist: python-undefined (>=0.0.3)
|
|
32
|
+
Requires-Dist: yarl
|
|
30
33
|
Requires-Dist: zstandard
|
|
31
|
-
Project-URL: Repository, https://github.com/ChenyangGao/
|
|
34
|
+
Project-URL: Repository, https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
|
|
32
35
|
Description-Content-Type: text/markdown
|
|
33
36
|
|
|
34
37
|
# Python urlopen wrapper.
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "python-urlopen"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.1.1"
|
|
4
4
|
description = "Python urlopen wrapper."
|
|
5
5
|
authors = ["ChenyangGao <wosiwujm@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
7
7
|
readme = "readme.md"
|
|
8
|
-
homepage = "https://github.com/ChenyangGao/
|
|
9
|
-
repository = "https://github.com/ChenyangGao/
|
|
8
|
+
homepage = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
|
|
9
|
+
repository = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
|
|
10
10
|
keywords = ["urlopen"]
|
|
11
11
|
classifiers = [
|
|
12
12
|
"License :: OSI Approved :: MIT License",
|
|
13
13
|
"Development Status :: 5 - Production/Stable",
|
|
14
14
|
"Programming Language :: Python",
|
|
15
15
|
"Programming Language :: Python :: 3",
|
|
16
|
-
"Programming Language :: Python :: 3.
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
17
|
"Programming Language :: Python :: 3 :: Only",
|
|
18
18
|
"Operating System :: OS Independent",
|
|
19
19
|
"Intended Audience :: Developers",
|
|
@@ -26,12 +26,17 @@ include = [
|
|
|
26
26
|
]
|
|
27
27
|
|
|
28
28
|
[tool.poetry.dependencies]
|
|
29
|
-
python = "^3.
|
|
29
|
+
python = "^3.12"
|
|
30
30
|
brotli = "*"
|
|
31
|
-
http_response = "
|
|
32
|
-
|
|
33
|
-
python-
|
|
34
|
-
python-
|
|
31
|
+
http_response = ">=0.0.5"
|
|
32
|
+
python-argtools = ">=0.0.2"
|
|
33
|
+
python-cookietools = ">=0.0.6"
|
|
34
|
+
python-dicttools = ">=0.0.1"
|
|
35
|
+
python-ensure = ">=0.0.1"
|
|
36
|
+
python-filewrap = ">=0.2.8"
|
|
37
|
+
python-http_request = ">=0.0.9"
|
|
38
|
+
python-undefined = ">=0.0.3"
|
|
39
|
+
yarl = "*"
|
|
35
40
|
zstandard = "*"
|
|
36
41
|
|
|
37
42
|
[tool.poetry.scripts]
|
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
|
|
4
|
+
__author__ = "ChenyangGao <https://chenyanggao.github.io>"
|
|
5
|
+
__version__ = (0, 1, 1)
|
|
6
|
+
__all__ = ["urlopen", "request", "download"]
|
|
7
|
+
|
|
8
|
+
import errno
|
|
9
|
+
|
|
10
|
+
from collections import UserString
|
|
11
|
+
from collections.abc import Buffer, Callable, Generator, Iterable, Mapping
|
|
12
|
+
from copy import copy
|
|
13
|
+
from gzip import decompress as decompress_gzip
|
|
14
|
+
from http.client import HTTPResponse
|
|
15
|
+
from http.cookiejar import CookieJar
|
|
16
|
+
from inspect import isgenerator
|
|
17
|
+
from os import fsdecode, fstat, makedirs, PathLike
|
|
18
|
+
from os.path import abspath, dirname, isdir, join as joinpath
|
|
19
|
+
from shutil import COPY_BUFSIZE # type: ignore
|
|
20
|
+
from socket import getdefaulttimeout, setdefaulttimeout
|
|
21
|
+
from ssl import SSLContext, _create_unverified_context
|
|
22
|
+
from types import EllipsisType
|
|
23
|
+
from typing import cast, overload, Any, Literal
|
|
24
|
+
from urllib.error import HTTPError
|
|
25
|
+
from urllib.request import (
|
|
26
|
+
build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
|
|
27
|
+
HTTPRedirectHandler, OpenerDirector, Request,
|
|
28
|
+
)
|
|
29
|
+
from zlib import compressobj, DEF_MEM_LEVEL, DEFLATED, MAX_WBITS
|
|
30
|
+
|
|
31
|
+
from argtools import argcount
|
|
32
|
+
from cookietools import cookies_dict_to_str
|
|
33
|
+
from dicttools import iter_items
|
|
34
|
+
from ensure import ensure_buffer
|
|
35
|
+
from filewrap import bio_skip_iter, bio_chunk_iter, SupportsRead, SupportsWrite
|
|
36
|
+
from http_request import normalize_request_args, SupportsGeturl
|
|
37
|
+
from http_response import (
|
|
38
|
+
get_filename, get_length, is_chunked, is_range_request,
|
|
39
|
+
parse_response,
|
|
40
|
+
)
|
|
41
|
+
from yarl import URL
|
|
42
|
+
from undefined import undefined, Undefined
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
type string = Buffer | str | UserString
|
|
46
|
+
|
|
47
|
+
if "__del__" not in HTTPResponse.__dict__:
|
|
48
|
+
setattr(HTTPResponse, "__del__", HTTPResponse.close)
|
|
49
|
+
if "__del__" not in OpenerDirector.__dict__:
|
|
50
|
+
setattr(OpenerDirector, "__del__", OpenerDirector.close)
|
|
51
|
+
|
|
52
|
+
_cookies = CookieJar()
|
|
53
|
+
_opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()), HTTPCookieProcessor(_cookies))
|
|
54
|
+
setattr(_opener, "cookies", _cookies)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
if getdefaulttimeout() is None:
|
|
58
|
+
setdefaulttimeout(60)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class NoRedirectHandler(HTTPRedirectHandler):
|
|
62
|
+
def redirect_request(self, /, *args, **kwds):
|
|
63
|
+
return None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
|
|
67
|
+
# Fork from: https://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations#answer-1089787
|
|
68
|
+
compress = compressobj(
|
|
69
|
+
compresslevel, # level: 0-9
|
|
70
|
+
DEFLATED, # method: must be DEFLATED
|
|
71
|
+
-MAX_WBITS, # window size in bits:
|
|
72
|
+
# -15..-8: negate, suppress header
|
|
73
|
+
# 8..15: normal
|
|
74
|
+
# 16..30: subtract 16, gzip header
|
|
75
|
+
DEF_MEM_LEVEL, # mem level: 1..8/9
|
|
76
|
+
0 # strategy:
|
|
77
|
+
# 0 = Z_DEFAULT_STRATEGY
|
|
78
|
+
# 1 = Z_FILTERED
|
|
79
|
+
# 2 = Z_HUFFMAN_ONLY
|
|
80
|
+
# 3 = Z_RLE
|
|
81
|
+
# 4 = Z_FIXED
|
|
82
|
+
)
|
|
83
|
+
deflated = compress.compress(data)
|
|
84
|
+
deflated += compress.flush()
|
|
85
|
+
return deflated
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def decompress_response(response: HTTPResponse, /) -> bytes:
|
|
89
|
+
data = response.read()
|
|
90
|
+
content_encoding = response.headers.get("content-encoding")
|
|
91
|
+
match content_encoding:
|
|
92
|
+
case "gzip":
|
|
93
|
+
data = decompress_gzip(data)
|
|
94
|
+
case "deflate":
|
|
95
|
+
data = decompress_deflate(data)
|
|
96
|
+
case "br":
|
|
97
|
+
from brotli import decompress as decompress_br # type: ignore
|
|
98
|
+
data = decompress_br(data)
|
|
99
|
+
case "zstd":
|
|
100
|
+
from zstandard import decompress as decompress_zstd
|
|
101
|
+
data = decompress_zstd(data)
|
|
102
|
+
return data
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def urlopen(
|
|
106
|
+
url: string | SupportsGeturl | URL | Request,
|
|
107
|
+
method: string = "GET",
|
|
108
|
+
params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
|
|
109
|
+
data: Any = None,
|
|
110
|
+
json: Any = None,
|
|
111
|
+
headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
|
|
112
|
+
follow_redirects: bool = True,
|
|
113
|
+
proxies: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
|
|
114
|
+
context: None | SSLContext = None,
|
|
115
|
+
cookies: None | CookieJar = None,
|
|
116
|
+
timeout: None | Undefined | float = undefined,
|
|
117
|
+
opener: None | OpenerDirector = _opener,
|
|
118
|
+
**_,
|
|
119
|
+
) -> HTTPResponse:
|
|
120
|
+
if isinstance(url, Request):
|
|
121
|
+
request = url
|
|
122
|
+
else:
|
|
123
|
+
if isinstance(data, PathLike):
|
|
124
|
+
data = bio_chunk_iter(open(data, "rb"))
|
|
125
|
+
elif isinstance(data, SupportsRead):
|
|
126
|
+
data = map(ensure_buffer, bio_chunk_iter(data))
|
|
127
|
+
request = Request(**normalize_request_args( # type: ignore
|
|
128
|
+
method=method,
|
|
129
|
+
url=url,
|
|
130
|
+
params=params,
|
|
131
|
+
data=data,
|
|
132
|
+
json=json,
|
|
133
|
+
headers=headers,
|
|
134
|
+
ensure_ascii=True,
|
|
135
|
+
))
|
|
136
|
+
if proxies:
|
|
137
|
+
for host, type in iter_items(proxies):
|
|
138
|
+
request.set_proxy(host, type)
|
|
139
|
+
headers_ = request.headers
|
|
140
|
+
if opener is None:
|
|
141
|
+
handlers: list[BaseHandler] = []
|
|
142
|
+
else:
|
|
143
|
+
handlers = list(map(copy, getattr(opener, "handlers")))
|
|
144
|
+
if cookies is None:
|
|
145
|
+
cookies = getattr(opener, "cookies", None)
|
|
146
|
+
if cookies and "cookie" not in headers_:
|
|
147
|
+
headers_["cookie"] = cookies_dict_to_str(cookies)
|
|
148
|
+
if context is not None:
|
|
149
|
+
handlers.append(HTTPSHandler(context=context))
|
|
150
|
+
elif opener is None:
|
|
151
|
+
handlers.append(HTTPSHandler(context=_create_unverified_context()))
|
|
152
|
+
if cookies is not None and (opener is None or all(
|
|
153
|
+
h.cookiejar is not cookies
|
|
154
|
+
for h in getattr(opener, "handlers") if isinstance(h, HTTPCookieProcessor)
|
|
155
|
+
)):
|
|
156
|
+
handlers.append(HTTPCookieProcessor(cookies))
|
|
157
|
+
response_cookies = CookieJar()
|
|
158
|
+
if cookies is None:
|
|
159
|
+
cookies = response_cookies
|
|
160
|
+
handlers.append(HTTPCookieProcessor(response_cookies))
|
|
161
|
+
if not follow_redirects:
|
|
162
|
+
handlers.append(NoRedirectHandler())
|
|
163
|
+
opener = build_opener(*handlers)
|
|
164
|
+
setattr(opener, "cookies", cookies)
|
|
165
|
+
try:
|
|
166
|
+
if timeout is undefined:
|
|
167
|
+
response = opener.open(request)
|
|
168
|
+
else:
|
|
169
|
+
response = opener.open(request, timeout=cast(None|float, timeout))
|
|
170
|
+
setattr(response, "opener", opener)
|
|
171
|
+
setattr(response, "cookies", response_cookies)
|
|
172
|
+
return response
|
|
173
|
+
except HTTPError as e:
|
|
174
|
+
if response := getattr(e, "file", None):
|
|
175
|
+
setattr(response, "opener", opener)
|
|
176
|
+
setattr(response, "cookies", response_cookies)
|
|
177
|
+
raise
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@overload
|
|
181
|
+
def request(
|
|
182
|
+
url: string | SupportsGeturl | URL | Request,
|
|
183
|
+
method: string = "GET",
|
|
184
|
+
params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
|
|
185
|
+
data: Any = None,
|
|
186
|
+
json: Any = None,
|
|
187
|
+
headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
|
|
188
|
+
follow_redirects: bool = True,
|
|
189
|
+
raise_for_status: bool = True,
|
|
190
|
+
*,
|
|
191
|
+
parse: None | EllipsisType = None,
|
|
192
|
+
**request_kwargs,
|
|
193
|
+
) -> HTTPResponse:
|
|
194
|
+
...
|
|
195
|
+
@overload
|
|
196
|
+
def request(
|
|
197
|
+
url: string | SupportsGeturl | URL | Request,
|
|
198
|
+
method: string = "GET",
|
|
199
|
+
params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
|
|
200
|
+
data: Any = None,
|
|
201
|
+
json: Any = None,
|
|
202
|
+
headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
|
|
203
|
+
follow_redirects: bool = True,
|
|
204
|
+
raise_for_status: bool = True,
|
|
205
|
+
*,
|
|
206
|
+
parse: Literal[False],
|
|
207
|
+
**request_kwargs,
|
|
208
|
+
) -> bytes:
|
|
209
|
+
...
|
|
210
|
+
@overload
|
|
211
|
+
def request(
|
|
212
|
+
url: string | SupportsGeturl | URL | Request,
|
|
213
|
+
method: string = "GET",
|
|
214
|
+
params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
|
|
215
|
+
data: Any = None,
|
|
216
|
+
json: Any = None,
|
|
217
|
+
headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
|
|
218
|
+
follow_redirects: bool = True,
|
|
219
|
+
raise_for_status: bool = True,
|
|
220
|
+
*,
|
|
221
|
+
parse: Literal[True],
|
|
222
|
+
**request_kwargs,
|
|
223
|
+
) -> bytes | str | dict | list | int | float | bool | None:
|
|
224
|
+
...
|
|
225
|
+
@overload
|
|
226
|
+
def request[T](
|
|
227
|
+
url: string | SupportsGeturl | URL | Request,
|
|
228
|
+
method: string = "GET",
|
|
229
|
+
params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
|
|
230
|
+
data: Any = None,
|
|
231
|
+
json: Any = None,
|
|
232
|
+
headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
|
|
233
|
+
follow_redirects: bool = True,
|
|
234
|
+
raise_for_status: bool = True,
|
|
235
|
+
*,
|
|
236
|
+
parse: Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T],
|
|
237
|
+
**request_kwargs,
|
|
238
|
+
) -> T:
|
|
239
|
+
...
|
|
240
|
+
def request[T](
|
|
241
|
+
url: string | SupportsGeturl | URL | Request,
|
|
242
|
+
method: string = "GET",
|
|
243
|
+
params: None | string | Mapping | Iterable[tuple[Any, Any]] = None,
|
|
244
|
+
data: Any = None,
|
|
245
|
+
json: Any = None,
|
|
246
|
+
headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
|
|
247
|
+
follow_redirects: bool = True,
|
|
248
|
+
raise_for_status: bool = True,
|
|
249
|
+
*,
|
|
250
|
+
parse: None | EllipsisType| bool | Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T] = None,
|
|
251
|
+
**request_kwargs,
|
|
252
|
+
) -> HTTPResponse | bytes | str | dict | list | int | float | bool | None | T:
|
|
253
|
+
try:
|
|
254
|
+
response = urlopen(
|
|
255
|
+
url=url,
|
|
256
|
+
method=method,
|
|
257
|
+
params=params,
|
|
258
|
+
data=data,
|
|
259
|
+
json=json,
|
|
260
|
+
headers=headers,
|
|
261
|
+
follow_redirects=follow_redirects,
|
|
262
|
+
**request_kwargs,
|
|
263
|
+
)
|
|
264
|
+
except HTTPError as e:
|
|
265
|
+
if raise_for_status:
|
|
266
|
+
raise
|
|
267
|
+
response = getattr(e, "file")
|
|
268
|
+
if parse is None:
|
|
269
|
+
return response
|
|
270
|
+
elif parse is ...:
|
|
271
|
+
response.close()
|
|
272
|
+
return response
|
|
273
|
+
with response:
|
|
274
|
+
if isinstance(parse, bool):
|
|
275
|
+
data = decompress_response(response)
|
|
276
|
+
if parse:
|
|
277
|
+
return parse_response(response, data)
|
|
278
|
+
return data
|
|
279
|
+
ac = argcount(parse)
|
|
280
|
+
if ac == 1:
|
|
281
|
+
return cast(Callable[[HTTPResponse], T], parse)(response)
|
|
282
|
+
else:
|
|
283
|
+
return cast(Callable[[HTTPResponse, bytes], T], parse)(
|
|
284
|
+
response, decompress_response(response))
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def download(
|
|
288
|
+
url: string | SupportsGeturl | URL | Request,
|
|
289
|
+
file: bytes | str | PathLike | SupportsWrite[bytes] = "",
|
|
290
|
+
resume: bool = False,
|
|
291
|
+
chunksize: int = COPY_BUFSIZE,
|
|
292
|
+
headers: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
|
|
293
|
+
make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any]] = None,
|
|
294
|
+
**request_kwargs,
|
|
295
|
+
) -> str | SupportsWrite[bytes]:
|
|
296
|
+
"""Download a URL into a file.
|
|
297
|
+
|
|
298
|
+
Example::
|
|
299
|
+
|
|
300
|
+
1. use `make_reporthook` to show progress:
|
|
301
|
+
|
|
302
|
+
You can use the following function to show progress for the download task
|
|
303
|
+
|
|
304
|
+
.. code: python
|
|
305
|
+
|
|
306
|
+
from time import perf_counter
|
|
307
|
+
|
|
308
|
+
def progress(total=None):
|
|
309
|
+
read_num = 0
|
|
310
|
+
start_t = perf_counter()
|
|
311
|
+
while True:
|
|
312
|
+
read_num += yield
|
|
313
|
+
speed = read_num / 1024 / 1024 / (perf_counter() - start_t)
|
|
314
|
+
print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
|
|
315
|
+
|
|
316
|
+
Or use the following function for more real-time speed
|
|
317
|
+
|
|
318
|
+
.. code: python
|
|
319
|
+
|
|
320
|
+
from collections import deque
|
|
321
|
+
from time import perf_counter
|
|
322
|
+
|
|
323
|
+
def progress(total=None):
|
|
324
|
+
dq = deque(maxlen=64)
|
|
325
|
+
read_num = 0
|
|
326
|
+
dq.append((read_num, perf_counter()))
|
|
327
|
+
while True:
|
|
328
|
+
read_num += yield
|
|
329
|
+
cur_t = perf_counter()
|
|
330
|
+
speed = (read_num - dq[0][0]) / 1024 / 1024 / (cur_t - dq[0][1])
|
|
331
|
+
print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
|
|
332
|
+
dq.append((read_num, cur_t))
|
|
333
|
+
"""
|
|
334
|
+
if chunksize <= 0:
|
|
335
|
+
chunksize = COPY_BUFSIZE
|
|
336
|
+
headers = request_kwargs["headers"] = dict(headers or ())
|
|
337
|
+
headers["accept-encoding"] = "identity"
|
|
338
|
+
response: HTTPResponse = urlopen(url, **request_kwargs)
|
|
339
|
+
content_length = get_length(response)
|
|
340
|
+
if content_length == 0 and is_chunked(response):
|
|
341
|
+
content_length = None
|
|
342
|
+
fdst: SupportsWrite[bytes]
|
|
343
|
+
if hasattr(file, "write"):
|
|
344
|
+
file = fdst = cast(SupportsWrite[bytes], file)
|
|
345
|
+
else:
|
|
346
|
+
file = abspath(fsdecode(file))
|
|
347
|
+
if isdir(file):
|
|
348
|
+
file = joinpath(file, get_filename(response, "download"))
|
|
349
|
+
try:
|
|
350
|
+
fdst = open(file, "ab" if resume else "wb")
|
|
351
|
+
except FileNotFoundError:
|
|
352
|
+
makedirs(dirname(file), exist_ok=True)
|
|
353
|
+
fdst = open(file, "ab" if resume else "wb")
|
|
354
|
+
filesize = 0
|
|
355
|
+
if resume:
|
|
356
|
+
try:
|
|
357
|
+
fileno = getattr(fdst, "fileno")()
|
|
358
|
+
filesize = fstat(fileno).st_size
|
|
359
|
+
except (AttributeError, OSError):
|
|
360
|
+
pass
|
|
361
|
+
else:
|
|
362
|
+
if filesize == content_length:
|
|
363
|
+
return file
|
|
364
|
+
if filesize and is_range_request(response):
|
|
365
|
+
if filesize == content_length:
|
|
366
|
+
return file
|
|
367
|
+
elif content_length is not None and filesize > content_length:
|
|
368
|
+
raise OSError(
|
|
369
|
+
errno.EIO,
|
|
370
|
+
f"file {file!r} is larger than url {url!r}: {filesize} > {content_length} (in bytes)",
|
|
371
|
+
)
|
|
372
|
+
reporthook_close: None | Callable = None
|
|
373
|
+
if callable(make_reporthook):
|
|
374
|
+
reporthook = make_reporthook(content_length)
|
|
375
|
+
if isgenerator(reporthook):
|
|
376
|
+
reporthook_close = reporthook.close
|
|
377
|
+
next(reporthook)
|
|
378
|
+
reporthook = reporthook.send
|
|
379
|
+
else:
|
|
380
|
+
reporthook_close = getattr(reporthook, "close", None)
|
|
381
|
+
reporthook = cast(Callable[[int], Any], reporthook)
|
|
382
|
+
else:
|
|
383
|
+
reporthook = None
|
|
384
|
+
try:
|
|
385
|
+
if filesize:
|
|
386
|
+
if is_range_request(response):
|
|
387
|
+
response.close()
|
|
388
|
+
response = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **request_kwargs)
|
|
389
|
+
if not is_range_request(response):
|
|
390
|
+
raise OSError(errno.EIO, f"range request failed: {url!r}")
|
|
391
|
+
if reporthook is not None:
|
|
392
|
+
reporthook(filesize)
|
|
393
|
+
elif resume:
|
|
394
|
+
for _ in bio_skip_iter(response, filesize, callback=reporthook):
|
|
395
|
+
pass
|
|
396
|
+
|
|
397
|
+
fsrc_read = response.read
|
|
398
|
+
fdst_write = fdst.write
|
|
399
|
+
while (chunk := fsrc_read(chunksize)):
|
|
400
|
+
fdst_write(chunk)
|
|
401
|
+
if reporthook is not None:
|
|
402
|
+
reporthook(len(chunk))
|
|
403
|
+
finally:
|
|
404
|
+
response.close()
|
|
405
|
+
if callable(reporthook_close):
|
|
406
|
+
reporthook_close()
|
|
407
|
+
return file
|
|
408
|
+
|
|
@@ -1,372 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# coding: utf-8
|
|
3
|
-
|
|
4
|
-
__author__ = "ChenyangGao <https://chenyanggao.github.io>"
|
|
5
|
-
__version__ = (0, 0, 9)
|
|
6
|
-
__all__ = ["urlopen", "request", "download"]
|
|
7
|
-
|
|
8
|
-
import errno
|
|
9
|
-
|
|
10
|
-
from collections.abc import Callable, Generator, Iterable, Mapping, Sequence
|
|
11
|
-
from gzip import decompress as decompress_gzip
|
|
12
|
-
from http.client import HTTPResponse
|
|
13
|
-
from http.cookiejar import CookieJar
|
|
14
|
-
from inspect import isgenerator
|
|
15
|
-
from os import fsdecode, fstat, makedirs, PathLike
|
|
16
|
-
from os.path import abspath, dirname, isdir, join as joinpath
|
|
17
|
-
from re import compile as re_compile
|
|
18
|
-
from shutil import COPY_BUFSIZE # type: ignore
|
|
19
|
-
from socket import getdefaulttimeout, setdefaulttimeout
|
|
20
|
-
from ssl import SSLContext, _create_unverified_context
|
|
21
|
-
from string import punctuation
|
|
22
|
-
from types import EllipsisType
|
|
23
|
-
from typing import cast, Any
|
|
24
|
-
from urllib.error import HTTPError
|
|
25
|
-
from urllib.parse import quote, urlencode, urlsplit
|
|
26
|
-
from urllib.request import (
|
|
27
|
-
build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
|
|
28
|
-
HTTPRedirectHandler, OpenerDirector, Request,
|
|
29
|
-
)
|
|
30
|
-
from zlib import compressobj, DEF_MEM_LEVEL, DEFLATED, MAX_WBITS
|
|
31
|
-
|
|
32
|
-
from argtools import argcount
|
|
33
|
-
from filewrap import bio_skip_iter, SupportsWrite
|
|
34
|
-
from http_response import get_filename, get_length, is_chunked, is_range_request
|
|
35
|
-
from orjson import dumps, loads
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
if "__del__" not in HTTPResponse.__dict__:
|
|
39
|
-
setattr(HTTPResponse, "__del__", HTTPResponse.close)
|
|
40
|
-
if "__del__" not in OpenerDirector.__dict__:
|
|
41
|
-
setattr(OpenerDirector, "__del__", OpenerDirector.close)
|
|
42
|
-
|
|
43
|
-
_opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()))
|
|
44
|
-
CRE_search_charset = re_compile(r"\bcharset=(?P<charset>[^ ;]+)").search
|
|
45
|
-
|
|
46
|
-
if getdefaulttimeout() is None:
|
|
47
|
-
setdefaulttimeout(60)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class NoRedirectHandler(HTTPRedirectHandler):
|
|
51
|
-
|
|
52
|
-
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
|
53
|
-
return None
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
|
|
57
|
-
# Fork from: https://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations#answer-1089787
|
|
58
|
-
compress = compressobj(
|
|
59
|
-
compresslevel, # level: 0-9
|
|
60
|
-
DEFLATED, # method: must be DEFLATED
|
|
61
|
-
-MAX_WBITS, # window size in bits:
|
|
62
|
-
# -15..-8: negate, suppress header
|
|
63
|
-
# 8..15: normal
|
|
64
|
-
# 16..30: subtract 16, gzip header
|
|
65
|
-
DEF_MEM_LEVEL, # mem level: 1..8/9
|
|
66
|
-
0 # strategy:
|
|
67
|
-
# 0 = Z_DEFAULT_STRATEGY
|
|
68
|
-
# 1 = Z_FILTERED
|
|
69
|
-
# 2 = Z_HUFFMAN_ONLY
|
|
70
|
-
# 3 = Z_RLE
|
|
71
|
-
# 4 = Z_FIXED
|
|
72
|
-
)
|
|
73
|
-
deflated = compress.compress(data)
|
|
74
|
-
deflated += compress.flush()
|
|
75
|
-
return deflated
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def get_charset(content_type: str, default="utf-8") -> str:
|
|
79
|
-
match = CRE_search_charset(content_type)
|
|
80
|
-
if match is None:
|
|
81
|
-
return "utf-8"
|
|
82
|
-
return match["charset"]
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def ensure_ascii_url(url: str, /) -> str:
|
|
86
|
-
if url.isascii():
|
|
87
|
-
return url
|
|
88
|
-
return quote(url, safe=punctuation)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def decompress_response(resp: HTTPResponse, /) -> bytes:
|
|
92
|
-
data = resp.read()
|
|
93
|
-
content_encoding = resp.headers.get("Content-Encoding")
|
|
94
|
-
match content_encoding:
|
|
95
|
-
case "gzip":
|
|
96
|
-
data = decompress_gzip(data)
|
|
97
|
-
case "deflate":
|
|
98
|
-
data = decompress_deflate(data)
|
|
99
|
-
case "br":
|
|
100
|
-
from brotli import decompress as decompress_br # type: ignore
|
|
101
|
-
data = decompress_br(data)
|
|
102
|
-
case "zstd":
|
|
103
|
-
from zstandard import decompress as decompress_zstd
|
|
104
|
-
data = decompress_zstd(data)
|
|
105
|
-
return data
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def urlopen(
|
|
109
|
-
url: str | Request,
|
|
110
|
-
method: str = "GET",
|
|
111
|
-
params: None | str | Mapping | Sequence[tuple[Any, Any]] = None,
|
|
112
|
-
data: None | bytes | str | Mapping | Sequence[tuple[Any, Any]] | Iterable[bytes] = None,
|
|
113
|
-
json: Any = None,
|
|
114
|
-
headers: None | Mapping[str, str] = None,
|
|
115
|
-
timeout: None | int | float = None,
|
|
116
|
-
cookies: None | CookieJar = None,
|
|
117
|
-
proxy: None | tuple[str, str] = None,
|
|
118
|
-
context: None | SSLContext = None,
|
|
119
|
-
allow_redirects: bool = True,
|
|
120
|
-
opener: None | OpenerDirector = None,
|
|
121
|
-
origin: None | str = None,
|
|
122
|
-
) -> HTTPResponse:
|
|
123
|
-
if isinstance(url, str) and not urlsplit(url).scheme:
|
|
124
|
-
if origin:
|
|
125
|
-
if not url.startswith("/"):
|
|
126
|
-
url = "/" + url
|
|
127
|
-
url = origin + url
|
|
128
|
-
if params:
|
|
129
|
-
if not isinstance(params, str):
|
|
130
|
-
params = urlencode(params)
|
|
131
|
-
params = cast(None | str, params)
|
|
132
|
-
if json is not None:
|
|
133
|
-
if isinstance(json, bytes):
|
|
134
|
-
data = json
|
|
135
|
-
else:
|
|
136
|
-
data = dumps(json)
|
|
137
|
-
if headers:
|
|
138
|
-
headers = {**headers, "Content-type": "application/json; charset=UTF-8"}
|
|
139
|
-
else:
|
|
140
|
-
headers = {"Content-type": "application/json; charset=UTF-8"}
|
|
141
|
-
elif data is not None:
|
|
142
|
-
if isinstance(data, bytes):
|
|
143
|
-
pass
|
|
144
|
-
elif isinstance(data, str):
|
|
145
|
-
data = data.encode("utf-8")
|
|
146
|
-
elif isinstance(data, (Mapping, Sequence)):
|
|
147
|
-
data = urlencode(cast(Mapping | Sequence, data)).encode("latin-1")
|
|
148
|
-
if headers:
|
|
149
|
-
headers = {**headers, "Content-type": "application/x-www-form-urlencoded"}
|
|
150
|
-
else:
|
|
151
|
-
headers = {"Content-type": "application/x-www-form-urlencoded"}
|
|
152
|
-
data = cast(None | bytes | Iterable[bytes], data)
|
|
153
|
-
if isinstance(url, Request):
|
|
154
|
-
req = url
|
|
155
|
-
if params:
|
|
156
|
-
req.full_url += "?&"["?" in req.full_url] + params
|
|
157
|
-
if headers:
|
|
158
|
-
for key, val in headers.items():
|
|
159
|
-
req.add_header(key, val)
|
|
160
|
-
if data is not None:
|
|
161
|
-
req.data = data
|
|
162
|
-
req.method = method.upper()
|
|
163
|
-
else:
|
|
164
|
-
if headers:
|
|
165
|
-
headers = dict(headers)
|
|
166
|
-
else:
|
|
167
|
-
headers = {}
|
|
168
|
-
if params:
|
|
169
|
-
url += "?&"["?" in url] + params
|
|
170
|
-
req = Request(url, data=data, headers=headers, method=method.upper())
|
|
171
|
-
if proxy:
|
|
172
|
-
req.set_proxy(*proxy)
|
|
173
|
-
if opener is None:
|
|
174
|
-
handlers: list[BaseHandler] = []
|
|
175
|
-
if context is not None:
|
|
176
|
-
handlers.append(HTTPSHandler(context=context))
|
|
177
|
-
if cookies is not None:
|
|
178
|
-
handlers.append(HTTPCookieProcessor(cookies))
|
|
179
|
-
if not allow_redirects:
|
|
180
|
-
handlers.append(NoRedirectHandler())
|
|
181
|
-
if handlers:
|
|
182
|
-
if not isinstance(handlers[0], HTTPSHandler):
|
|
183
|
-
handlers.insert(0, HTTPSHandler(context=_create_unverified_context()))
|
|
184
|
-
opener = build_opener(*handlers)
|
|
185
|
-
else:
|
|
186
|
-
opener = _opener
|
|
187
|
-
req.full_url = ensure_ascii_url(req.full_url)
|
|
188
|
-
if timeout is None:
|
|
189
|
-
return opener.open(req)
|
|
190
|
-
else:
|
|
191
|
-
return opener.open(req, timeout=timeout)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def request(
|
|
195
|
-
url: str | Request,
|
|
196
|
-
method: str = "GET",
|
|
197
|
-
parse: None | EllipsisType | bool | Callable = None,
|
|
198
|
-
raise_for_status: bool = True,
|
|
199
|
-
timeout: None | float = 60,
|
|
200
|
-
allow_redirects: bool = True,
|
|
201
|
-
**request_kwargs,
|
|
202
|
-
):
|
|
203
|
-
request_kwargs.pop("stream", None)
|
|
204
|
-
try:
|
|
205
|
-
resp = urlopen(
|
|
206
|
-
url=url,
|
|
207
|
-
method=method,
|
|
208
|
-
timeout=timeout,
|
|
209
|
-
allow_redirects=allow_redirects,
|
|
210
|
-
**request_kwargs,
|
|
211
|
-
)
|
|
212
|
-
except HTTPError as e:
|
|
213
|
-
if raise_for_status:
|
|
214
|
-
raise
|
|
215
|
-
resp = getattr(e, "file")
|
|
216
|
-
if parse is None:
|
|
217
|
-
return resp
|
|
218
|
-
elif parse is ...:
|
|
219
|
-
resp.close()
|
|
220
|
-
return resp
|
|
221
|
-
with resp:
|
|
222
|
-
if isinstance(parse, bool):
|
|
223
|
-
data = decompress_response(resp)
|
|
224
|
-
if parse:
|
|
225
|
-
content_type = resp.headers.get("Content-Type", "")
|
|
226
|
-
if content_type == "application/json":
|
|
227
|
-
return loads(data)
|
|
228
|
-
elif content_type.startswith("application/json;"):
|
|
229
|
-
return loads(data.decode(get_charset(content_type)))
|
|
230
|
-
elif content_type.startswith("text/"):
|
|
231
|
-
return data.decode(get_charset(content_type))
|
|
232
|
-
return data
|
|
233
|
-
else:
|
|
234
|
-
ac = argcount(parse)
|
|
235
|
-
with resp:
|
|
236
|
-
if ac == 1:
|
|
237
|
-
return parse(resp)
|
|
238
|
-
else:
|
|
239
|
-
return parse(resp, decompress_response(resp))
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def download(
    url: str,
    file: bytes | str | PathLike | SupportsWrite[bytes] = "",
    resume: bool = False,
    chunksize: int = COPY_BUFSIZE,
    headers: None | Mapping[str, str] = None,
    make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any]] = None,
    **urlopen_kwargs,
) -> str | SupportsWrite[bytes]:
    """Download a URL into a file.

    :param url: URL to fetch.
    :param file: Destination. Either a path (``bytes``/``str``/``PathLike``;
        a directory means "derive the filename from the response", an empty
        string means the current working directory) or any object exposing a
        ``write`` method, which is used as-is and never closed here.
    :param resume: If true, open the path in append mode and continue a
        partial download via an HTTP Range request when the server allows it.
    :param chunksize: Bytes read per iteration; non-positive values fall back
        to ``COPY_BUFSIZE``.
    :param headers: Extra request headers. ``Accept-encoding: identity`` is
        always forced so on-disk byte counts match ``Content-Length``.
    :param make_reporthook: Optional factory, called once with the total
        length (or ``None`` when unknown). It may return a callable accepting
        per-chunk byte counts, or a generator to which counts are ``send()``-ed.
    :param urlopen_kwargs: Passed through to ``urlopen``.
    :return: The absolute path written to, or the file-like object passed in.
    :raises OSError: If the local file is already larger than the remote
        resource, or a required range request is not honored by the server.

    Example::

        1. use `make_reporthook` to show progress:

        You can use the following function to show progress for the download task

        .. code:: python

            from time import perf_counter

            def progress(total=None):
                read_num = 0
                start_t = perf_counter()
                while True:
                    read_num += yield
                    speed = read_num / 1024 / 1024 / (perf_counter() - start_t)
                    print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)

        Or use the following function for more real-time speed

        .. code:: python

            from collections import deque
            from time import perf_counter

            def progress(total=None):
                dq = deque(maxlen=64)
                read_num = 0
                dq.append((read_num, perf_counter()))
                while True:
                    read_num += yield
                    cur_t = perf_counter()
                    speed = (read_num - dq[0][0]) / 1024 / 1024 / (cur_t - dq[0][1])
                    print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
                    dq.append((read_num, cur_t))
    """
    # Force identity encoding so sizes and byte ranges refer to raw bytes.
    if headers:
        headers = {**headers, "Accept-encoding": "identity"}
    else:
        headers = {"Accept-encoding": "identity"}

    if chunksize <= 0:
        chunksize = COPY_BUFSIZE

    resp: HTTPResponse = urlopen(url, headers=headers, **urlopen_kwargs)
    fdst: SupportsWrite[bytes]
    # BUGFIX: track whether WE opened the destination so it can be closed in
    # the cleanup below. The previous version opened path-based destinations
    # and never closed them, and the early-return / error paths also leaked
    # the HTTP response.
    fdst_is_owned = False
    try:
        content_length = get_length(resp)
        if content_length == 0 and is_chunked(resp):
            # Chunked transfer with no usable length: total size is unknown.
            content_length = None

        if hasattr(file, "write"):
            # Caller-supplied writer: use as-is, caller keeps ownership.
            file = fdst = cast(SupportsWrite[bytes], file)
        else:
            file = abspath(fsdecode(file))
            if isdir(file):
                file = joinpath(file, get_filename(resp, "download"))
            try:
                fdst = open(file, "ab" if resume else "wb")
            except FileNotFoundError:
                # Parent directory missing: create it, then retry once.
                makedirs(dirname(file), exist_ok=True)
                fdst = open(file, "ab" if resume else "wb")
            fdst_is_owned = True

        filesize = 0
        if resume:
            try:
                fileno = getattr(fdst, "fileno")()
                filesize = fstat(fileno).st_size
            except (AttributeError, OSError):
                # Destination has no real fd (or stat failed): cannot resume.
                pass
            else:
                if filesize == content_length:
                    # Already fully downloaded; nothing to do.
                    return file
        if filesize and is_range_request(resp):
            if filesize == content_length:
                return file
            elif content_length is not None and filesize > content_length:
                raise OSError(
                    errno.EIO,
                    f"file {file!r} is larger than url {url!r}: {filesize} > {content_length} (in bytes)",
                )

        reporthook_close: None | Callable = None
        if callable(make_reporthook):
            reporthook = make_reporthook(content_length)
            if isgenerator(reporthook):
                # Generator protocol: prime it, then feed byte counts via send().
                reporthook_close = reporthook.close
                next(reporthook)
                reporthook = reporthook.send
            else:
                reporthook_close = getattr(reporthook, "close", None)
            reporthook = cast(Callable[[int], Any], reporthook)
        else:
            reporthook = None

        try:
            if filesize:
                if is_range_request(resp):
                    # Re-request only the missing tail of the resource.
                    resp.close()
                    resp = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **urlopen_kwargs)
                    if not is_range_request(resp):
                        raise OSError(errno.EIO, f"range request failed: {url!r}")
                    if reporthook is not None:
                        reporthook(filesize)
                elif resume:
                    # Server ignores ranges: skip the already-downloaded prefix.
                    for _ in bio_skip_iter(resp, filesize, callback=reporthook):
                        pass

            # Hoist bound methods out of the copy loop.
            fsrc_read = resp.read
            fdst_write = fdst.write
            while (chunk := fsrc_read(chunksize)):
                fdst_write(chunk)
                if reporthook is not None:
                    reporthook(len(chunk))
        finally:
            if callable(reporthook_close):
                reporthook_close()
    finally:
        resp.close()
        if fdst_is_owned:
            cast(Any, fdst).close()

    return file
|
|
372
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|