python-urlopen 0.0.8__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,32 +1,36 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-urlopen
3
- Version: 0.0.8
3
+ Version: 0.1.0
4
4
  Summary: Python urlopen wrapper.
5
- Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
5
+ Home-page: https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
6
6
  License: MIT
7
7
  Keywords: urlopen
8
8
  Author: ChenyangGao
9
9
  Author-email: wosiwujm@gmail.com
10
- Requires-Python: >=3.10,<4.0
10
+ Requires-Python: >=3.12,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: OS Independent
15
15
  Classifier: Programming Language :: Python
16
16
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.10
18
- Classifier: Programming Language :: Python :: 3.11
19
17
  Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
20
19
  Classifier: Programming Language :: Python :: 3 :: Only
21
20
  Classifier: Topic :: Software Development
22
21
  Classifier: Topic :: Software Development :: Libraries
23
22
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
23
  Requires-Dist: brotli
25
- Requires-Dist: http_response
26
- Requires-Dist: python-argtools
27
- Requires-Dist: python-filewrap
24
+ Requires-Dist: http_response (>=0.0.5)
25
+ Requires-Dist: python-argtools (>=0.0.2)
26
+ Requires-Dist: python-dicttools (>=0.0.1)
27
+ Requires-Dist: python-ensure (>=0.0.1)
28
+ Requires-Dist: python-filewrap (>=0.2.8)
29
+ Requires-Dist: python-http_request (>=0.0.9)
30
+ Requires-Dist: python-undefined (>=0.0.3)
31
+ Requires-Dist: yarl
28
32
  Requires-Dist: zstandard
29
- Project-URL: Repository, https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
33
+ Project-URL: Repository, https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
30
34
  Description-Content-Type: text/markdown
31
35
 
32
36
  # Python urlopen wrapper.
@@ -1,19 +1,19 @@
1
1
  [tool.poetry]
2
2
  name = "python-urlopen"
3
- version = "0.0.8"
3
+ version = "0.1.0"
4
4
  description = "Python urlopen wrapper."
5
5
  authors = ["ChenyangGao <wosiwujm@gmail.com>"]
6
6
  license = "MIT"
7
7
  readme = "readme.md"
8
- homepage = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen"
9
- repository = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen"
8
+ homepage = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
9
+ repository = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
10
10
  keywords = ["urlopen"]
11
11
  classifiers = [
12
12
  "License :: OSI Approved :: MIT License",
13
13
  "Development Status :: 5 - Production/Stable",
14
14
  "Programming Language :: Python",
15
15
  "Programming Language :: Python :: 3",
16
- "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.12",
17
17
  "Programming Language :: Python :: 3 :: Only",
18
18
  "Operating System :: OS Independent",
19
19
  "Intended Audience :: Developers",
@@ -26,11 +26,16 @@ include = [
26
26
  ]
27
27
 
28
28
  [tool.poetry.dependencies]
29
- python = "^3.10"
29
+ python = "^3.12"
30
30
  brotli = "*"
31
- http_response = "*"
32
- python-argtools = "*"
33
- python-filewrap = "*"
31
+ http_response = ">=0.0.5"
32
+ python-argtools = ">=0.0.2"
33
+ python-dicttools = ">=0.0.1"
34
+ python-ensure = ">=0.0.1"
35
+ python-filewrap = ">=0.2.8"
36
+ python-http_request = ">=0.0.9"
37
+ python-undefined = ">=0.0.3"
38
+ yarl = "*"
34
39
  zstandard = "*"
35
40
 
36
41
  [tool.poetry.scripts]
@@ -2,49 +2,63 @@
2
2
  # coding: utf-8
3
3
 
4
4
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 0, 8)
5
+ __version__ = (0, 1, 0)
6
6
  __all__ = ["urlopen", "request", "download"]
7
7
 
8
8
  import errno
9
9
 
10
- from collections.abc import Callable, Generator, Iterable, Mapping, Sequence
11
- from copy import copy
10
+ from collections import UserString
11
+ from collections.abc import Buffer, Callable, Generator, Iterable, Mapping, Sequence
12
12
  from gzip import decompress as decompress_gzip
13
13
  from http.client import HTTPResponse
14
14
  from http.cookiejar import CookieJar
15
15
  from inspect import isgenerator
16
- from json import dumps, loads
17
16
  from os import fsdecode, fstat, makedirs, PathLike
18
17
  from os.path import abspath, dirname, isdir, join as joinpath
19
- from re import compile as re_compile
20
18
  from shutil import COPY_BUFSIZE # type: ignore
21
19
  from socket import getdefaulttimeout, setdefaulttimeout
22
20
  from ssl import SSLContext, _create_unverified_context
23
- from string import punctuation
24
21
  from types import EllipsisType
25
- from typing import cast, Any, Literal
22
+ from typing import cast, overload, Any, Literal
26
23
  from urllib.error import HTTPError
27
- from urllib.parse import quote, urlencode, urlsplit
28
- from urllib.request import build_opener, HTTPCookieProcessor, HTTPSHandler, OpenerDirector, Request
24
+ from urllib.request import (
25
+ build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
26
+ HTTPRedirectHandler, OpenerDirector, Request,
27
+ )
29
28
  from zlib import compressobj, DEF_MEM_LEVEL, DEFLATED, MAX_WBITS
30
29
 
31
30
  from argtools import argcount
32
- from filewrap import bio_skip_iter, SupportsWrite
33
- from http_response import get_filename, get_length, is_chunked, is_range_request
31
+ from dicttools import iter_items
32
+ from ensure import ensure_buffer
33
+ from filewrap import bio_skip_iter, bio_chunk_iter, SupportsRead, SupportsWrite
34
+ from http_request import normalize_request_args, SupportsGeturl
35
+ from http_response import (
36
+ get_filename, get_length, is_chunked, is_range_request,
37
+ parse_response,
38
+ )
39
+ from yarl import URL
40
+ from undefined import undefined, Undefined
34
41
 
35
42
 
43
+ type string = Buffer | str | UserString
44
+
36
45
  if "__del__" not in HTTPResponse.__dict__:
37
46
  setattr(HTTPResponse, "__del__", HTTPResponse.close)
38
47
  if "__del__" not in OpenerDirector.__dict__:
39
48
  setattr(OpenerDirector, "__del__", OpenerDirector.close)
40
49
 
41
- _opener: None | OpenerDirector = None
42
- CRE_search_charset = re_compile(r"\bcharset=(?P<charset>[^ ;]+)").search
50
+ _opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()))
51
+
43
52
 
44
53
  if getdefaulttimeout() is None:
45
54
  setdefaulttimeout(60)
46
55
 
47
56
 
57
+ class NoRedirectHandler(HTTPRedirectHandler):
58
+ def redirect_request(self, /, *args, **kwds):
59
+ return None
60
+
61
+
48
62
  def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
49
63
  # Fork from: https://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations#answer-1089787
50
64
  compress = compressobj(
@@ -67,22 +81,9 @@ def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
67
81
  return deflated
68
82
 
69
83
 
70
- def get_charset(content_type: str, default="utf-8") -> str:
71
- match = CRE_search_charset(content_type)
72
- if match is None:
73
- return "utf-8"
74
- return match["charset"]
75
-
76
-
77
- def ensure_ascii_url(url: str, /) -> str:
78
- if url.isascii():
79
- return url
80
- return quote(url, safe=punctuation)
81
-
82
-
83
- def decompress_response(resp: HTTPResponse, /) -> bytes:
84
- data = resp.read()
85
- content_encoding = resp.headers.get("Content-Encoding")
84
+ def decompress_response(response: HTTPResponse, /) -> bytes:
85
+ data = response.read()
86
+ content_encoding = response.headers.get("content-encoding")
86
87
  match content_encoding:
87
88
  case "gzip":
88
89
  data = decompress_gzip(data)
@@ -98,142 +99,174 @@ def decompress_response(resp: HTTPResponse, /) -> bytes:
98
99
 
99
100
 
100
101
  def urlopen(
101
- url: str | Request,
102
- method: str = "GET",
103
- params: None | str | Mapping | Sequence[tuple[Any, Any]] = None,
104
- data: None | bytes | str | Mapping | Sequence[tuple[Any, Any]] | Iterable[bytes] = None,
102
+ url: string | SupportsGeturl | URL | Request,
103
+ method: string = "GET",
104
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
105
+ data: Any = None,
105
106
  json: Any = None,
106
- headers: None | Mapping[str, str] = None,
107
- timeout: None | int | float = None,
108
- cookies: None | CookieJar = None,
109
- proxy: None | tuple[str, str] = None,
107
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
108
+ follow_redirects: bool = True,
109
+ proxies: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
110
110
  context: None | SSLContext = None,
111
+ cookies: None | CookieJar = None,
112
+ timeout: None | Undefined | float = undefined,
111
113
  opener: None | OpenerDirector = None,
112
- origin: None | str = None,
114
+ **_,
113
115
  ) -> HTTPResponse:
114
- global _opener
115
- if isinstance(url, str) and not urlsplit(url).scheme:
116
- if origin:
117
- if not url.startswith("/"):
118
- url = "/" + url
119
- url = origin + url
120
- if params:
121
- if not isinstance(params, str):
122
- params = urlencode(params)
123
- params = cast(None | str, params)
124
- if json is not None:
125
- if isinstance(json, bytes):
126
- data = json
127
- else:
128
- data = dumps(json).encode("utf-8")
129
- if headers:
130
- headers = {**headers, "Content-type": "application/json"}
131
- else:
132
- headers = {"Content-type": "application/json"}
133
- elif data is not None:
134
- if isinstance(data, bytes):
135
- pass
136
- elif isinstance(data, str):
137
- data = data.encode("utf-8")
138
- elif isinstance(data, (Mapping, Sequence)):
139
- data = urlencode(cast(Mapping | Sequence, data)).encode("latin-1")
140
- if headers:
141
- headers = {**headers, "Content-type": "application/x-www-form-urlencoded"}
142
- else:
143
- headers = {"Content-type": "application/x-www-form-urlencoded"}
144
- data = cast(None | bytes | Iterable[bytes], data)
145
116
  if isinstance(url, Request):
146
- req = url
147
- if params:
148
- req.full_url += "?&"["?" in req.full_url] + params
149
- if headers:
150
- for key, val in headers.items():
151
- req.add_header(key, val)
152
- if data is not None:
153
- req.data = data
154
- req.method = method.upper()
117
+ request = url
155
118
  else:
156
- if headers:
157
- headers = dict(headers)
158
- else:
159
- headers = {}
160
- if params:
161
- url += "?&"["?" in url] + params
162
- req = Request(url, data=data, headers=headers, method=method.upper())
163
- if proxy:
164
- req.set_proxy(*proxy)
119
+ if isinstance(data, PathLike):
120
+ data = bio_chunk_iter(open(data, "rb"))
121
+ elif isinstance(data, SupportsRead):
122
+ data = map(ensure_buffer, bio_chunk_iter(data))
123
+ request = Request(**normalize_request_args( # type: ignore
124
+ method=method,
125
+ url=url,
126
+ params=params,
127
+ data=data,
128
+ json=json,
129
+ headers=headers,
130
+ ensure_ascii=True,
131
+ ))
132
+ if proxies:
133
+ for host, type in iter_items(proxies):
134
+ request.set_proxy(host, type)
165
135
  if opener is None:
166
- if _opener is None:
167
- opener = _opener = build_opener(HTTPSHandler(context=_create_unverified_context()))
168
- else:
169
- opener = _opener
170
- if context is not None or cookies is not None:
171
- opener = copy(opener)
136
+ handlers: list[BaseHandler] = []
172
137
  if context is not None:
173
- opener.add_handler(HTTPSHandler(context=context))
138
+ handlers.append(HTTPSHandler(context=context))
174
139
  if cookies is not None:
175
- opener.add_handler(HTTPCookieProcessor(cookies))
176
- req.full_url = ensure_ascii_url(req.full_url)
177
- if timeout is None:
178
- return opener.open(req)
140
+ handlers.append(HTTPCookieProcessor(cookies))
141
+ if not follow_redirects:
142
+ handlers.append(NoRedirectHandler())
143
+ if handlers:
144
+ if not isinstance(handlers[0], HTTPSHandler):
145
+ handlers.insert(0, HTTPSHandler(context=_create_unverified_context()))
146
+ opener = build_opener(*handlers)
147
+ else:
148
+ opener = _opener
149
+ if timeout is undefined:
150
+ return opener.open(request)
179
151
  else:
180
- return opener.open(req, timeout=timeout)
152
+ return opener.open(request, timeout=cast(None|float, timeout))
181
153
 
182
154
 
155
+ @overload
183
156
  def request(
184
- url: str | Request,
185
- method: str = "GET",
186
- parse: None | EllipsisType | bool | Callable = None,
157
+ url: string | SupportsGeturl | URL | Request,
158
+ method: string = "GET",
159
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
160
+ data: Any = None,
161
+ json: Any = None,
162
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
163
+ follow_redirects: bool = True,
164
+ raise_for_status: bool = True,
165
+ *,
166
+ parse: None | EllipsisType = None,
167
+ **request_kwargs,
168
+ ) -> HTTPResponse:
169
+ ...
170
+ @overload
171
+ def request(
172
+ url: string | SupportsGeturl | URL | Request,
173
+ method: string = "GET",
174
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
175
+ data: Any = None,
176
+ json: Any = None,
177
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
178
+ follow_redirects: bool = True,
179
+ raise_for_status: bool = True,
180
+ *,
181
+ parse: Literal[False],
182
+ **request_kwargs,
183
+ ) -> bytes:
184
+ ...
185
+ @overload
186
+ def request(
187
+ url: string | SupportsGeturl | URL | Request,
188
+ method: string = "GET",
189
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
190
+ data: Any = None,
191
+ json: Any = None,
192
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
193
+ follow_redirects: bool = True,
187
194
  raise_for_status: bool = True,
188
- timeout: None | float = 60,
195
+ *,
196
+ parse: Literal[True],
189
197
  **request_kwargs,
190
- ):
191
- request_kwargs.pop("stream", None)
198
+ ) -> bytes | str | dict | list | int | float | bool | None:
199
+ ...
200
+ @overload
201
+ def request[T](
202
+ url: string | SupportsGeturl | URL | Request,
203
+ method: string = "GET",
204
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
205
+ data: Any = None,
206
+ json: Any = None,
207
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
208
+ follow_redirects: bool = True,
209
+ raise_for_status: bool = True,
210
+ *,
211
+ parse: Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T],
212
+ **request_kwargs,
213
+ ) -> T:
214
+ ...
215
+ def request[T](
216
+ url: string | SupportsGeturl | URL | Request,
217
+ method: string = "GET",
218
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
219
+ data: Any = None,
220
+ json: Any = None,
221
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
222
+ follow_redirects: bool = True,
223
+ raise_for_status: bool = True,
224
+ *,
225
+ parse: None | EllipsisType| bool | Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T] = None,
226
+ **request_kwargs,
227
+ ) -> HTTPResponse | bytes | str | dict | list | int | float | bool | None | T:
192
228
  try:
193
- resp = urlopen(
229
+ response = urlopen(
194
230
  url=url,
195
231
  method=method,
196
- timeout=timeout,
232
+ params=params,
233
+ data=data,
234
+ json=json,
235
+ headers=headers,
236
+ follow_redirects=follow_redirects,
197
237
  **request_kwargs,
198
238
  )
199
239
  except HTTPError as e:
200
240
  if raise_for_status:
201
241
  raise
202
- resp = getattr(e, "file")
242
+ response = getattr(e, "file")
203
243
  if parse is None:
204
- return resp
244
+ return response
205
245
  elif parse is ...:
206
- resp.close()
207
- return resp
208
- with resp:
246
+ response.close()
247
+ return response
248
+ with response:
209
249
  if isinstance(parse, bool):
210
- data = decompress_response(resp)
250
+ data = decompress_response(response)
211
251
  if parse:
212
- content_type = resp.headers.get("Content-Type", "")
213
- if content_type == "application/json":
214
- return loads(data)
215
- elif content_type.startswith("application/json;"):
216
- return loads(data.decode(get_charset(content_type)))
217
- elif content_type.startswith("text/"):
218
- return data.decode(get_charset(content_type))
252
+ return parse_response(response, data)
219
253
  return data
254
+ ac = argcount(parse)
255
+ if ac == 1:
256
+ return cast(Callable[[HTTPResponse], T], parse)(response)
220
257
  else:
221
- ac = argcount(parse)
222
- with resp:
223
- if ac == 1:
224
- return parse(resp)
225
- else:
226
- return parse(resp, decompress_response(resp))
258
+ return cast(Callable[[HTTPResponse, bytes], T], parse)(
259
+ response, decompress_response(response))
227
260
 
228
261
 
229
262
  def download(
230
- url: str,
263
+ url: string | SupportsGeturl | URL | Request,
231
264
  file: bytes | str | PathLike | SupportsWrite[bytes] = "",
232
265
  resume: bool = False,
233
266
  chunksize: int = COPY_BUFSIZE,
234
- headers: None | Mapping[str, str] = None,
267
+ headers: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
235
268
  make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any]] = None,
236
- **urlopen_kwargs,
269
+ **request_kwargs,
237
270
  ) -> str | SupportsWrite[bytes]:
238
271
  """Download a URL into a file.
239
272
 
@@ -273,17 +306,14 @@ def download(
273
306
  print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
274
307
  dq.append((read_num, cur_t))
275
308
  """
276
- if headers:
277
- headers = {**headers, "Accept-encoding": "identity"}
278
- else:
279
- headers = {"Accept-encoding": "identity"}
280
-
281
309
  if chunksize <= 0:
282
310
  chunksize = COPY_BUFSIZE
311
+ headers = request_kwargs["headers"] = dict(headers or ())
312
+ headers["accept-encoding"] = "identity"
283
313
 
284
- resp: HTTPResponse = urlopen(url, headers=headers, **urlopen_kwargs)
285
- content_length = get_length(resp)
286
- if content_length == 0 and is_chunked(resp):
314
+ response: HTTPResponse = urlopen(url, **request_kwargs)
315
+ content_length = get_length(response)
316
+ if content_length == 0 and is_chunked(response):
287
317
  content_length = None
288
318
 
289
319
  fdst: SupportsWrite[bytes]
@@ -292,7 +322,7 @@ def download(
292
322
  else:
293
323
  file = abspath(fsdecode(file))
294
324
  if isdir(file):
295
- file = joinpath(file, get_filename(resp, "download"))
325
+ file = joinpath(file, get_filename(response, "download"))
296
326
  try:
297
327
  fdst = open(file, "ab" if resume else "wb")
298
328
  except FileNotFoundError:
@@ -309,7 +339,7 @@ def download(
309
339
  else:
310
340
  if filesize == content_length:
311
341
  return file
312
- if filesize and is_range_request(resp):
342
+ if filesize and is_range_request(response):
313
343
  if filesize == content_length:
314
344
  return file
315
345
  elif content_length is not None and filesize > content_length:
@@ -333,25 +363,25 @@ def download(
333
363
 
334
364
  try:
335
365
  if filesize:
336
- if is_range_request(resp):
337
- resp.close()
338
- resp = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **urlopen_kwargs)
339
- if not is_range_request(resp):
366
+ if is_range_request(response):
367
+ response.close()
368
+ response = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **request_kwargs)
369
+ if not is_range_request(response):
340
370
  raise OSError(errno.EIO, f"range request failed: {url!r}")
341
371
  if reporthook is not None:
342
372
  reporthook(filesize)
343
373
  elif resume:
344
- for _ in bio_skip_iter(resp, filesize, callback=reporthook):
374
+ for _ in bio_skip_iter(response, filesize, callback=reporthook):
345
375
  pass
346
376
 
347
- fsrc_read = resp.read
377
+ fsrc_read = response.read
348
378
  fdst_write = fdst.write
349
379
  while (chunk := fsrc_read(chunksize)):
350
380
  fdst_write(chunk)
351
381
  if reporthook is not None:
352
382
  reporthook(len(chunk))
353
383
  finally:
354
- resp.close()
384
+ response.close()
355
385
  if callable(reporthook_close):
356
386
  reporthook_close()
357
387
 
File without changes
File without changes