python-urlopen 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-urlopen
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: Python urlopen wrapper.
5
5
  Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
6
6
  License: MIT
@@ -22,6 +22,7 @@ Classifier: Topic :: Software Development
22
22
  Classifier: Topic :: Software Development :: Libraries
23
23
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
24
  Requires-Dist: http_response
25
+ Requires-Dist: python-argtools
25
26
  Requires-Dist: python-filewrap
26
27
  Project-URL: Repository, https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
27
28
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "python-urlopen"
3
- version = "0.0.4"
3
+ version = "0.0.5"
4
4
  description = "Python urlopen wrapper."
5
5
  authors = ["ChenyangGao <wosiwujm@gmail.com>"]
6
6
  license = "MIT"
@@ -28,6 +28,7 @@ include = [
28
28
  [tool.poetry.dependencies]
29
29
  python = "^3.10"
30
30
  http_response = "*"
31
+ python-argtools = "*"
31
32
  python-filewrap = "*"
32
33
 
33
34
  [tool.poetry.scripts]
@@ -2,25 +2,28 @@
2
2
  # coding: utf-8
3
3
 
4
4
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 0, 4)
6
- __all__ = ["urlopen", "download"]
5
+ __version__ = (0, 0, 5)
6
+ __all__ = ["urlopen", "request", "download"]
7
7
 
8
8
  import errno
9
9
 
10
- from collections.abc import Callable, Generator, Mapping, Sequence
10
+ from collections.abc import Callable, Generator, Iterable, Mapping, Sequence
11
11
  from copy import copy
12
12
  from http.client import HTTPResponse
13
13
  from http.cookiejar import CookieJar
14
14
  from inspect import isgenerator
15
- from json import dumps
15
+ from json import dumps, loads
16
16
  from os import fsdecode, fstat, makedirs, PathLike
17
17
  from os.path import abspath, dirname, isdir, join as joinpath
18
+ from re import compile as re_compile
18
19
  from shutil import COPY_BUFSIZE # type: ignore
19
20
  from ssl import SSLContext, _create_unverified_context
20
21
  from typing import cast, Any
22
+ from urllib.error import HTTPError
21
23
  from urllib.parse import urlencode, urlsplit
22
24
  from urllib.request import build_opener, HTTPCookieProcessor, HTTPSHandler, OpenerDirector, Request
23
25
 
26
+ from argtools import argcount
24
27
  from filewrap import bio_skip_iter, SupportsRead, SupportsWrite
25
28
  from http_response import get_filename, get_length, is_chunked, is_range_request
26
29
 
@@ -28,12 +31,14 @@ from http_response import get_filename, get_length, is_chunked, is_range_request
28
31
  if "__del__" not in HTTPResponse.__dict__:
29
32
  setattr(HTTPResponse, "__del__", HTTPResponse.close)
30
33
 
34
+ CRE_search_charset = re_compile(r"\bcharset=(?P<charset>[^ ;]+)").search
35
+
31
36
 
32
37
  def urlopen(
33
38
  url: str | Request,
34
39
  method: str = "GET",
35
40
  params: None | str | Mapping | Sequence[tuple[Any, Any]] = None,
36
- data: None | bytes | str | Mapping | Sequence[tuple[Any, Any]] = None,
41
+ data: None | bytes | str | Mapping | Sequence[tuple[Any, Any]] | Iterable[bytes] = None,
37
42
  json: Any = None,
38
43
  headers: None | dict[str, str] = {"User-agent": ""},
39
44
  timeout: None | int | float = None,
@@ -66,9 +71,13 @@ def urlopen(
66
71
  pass
67
72
  elif isinstance(data, str):
68
73
  data = data.encode("utf-8")
69
- else:
70
- data = urlencode(data).encode("latin-1")
71
- data = cast(None | bytes, data)
74
+ elif isinstance(data, (Mapping, Sequence)):
75
+ data = urlencode(cast(Mapping | Sequence, data)).encode("latin-1")
76
+ if headers:
77
+ headers = {**headers, "Content-type": "application/x-www-form-urlencoded"}
78
+ else:
79
+ headers = {"Content-type": "application/x-www-form-urlencoded"}
80
+ data = cast(None | bytes | Iterable[bytes], data)
72
81
  if isinstance(url, Request):
73
82
  req = url
74
83
  if params:
@@ -97,6 +106,57 @@ def urlopen(
97
106
  return opener.open(req, timeout=timeout)
98
107
 
99
108
 
109
def get_charset(content_type: str, default="utf-8") -> str:
    """Extract the ``charset`` parameter from a Content-Type header value.

    :param content_type: Raw header value, e.g. ``"text/html; charset=gbk"``.
    :param default: Value returned when no ``charset=`` parameter is present.

    :return: The declared charset (surrounding quotes removed), or `default`.
    """
    match = CRE_search_charset(content_type)
    if match is None:
        # Honor the caller-supplied fallback rather than a hard-coded "utf-8".
        return default
    # Parameter values may be written as a quoted string (charset="utf-8");
    # the pattern captures the quotes too, and a quoted name is not a codec name.
    return match["charset"].strip("\"'")
114
+
115
+
116
def request(
    url: str | Request,
    method: str = "GET",
    parse: None | bool | Callable = None,
    raise_for_status: bool = True,
    timeout: None | float = 60,
    **request_kwargs,
):
    """Send a request through `urlopen` and optionally parse the response.

    :param url: Target URL or a prepared `Request` object.
    :param method: HTTP method name.
    :param parse: Controls what is returned:

        - ``None``: return the open response object (the caller closes it).
        - ``False``: return the raw body bytes.
        - ``True``: decode by Content-Type: JSON is parsed, ``text/*`` is
          decoded to `str`, anything else is returned as bytes.
        - callable: called as ``parse(resp)`` when `argcount` reports one
          argument, otherwise as ``parse(resp, body_bytes)``.
    :param raise_for_status: If true, an `HTTPError` raised by `urlopen`
        propagates; otherwise the error's response is handled like a normal one.
    :param timeout: Timeout forwarded to `urlopen`.
    :param request_kwargs: Extra keyword arguments forwarded to `urlopen`;
        a ``stream`` key is discarded.

    :return: See `parse`.
    """
    # "stream" is dropped before forwarding so it never reaches `urlopen`.
    request_kwargs.pop("stream", None)
    try:
        resp = urlopen(
            url=url,
            method=method,
            timeout=timeout,
            **request_kwargs,
        )
    except HTTPError as e:
        if raise_for_status:
            raise
        # Fall back to the response object carried by the error.
        resp = getattr(e, "file")
    if parse is None:
        return resp
    # A single context manager closes the response on every path below.
    with resp:
        if parse is False:
            return resp.read()
        if parse is True:
            data = resp.read()
            content_type = resp.headers.get("Content-Type", "")
            # Media types and parameter names are case-insensitive, and
            # whitespace may precede ";", so normalize before comparing.
            normalized = content_type.lower()
            media_type, has_params, _ = normalized.partition(";")
            media_type = media_type.strip()
            if media_type == "application/json":
                if has_params:
                    return loads(data.decode(get_charset(normalized)))
                # No parameters: let `loads` work on the bytes directly.
                return loads(data)
            if media_type.startswith("text/"):
                return data.decode(get_charset(normalized))
            return data
        if argcount(parse) == 1:
            return parse(resp)
        return parse(resp, resp.read())
158
+
159
+
100
160
  def download(
101
161
  url: str,
102
162
  file: bytes | str | PathLike | SupportsWrite[bytes] = "",
@@ -209,7 +269,7 @@ def download(
209
269
  resp = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **urlopen_kwargs)
210
270
  if not is_range_request(resp):
211
271
  raise OSError(errno.EIO, f"range request failed: {url!r}")
212
- if reporthook:
272
+ if reporthook is not None:
213
273
  reporthook(filesize)
214
274
  elif resume:
215
275
  for _ in bio_skip_iter(resp, filesize, callback=reporthook):
@@ -219,7 +279,7 @@ def download(
219
279
  fdst_write = fdst.write
220
280
  while (chunk := fsrc_read(chunksize)):
221
281
  fdst_write(chunk)
222
- if reporthook:
282
+ if reporthook is not None:
223
283
  reporthook(len(chunk))
224
284
  finally:
225
285
  resp.close()
File without changes
File without changes