python-urlopen 0.0.9__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,19 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-urlopen
3
- Version: 0.0.9
3
+ Version: 0.1.0
4
4
  Summary: Python urlopen wrapper.
5
- Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
5
+ Home-page: https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
6
6
  License: MIT
7
7
  Keywords: urlopen
8
8
  Author: ChenyangGao
9
9
  Author-email: wosiwujm@gmail.com
10
- Requires-Python: >=3.10,<4.0
10
+ Requires-Python: >=3.12,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: OS Independent
15
15
  Classifier: Programming Language :: Python
16
16
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.10
18
- Classifier: Programming Language :: Python :: 3.11
19
17
  Classifier: Programming Language :: Python :: 3.12
20
18
  Classifier: Programming Language :: Python :: 3.13
21
19
  Classifier: Programming Language :: Python :: 3 :: Only
@@ -23,12 +21,16 @@ Classifier: Topic :: Software Development
23
21
  Classifier: Topic :: Software Development :: Libraries
24
22
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
23
  Requires-Dist: brotli
26
- Requires-Dist: http_response
27
- Requires-Dist: orjson
28
- Requires-Dist: python-argtools
29
- Requires-Dist: python-filewrap
24
+ Requires-Dist: http_response (>=0.0.5)
25
+ Requires-Dist: python-argtools (>=0.0.2)
26
+ Requires-Dist: python-dicttools (>=0.0.1)
27
+ Requires-Dist: python-ensure (>=0.0.1)
28
+ Requires-Dist: python-filewrap (>=0.2.8)
29
+ Requires-Dist: python-http_request (>=0.0.9)
30
+ Requires-Dist: python-undefined (>=0.0.3)
31
+ Requires-Dist: yarl
30
32
  Requires-Dist: zstandard
31
- Project-URL: Repository, https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen
33
+ Project-URL: Repository, https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
32
34
  Description-Content-Type: text/markdown
33
35
 
34
36
  # Python urlopen wrapper.
@@ -1,19 +1,19 @@
1
1
  [tool.poetry]
2
2
  name = "python-urlopen"
3
- version = "0.0.9"
3
+ version = "0.1.0"
4
4
  description = "Python urlopen wrapper."
5
5
  authors = ["ChenyangGao <wosiwujm@gmail.com>"]
6
6
  license = "MIT"
7
7
  readme = "readme.md"
8
- homepage = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen"
9
- repository = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-urlopen"
8
+ homepage = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
9
+ repository = "https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen"
10
10
  keywords = ["urlopen"]
11
11
  classifiers = [
12
12
  "License :: OSI Approved :: MIT License",
13
13
  "Development Status :: 5 - Production/Stable",
14
14
  "Programming Language :: Python",
15
15
  "Programming Language :: Python :: 3",
16
- "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.12",
17
17
  "Programming Language :: Python :: 3 :: Only",
18
18
  "Operating System :: OS Independent",
19
19
  "Intended Audience :: Developers",
@@ -26,12 +26,16 @@ include = [
26
26
  ]
27
27
 
28
28
  [tool.poetry.dependencies]
29
- python = "^3.10"
29
+ python = "^3.12"
30
30
  brotli = "*"
31
- http_response = "*"
32
- orjson = "*"
33
- python-argtools = "*"
34
- python-filewrap = "*"
31
+ http_response = ">=0.0.5"
32
+ python-argtools = ">=0.0.2"
33
+ python-dicttools = ">=0.0.1"
34
+ python-ensure = ">=0.0.1"
35
+ python-filewrap = ">=0.2.8"
36
+ python-http_request = ">=0.0.9"
37
+ python-undefined = ">=0.0.3"
38
+ yarl = "*"
35
39
  zstandard = "*"
36
40
 
37
41
  [tool.poetry.scripts]
@@ -2,27 +2,25 @@
2
2
  # coding: utf-8
3
3
 
4
4
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 0, 9)
5
+ __version__ = (0, 1, 0)
6
6
  __all__ = ["urlopen", "request", "download"]
7
7
 
8
8
  import errno
9
9
 
10
- from collections.abc import Callable, Generator, Iterable, Mapping, Sequence
10
+ from collections import UserString
11
+ from collections.abc import Buffer, Callable, Generator, Iterable, Mapping, Sequence
11
12
  from gzip import decompress as decompress_gzip
12
13
  from http.client import HTTPResponse
13
14
  from http.cookiejar import CookieJar
14
15
  from inspect import isgenerator
15
16
  from os import fsdecode, fstat, makedirs, PathLike
16
17
  from os.path import abspath, dirname, isdir, join as joinpath
17
- from re import compile as re_compile
18
18
  from shutil import COPY_BUFSIZE # type: ignore
19
19
  from socket import getdefaulttimeout, setdefaulttimeout
20
20
  from ssl import SSLContext, _create_unverified_context
21
- from string import punctuation
22
21
  from types import EllipsisType
23
- from typing import cast, Any
22
+ from typing import cast, overload, Any, Literal
24
23
  from urllib.error import HTTPError
25
- from urllib.parse import quote, urlencode, urlsplit
26
24
  from urllib.request import (
27
25
  build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
28
26
  HTTPRedirectHandler, OpenerDirector, Request,
@@ -30,10 +28,19 @@ from urllib.request import (
30
28
  from zlib import compressobj, DEF_MEM_LEVEL, DEFLATED, MAX_WBITS
31
29
 
32
30
  from argtools import argcount
33
- from filewrap import bio_skip_iter, SupportsWrite
34
- from http_response import get_filename, get_length, is_chunked, is_range_request
35
- from orjson import dumps, loads
31
+ from dicttools import iter_items
32
+ from ensure import ensure_buffer
33
+ from filewrap import bio_skip_iter, bio_chunk_iter, SupportsRead, SupportsWrite
34
+ from http_request import normalize_request_args, SupportsGeturl
35
+ from http_response import (
36
+ get_filename, get_length, is_chunked, is_range_request,
37
+ parse_response,
38
+ )
39
+ from yarl import URL
40
+ from undefined import undefined, Undefined
41
+
36
42
 
43
+ type string = Buffer | str | UserString
37
44
 
38
45
  if "__del__" not in HTTPResponse.__dict__:
39
46
  setattr(HTTPResponse, "__del__", HTTPResponse.close)
@@ -41,15 +48,14 @@ if "__del__" not in OpenerDirector.__dict__:
41
48
  setattr(OpenerDirector, "__del__", OpenerDirector.close)
42
49
 
43
50
  _opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()))
44
- CRE_search_charset = re_compile(r"\bcharset=(?P<charset>[^ ;]+)").search
51
+
45
52
 
46
53
  if getdefaulttimeout() is None:
47
54
  setdefaulttimeout(60)
48
55
 
49
56
 
50
57
  class NoRedirectHandler(HTTPRedirectHandler):
51
-
52
- def redirect_request(self, req, fp, code, msg, headers, newurl):
58
+ def redirect_request(self, /, *args, **kwds):
53
59
  return None
54
60
 
55
61
 
@@ -75,22 +81,9 @@ def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
75
81
  return deflated
76
82
 
77
83
 
78
- def get_charset(content_type: str, default="utf-8") -> str:
79
- match = CRE_search_charset(content_type)
80
- if match is None:
81
- return "utf-8"
82
- return match["charset"]
83
-
84
-
85
- def ensure_ascii_url(url: str, /) -> str:
86
- if url.isascii():
87
- return url
88
- return quote(url, safe=punctuation)
89
-
90
-
91
- def decompress_response(resp: HTTPResponse, /) -> bytes:
92
- data = resp.read()
93
- content_encoding = resp.headers.get("Content-Encoding")
84
+ def decompress_response(response: HTTPResponse, /) -> bytes:
85
+ data = response.read()
86
+ content_encoding = response.headers.get("content-encoding")
94
87
  match content_encoding:
95
88
  case "gzip":
96
89
  data = decompress_gzip(data)
@@ -106,77 +99,46 @@ def decompress_response(resp: HTTPResponse, /) -> bytes:
106
99
 
107
100
 
108
101
  def urlopen(
109
- url: str | Request,
110
- method: str = "GET",
111
- params: None | str | Mapping | Sequence[tuple[Any, Any]] = None,
112
- data: None | bytes | str | Mapping | Sequence[tuple[Any, Any]] | Iterable[bytes] = None,
102
+ url: string | SupportsGeturl | URL | Request,
103
+ method: string = "GET",
104
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
105
+ data: Any = None,
113
106
  json: Any = None,
114
- headers: None | Mapping[str, str] = None,
115
- timeout: None | int | float = None,
116
- cookies: None | CookieJar = None,
117
- proxy: None | tuple[str, str] = None,
107
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
108
+ follow_redirects: bool = True,
109
+ proxies: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
118
110
  context: None | SSLContext = None,
119
- allow_redirects: bool = True,
111
+ cookies: None | CookieJar = None,
112
+ timeout: None | Undefined | float = undefined,
120
113
  opener: None | OpenerDirector = None,
121
- origin: None | str = None,
114
+ **_,
122
115
  ) -> HTTPResponse:
123
- if isinstance(url, str) and not urlsplit(url).scheme:
124
- if origin:
125
- if not url.startswith("/"):
126
- url = "/" + url
127
- url = origin + url
128
- if params:
129
- if not isinstance(params, str):
130
- params = urlencode(params)
131
- params = cast(None | str, params)
132
- if json is not None:
133
- if isinstance(json, bytes):
134
- data = json
135
- else:
136
- data = dumps(json)
137
- if headers:
138
- headers = {**headers, "Content-type": "application/json; charset=UTF-8"}
139
- else:
140
- headers = {"Content-type": "application/json; charset=UTF-8"}
141
- elif data is not None:
142
- if isinstance(data, bytes):
143
- pass
144
- elif isinstance(data, str):
145
- data = data.encode("utf-8")
146
- elif isinstance(data, (Mapping, Sequence)):
147
- data = urlencode(cast(Mapping | Sequence, data)).encode("latin-1")
148
- if headers:
149
- headers = {**headers, "Content-type": "application/x-www-form-urlencoded"}
150
- else:
151
- headers = {"Content-type": "application/x-www-form-urlencoded"}
152
- data = cast(None | bytes | Iterable[bytes], data)
153
116
  if isinstance(url, Request):
154
- req = url
155
- if params:
156
- req.full_url += "?&"["?" in req.full_url] + params
157
- if headers:
158
- for key, val in headers.items():
159
- req.add_header(key, val)
160
- if data is not None:
161
- req.data = data
162
- req.method = method.upper()
117
+ request = url
163
118
  else:
164
- if headers:
165
- headers = dict(headers)
166
- else:
167
- headers = {}
168
- if params:
169
- url += "?&"["?" in url] + params
170
- req = Request(url, data=data, headers=headers, method=method.upper())
171
- if proxy:
172
- req.set_proxy(*proxy)
119
+ if isinstance(data, PathLike):
120
+ data = bio_chunk_iter(open(data, "rb"))
121
+ elif isinstance(data, SupportsRead):
122
+ data = map(ensure_buffer, bio_chunk_iter(data))
123
+ request = Request(**normalize_request_args( # type: ignore
124
+ method=method,
125
+ url=url,
126
+ params=params,
127
+ data=data,
128
+ json=json,
129
+ headers=headers,
130
+ ensure_ascii=True,
131
+ ))
132
+ if proxies:
133
+ for host, type in iter_items(proxies):
134
+ request.set_proxy(host, type)
173
135
  if opener is None:
174
136
  handlers: list[BaseHandler] = []
175
137
  if context is not None:
176
138
  handlers.append(HTTPSHandler(context=context))
177
139
  if cookies is not None:
178
140
  handlers.append(HTTPCookieProcessor(cookies))
179
- if not allow_redirects:
141
+ if not follow_redirects:
180
142
  handlers.append(NoRedirectHandler())
181
143
  if handlers:
182
144
  if not isinstance(handlers[0], HTTPSHandler):
@@ -184,69 +146,127 @@ def urlopen(
184
146
  opener = build_opener(*handlers)
185
147
  else:
186
148
  opener = _opener
187
- req.full_url = ensure_ascii_url(req.full_url)
188
- if timeout is None:
189
- return opener.open(req)
149
+ if timeout is undefined:
150
+ return opener.open(request)
190
151
  else:
191
- return opener.open(req, timeout=timeout)
152
+ return opener.open(request, timeout=cast(None|float, timeout))
192
153
 
193
154
 
155
+ @overload
194
156
  def request(
195
- url: str | Request,
196
- method: str = "GET",
197
- parse: None | EllipsisType | bool | Callable = None,
157
+ url: string | SupportsGeturl | URL | Request,
158
+ method: string = "GET",
159
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
160
+ data: Any = None,
161
+ json: Any = None,
162
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
163
+ follow_redirects: bool = True,
198
164
  raise_for_status: bool = True,
199
- timeout: None | float = 60,
200
- allow_redirects: bool = True,
165
+ *,
166
+ parse: None | EllipsisType = None,
201
167
  **request_kwargs,
202
- ):
203
- request_kwargs.pop("stream", None)
168
+ ) -> HTTPResponse:
169
+ ...
170
+ @overload
171
+ def request(
172
+ url: string | SupportsGeturl | URL | Request,
173
+ method: string = "GET",
174
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
175
+ data: Any = None,
176
+ json: Any = None,
177
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
178
+ follow_redirects: bool = True,
179
+ raise_for_status: bool = True,
180
+ *,
181
+ parse: Literal[False],
182
+ **request_kwargs,
183
+ ) -> bytes:
184
+ ...
185
+ @overload
186
+ def request(
187
+ url: string | SupportsGeturl | URL | Request,
188
+ method: string = "GET",
189
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
190
+ data: Any = None,
191
+ json: Any = None,
192
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
193
+ follow_redirects: bool = True,
194
+ raise_for_status: bool = True,
195
+ *,
196
+ parse: Literal[True],
197
+ **request_kwargs,
198
+ ) -> bytes | str | dict | list | int | float | bool | None:
199
+ ...
200
+ @overload
201
+ def request[T](
202
+ url: string | SupportsGeturl | URL | Request,
203
+ method: string = "GET",
204
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
205
+ data: Any = None,
206
+ json: Any = None,
207
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
208
+ follow_redirects: bool = True,
209
+ raise_for_status: bool = True,
210
+ *,
211
+ parse: Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T],
212
+ **request_kwargs,
213
+ ) -> T:
214
+ ...
215
+ def request[T](
216
+ url: string | SupportsGeturl | URL | Request,
217
+ method: string = "GET",
218
+ params: None | string | Mapping | Sequence[tuple[Any, Any]] = None,
219
+ data: Any = None,
220
+ json: Any = None,
221
+ headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
222
+ follow_redirects: bool = True,
223
+ raise_for_status: bool = True,
224
+ *,
225
+ parse: None | EllipsisType| bool | Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T] = None,
226
+ **request_kwargs,
227
+ ) -> HTTPResponse | bytes | str | dict | list | int | float | bool | None | T:
204
228
  try:
205
- resp = urlopen(
229
+ response = urlopen(
206
230
  url=url,
207
231
  method=method,
208
- timeout=timeout,
209
- allow_redirects=allow_redirects,
232
+ params=params,
233
+ data=data,
234
+ json=json,
235
+ headers=headers,
236
+ follow_redirects=follow_redirects,
210
237
  **request_kwargs,
211
238
  )
212
239
  except HTTPError as e:
213
240
  if raise_for_status:
214
241
  raise
215
- resp = getattr(e, "file")
242
+ response = getattr(e, "file")
216
243
  if parse is None:
217
- return resp
244
+ return response
218
245
  elif parse is ...:
219
- resp.close()
220
- return resp
221
- with resp:
246
+ response.close()
247
+ return response
248
+ with response:
222
249
  if isinstance(parse, bool):
223
- data = decompress_response(resp)
250
+ data = decompress_response(response)
224
251
  if parse:
225
- content_type = resp.headers.get("Content-Type", "")
226
- if content_type == "application/json":
227
- return loads(data)
228
- elif content_type.startswith("application/json;"):
229
- return loads(data.decode(get_charset(content_type)))
230
- elif content_type.startswith("text/"):
231
- return data.decode(get_charset(content_type))
252
+ return parse_response(response, data)
232
253
  return data
254
+ ac = argcount(parse)
255
+ if ac == 1:
256
+ return cast(Callable[[HTTPResponse], T], parse)(response)
233
257
  else:
234
- ac = argcount(parse)
235
- with resp:
236
- if ac == 1:
237
- return parse(resp)
238
- else:
239
- return parse(resp, decompress_response(resp))
258
+ return cast(Callable[[HTTPResponse, bytes], T], parse)(
259
+ response, decompress_response(response))
240
260
 
241
261
 
242
262
  def download(
243
- url: str,
263
+ url: string | SupportsGeturl | URL | Request,
244
264
  file: bytes | str | PathLike | SupportsWrite[bytes] = "",
245
265
  resume: bool = False,
246
266
  chunksize: int = COPY_BUFSIZE,
247
- headers: None | Mapping[str, str] = None,
267
+ headers: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
248
268
  make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any]] = None,
249
- **urlopen_kwargs,
269
+ **request_kwargs,
250
270
  ) -> str | SupportsWrite[bytes]:
251
271
  """Download a URL into a file.
252
272
 
@@ -286,17 +306,14 @@ def download(
286
306
  print(f"\r\x1b[K{read_num} / {total} | {speed:.2f} MB/s", end="", flush=True)
287
307
  dq.append((read_num, cur_t))
288
308
  """
289
- if headers:
290
- headers = {**headers, "Accept-encoding": "identity"}
291
- else:
292
- headers = {"Accept-encoding": "identity"}
293
-
294
309
  if chunksize <= 0:
295
310
  chunksize = COPY_BUFSIZE
311
+ headers = request_kwargs["headers"] = dict(headers or ())
312
+ headers["accept-encoding"] = "identity"
296
313
 
297
- resp: HTTPResponse = urlopen(url, headers=headers, **urlopen_kwargs)
298
- content_length = get_length(resp)
299
- if content_length == 0 and is_chunked(resp):
314
+ response: HTTPResponse = urlopen(url, **request_kwargs)
315
+ content_length = get_length(response)
316
+ if content_length == 0 and is_chunked(response):
300
317
  content_length = None
301
318
 
302
319
  fdst: SupportsWrite[bytes]
@@ -305,7 +322,7 @@ def download(
305
322
  else:
306
323
  file = abspath(fsdecode(file))
307
324
  if isdir(file):
308
- file = joinpath(file, get_filename(resp, "download"))
325
+ file = joinpath(file, get_filename(response, "download"))
309
326
  try:
310
327
  fdst = open(file, "ab" if resume else "wb")
311
328
  except FileNotFoundError:
@@ -322,7 +339,7 @@ def download(
322
339
  else:
323
340
  if filesize == content_length:
324
341
  return file
325
- if filesize and is_range_request(resp):
342
+ if filesize and is_range_request(response):
326
343
  if filesize == content_length:
327
344
  return file
328
345
  elif content_length is not None and filesize > content_length:
@@ -346,25 +363,25 @@ def download(
346
363
 
347
364
  try:
348
365
  if filesize:
349
- if is_range_request(resp):
350
- resp.close()
351
- resp = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **urlopen_kwargs)
352
- if not is_range_request(resp):
366
+ if is_range_request(response):
367
+ response.close()
368
+ response = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **request_kwargs)
369
+ if not is_range_request(response):
353
370
  raise OSError(errno.EIO, f"range request failed: {url!r}")
354
371
  if reporthook is not None:
355
372
  reporthook(filesize)
356
373
  elif resume:
357
- for _ in bio_skip_iter(resp, filesize, callback=reporthook):
374
+ for _ in bio_skip_iter(response, filesize, callback=reporthook):
358
375
  pass
359
376
 
360
- fsrc_read = resp.read
377
+ fsrc_read = response.read
361
378
  fdst_write = fdst.write
362
379
  while (chunk := fsrc_read(chunksize)):
363
380
  fdst_write(chunk)
364
381
  if reporthook is not None:
365
382
  reporthook(len(chunk))
366
383
  finally:
367
- resp.close()
384
+ response.close()
368
385
  if callable(reporthook_close):
369
386
  reporthook_close()
370
387
 
File without changes
File without changes