python-urlopen 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-urlopen
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Python urlopen wrapper.
5
5
  Home-page: https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
6
6
  License: MIT
@@ -20,16 +20,14 @@ Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Classifier: Topic :: Software Development
21
21
  Classifier: Topic :: Software Development :: Libraries
22
22
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
- Requires-Dist: brotli
24
- Requires-Dist: http_response (>=0.0.6)
23
+ Requires-Dist: http_response (>=0.0.8)
25
24
  Requires-Dist: python-argtools (>=0.0.2)
26
- Requires-Dist: python-cookietools (>=0.0.8)
25
+ Requires-Dist: python-cookietools (>=0.0.11)
27
26
  Requires-Dist: python-dicttools (>=0.0.2)
28
27
  Requires-Dist: python-filewrap (>=0.2.8)
29
- Requires-Dist: python-http_request (>=0.1.0)
28
+ Requires-Dist: python-http_request (>=0.1.3)
30
29
  Requires-Dist: python-undefined (>=0.0.3)
31
30
  Requires-Dist: yarl
32
- Requires-Dist: zstandard
33
31
  Project-URL: Repository, https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
34
32
  Description-Content-Type: text/markdown
35
33
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "python-urlopen"
3
- version = "0.1.2"
3
+ version = "0.1.4"
4
4
  description = "Python urlopen wrapper."
5
5
  authors = ["ChenyangGao <wosiwujm@gmail.com>"]
6
6
  license = "MIT"
@@ -27,16 +27,14 @@ include = [
27
27
 
28
28
  [tool.poetry.dependencies]
29
29
  python = "^3.12"
30
- brotli = "*"
31
- http_response = ">=0.0.6"
30
+ http_response = ">=0.0.8"
32
31
  python-argtools = ">=0.0.2"
33
- python-cookietools = ">=0.0.8"
32
+ python-cookietools = ">=0.0.11"
34
33
  python-dicttools = ">=0.0.2"
35
34
  python-filewrap = ">=0.2.8"
36
- python-http_request = ">=0.1.0"
35
+ python-http_request = ">=0.1.3"
37
36
  python-undefined = ">=0.0.3"
38
37
  yarl = "*"
39
- zstandard = "*"
40
38
 
41
39
  [tool.poetry.scripts]
42
40
  python-urlopen = "urlopen.__main__:main"
@@ -2,105 +2,257 @@
2
2
  # coding: utf-8
3
3
 
4
4
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 1, 2)
5
+ __version__ = (0, 1, 4)
6
6
  __all__ = ["urlopen", "request", "download"]
7
7
 
8
- import errno
9
-
10
- from collections import UserString
8
+ from collections import defaultdict, deque, UserString
11
9
  from collections.abc import Buffer, Callable, Generator, Iterable, Mapping
12
10
  from copy import copy
13
- from gzip import decompress as decompress_gzip
14
- from http.client import HTTPResponse
11
+ from http.client import HTTPConnection, HTTPSConnection, HTTPResponse
15
12
  from http.cookiejar import CookieJar
13
+ from http.cookies import SimpleCookie
16
14
  from inspect import isgenerator
17
15
  from os import fsdecode, fstat, makedirs, PathLike
18
16
  from os.path import abspath, dirname, isdir, join as joinpath
19
17
  from shutil import COPY_BUFSIZE # type: ignore
20
- from socket import getdefaulttimeout, setdefaulttimeout
18
+ from socket import socket
21
19
  from ssl import SSLContext, _create_unverified_context
22
20
  from types import EllipsisType
23
21
  from typing import cast, overload, Any, Literal
24
- from urllib.error import HTTPError
22
+ from urllib.error import HTTPError, URLError
23
+ from urllib.parse import urlsplit, ParseResult, SplitResult
25
24
  from urllib.request import (
26
- build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
27
- HTTPRedirectHandler, OpenerDirector, Request,
25
+ build_opener, AbstractHTTPHandler, BaseHandler, HTTPHandler,
26
+ HTTPSHandler, HTTPRedirectHandler, OpenerDirector, Request,
28
27
  )
29
- from zlib import compressobj, DEF_MEM_LEVEL, DEFLATED, MAX_WBITS
30
28
 
31
29
  from argtools import argcount
32
- from cookietools import cookies_dict_to_str
30
+ from cookietools import cookies_to_str, extract_cookies, update_cookies
33
31
  from dicttools import iter_items
34
32
  from filewrap import bio_skip_iter, bio_chunk_iter, SupportsRead, SupportsWrite
35
33
  from http_request import normalize_request_args, SupportsGeturl
36
34
  from http_response import (
37
- get_filename, get_length, is_chunked, is_range_request,
35
+ decompress_response, get_filename, get_length, is_chunked, is_range_request,
38
36
  parse_response,
39
37
  )
38
+ from property import locked_cacheproperty
40
39
  from yarl import URL
41
40
  from undefined import undefined, Undefined
42
41
 
43
42
 
44
43
  type string = Buffer | str | UserString
45
44
 
45
# Give these stdlib classes a finalizer that simply calls their own ``close``,
# unless the class itself already defines ``__del__``.
for _patched_cls in (HTTPConnection, HTTPSConnection, HTTPResponse, OpenerDirector):
    if "__del__" not in vars(_patched_cls):
        setattr(_patched_cls, "__del__", _patched_cls.close)
del _patched_cls
50
53
 
51
- _cookies = CookieJar()
52
- _opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()), HTTPCookieProcessor(_cookies))
53
- setattr(_opener, "cookies", _cookies)
54
+ def _close_conn(self, /):
55
+ fp = self.fp
56
+ self.fp = None
57
+ pool = getattr(self, "pool", None)
58
+ conn = getattr(self, "connection", None)
59
+ if pool and conn:
60
+ try:
61
+ pool.return_connection(conn)
62
+ except NameError:
63
+ pass
64
+ else:
65
+ fp.close()
66
+
67
+ setattr(HTTPResponse, "_close_conn", _close_conn)
68
+
69
+
70
def is_ipv6(host: str, /) -> bool:
    """Return True if ``host`` is the textual form of an IPv6 address.

    The host must be given without surrounding square brackets. Zone-scoped
    addresses such as ``"fe80::1%eth0"`` are recognised as IPv6.

    :param host: host name or address literal to test.
    :return: True for an IPv6 literal, False for anything else (IPv4
        literals, DNS names, the empty string, non-string values).
    """
    # Imported lazily, as the module does not otherwise need ``ipaddress``.
    from ipaddress import IPv6Address

    # IPv6Address also accepts integers and packed bytes; only text counts here.
    if not isinstance(host, str):
        return False
    try:
        # Public API; AddressValueError is a subclass of ValueError.
        IPv6Address(host)
    except ValueError:
        return False
    return True
54
77
 
55
78
 
56
- if getdefaulttimeout() is None:
57
- setdefaulttimeout(60)
79
+ class HTTPCookieProcessor(BaseHandler):
80
+
81
+ def __init__(
82
+ self,
83
+ /,
84
+ cookies: None | CookieJar | SimpleCookie = None,
85
+ ):
86
+ if cookies is None:
87
+ cookies = CookieJar()
88
+ self.cookies = cookies
89
+
90
+ def http_request(self, request):
91
+ cookies = self.cookies
92
+ if cookies:
93
+ if isinstance(cookies, SimpleCookie):
94
+ cookies = update_cookies(CookieJar(), cookies)
95
+ cookies.add_cookie_header(request)
96
+ return request
97
+
98
+ def http_response(self, request, response):
99
+ extract_cookies(self.cookies, request.full_url, response) # type: ignore
100
+ return response
101
+
102
+ https_request = http_request
103
+ https_response = http_response
104
+
105
+
106
+ class ConnectionPool:
107
+
108
+ def __init__(
109
+ self,
110
+ /,
111
+ pool: None | defaultdict[str, deque[HTTPConnection] | deque[HTTPSConnection]] = None,
112
+ ):
113
+ if pool is None:
114
+ pool = defaultdict(deque)
115
+ self.pool = pool
116
+
117
+ def __del__(self, /):
118
+ for dq in self.pool.values():
119
+ for con in dq:
120
+ con.close()
121
+
122
+ def __repr__(self, /) -> str:
123
+ cls = type(self)
124
+ return f"{cls.__module__}.{cls.__qualname__}({self.pool!r})"
125
+
126
+ def get_connection(
127
+ self,
128
+ /,
129
+ url: str | ParseResult | SplitResult,
130
+ timeout: None | float = None,
131
+ ) -> HTTPConnection | HTTPSConnection:
132
+ if isinstance(url, str):
133
+ url = urlsplit(url)
134
+ assert url.scheme, "not a complete URL"
135
+ host = url.hostname or "localhost"
136
+ if is_ipv6(host):
137
+ host = f"[{host}]"
138
+ port = url.port or (443 if url.scheme == 'https' else 80)
139
+ origin = f"{url.scheme}://{host}:{port}"
140
+ dq = self.pool[origin]
141
+ while True:
142
+ try:
143
+ con = dq.popleft()
144
+ except IndexError:
145
+ break
146
+ sock = con.sock
147
+ if not sock or getattr(sock, "_closed"):
148
+ con.connect()
149
+ else:
150
+ sock.setblocking(False)
151
+ try:
152
+ if socket.recv(sock, 1):
153
+ con.connect()
154
+ except BlockingIOError:
155
+ pass
156
+ finally:
157
+ sock.setblocking(True)
158
+ con.timeout = timeout
159
+ return con
160
+ if url.scheme == "https":
161
+ return HTTPSConnection(url.hostname or "localhost", url.port, timeout=timeout)
162
+ else:
163
+ return HTTPConnection(url.hostname or "localhost", url.port, timeout=timeout)
164
+
165
+ def return_connection(
166
+ self,
167
+ con: HTTPConnection | HTTPSConnection,
168
+ /,
169
+ ) -> str:
170
+ if isinstance(con, HTTPSConnection):
171
+ scheme = "https"
172
+ else:
173
+ scheme = "http"
174
+ host = con.host
175
+ if is_ipv6(host):
176
+ host = f"[{host}]"
177
+ origin = f"{scheme}://{host}:{con.port}"
178
+ self.pool[origin].append(con) # type: ignore
179
+ return origin
180
+
181
+
182
class KeepAliveBaseHTTPHandler(AbstractHTTPHandler):
    """HTTP(S) handler base whose ``do_open`` reuses pooled connections."""

    @locked_cacheproperty
    def pool(self, /) -> ConnectionPool:
        return ConnectionPool()

    def do_open(self, /, http_class, req, **http_conn_args) -> HTTPResponse:
        """Send ``req`` and return the response, reusing a pooled connection.

        Follows ``urllib.request.AbstractHTTPHandler.do_open``, except that a
        ``Connection: keep-alive`` header is sent by default and the
        connection comes from ``self.pool``. When keep-alive is in effect the
        response gets ``pool`` and ``connection`` attributes so the connection
        can be handed back once the response is finished.

        :param http_class: connection class (``HTTPConnection`` or
            ``HTTPSConnection``, or a subclass).
        :param req: the ``urllib.request.Request`` being opened.
        :param http_conn_args: extra constructor arguments for ``http_class``
            (for HTTPS typically ``context``).
        :raises URLError: if no host is given or sending the request fails
            with an ``OSError``.
        """
        host = req.host
        if not host:
            raise URLError("no host given")
        pool = self.pool
        tunnel_host = req._tunnel_host

        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})
        # Normalise the header names as the stdlib handler does, so that the
        # "Connection" and "Proxy-Authorization" lookups below find headers
        # stored under a different capitalisation.
        headers = {name.title(): val for name, val in headers.items()}

        if tunnel_host:
            # A tunnelled connection is bound to one target but would be
            # pooled under the proxy's origin, and ``set_tunnel`` refuses an
            # already connected socket. Use a dedicated connection instead.
            headers["Connection"] = "close"
            h = http_class(host, timeout=req.timeout, **http_conn_args)
        else:
            headers.setdefault("Connection", "keep-alive")
            # ``http_class`` is a connection class, so it has to be compared
            # with HTTPSConnection to tell the scheme.
            scheme = "https" if issubclass(http_class, HTTPSConnection) else "http"
            h = pool.get_connection(f"{scheme}://{host}", timeout=req.timeout)
            if http_conn_args and h.sock is None:
                # Not connected yet: rebuild it with the handler's own
                # arguments (e.g. the SSL context) so they are honoured.
                # NOTE(review): a connection reused from a pool shared between
                # handlers keeps the arguments it was originally built with.
                h = http_class(h.host, h.port, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel) # type: ignore

        if tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                # The credentials go to the proxy only, not to the target.
                tunnel_headers[proxy_auth_hdr] = headers.pop(proxy_auth_hdr)
            h.set_tunnel(tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err:
                raise URLError(err) from err
            r = h.getresponse()
        except BaseException:
            # The connection is in an undefined state; close it instead of
            # handing it back to the pool.
            h.close()
            raise

        r.url = req.get_full_url()
        r.msg = r.reason
        # Keep the connection referenced by the response so that it is not
        # finalised (and closed) while the body is still being read.
        setattr(r, "connection", h)
        if not tunnel_host and str(headers["Connection"]).lower() == "keep-alive":
            setattr(r, "pool", pool)
        return r
226
+
227
+
228
class KeepAliveHTTPHandler(HTTPHandler, KeepAliveBaseHTTPHandler):
    """``HTTPHandler`` combined with ``KeepAliveBaseHTTPHandler``."""
230
+
231
+
232
class KeepAliveHTTPSHandler(HTTPSHandler, KeepAliveBaseHTTPHandler):
    """``HTTPSHandler`` combined with ``KeepAliveBaseHTTPHandler``."""
234
+
235
+
236
# Module-wide defaults: one connection pool shared by both keep-alive
# handlers, one cookie jar, and the opener built from them.
_pool = ConnectionPool()
_cookies = CookieJar()

_http_handler = KeepAliveHTTPHandler()
_https_handler = KeepAliveHTTPSHandler(context=_create_unverified_context())
_http_handler.pool = _pool
_https_handler.pool = _pool

_opener: OpenerDirector = build_opener(
    _http_handler,
    _https_handler,
    HTTPCookieProcessor(_cookies),
)
# The jar is also exposed on the opener itself as ``cookies``.
setattr(_opener, "cookies", _cookies)
58
248
 
59
249
 
60
250
  class NoRedirectHandler(HTTPRedirectHandler):
251
+
61
252
  def redirect_request(self, /, *args, **kwds):
62
253
  return None
63
254
 
64
255
 
65
- def decompress_deflate(data: bytes, compresslevel: int = 9) -> bytes:
66
- # Fork from: https://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations#answer-1089787
67
- compress = compressobj(
68
- compresslevel, # level: 0-9
69
- DEFLATED, # method: must be DEFLATED
70
- -MAX_WBITS, # window size in bits:
71
- # -15..-8: negate, suppress header
72
- # 8..15: normal
73
- # 16..30: subtract 16, gzip header
74
- DEF_MEM_LEVEL, # mem level: 1..8/9
75
- 0 # strategy:
76
- # 0 = Z_DEFAULT_STRATEGY
77
- # 1 = Z_FILTERED
78
- # 2 = Z_HUFFMAN_ONLY
79
- # 3 = Z_RLE
80
- # 4 = Z_FIXED
81
- )
82
- deflated = compress.compress(data)
83
- deflated += compress.flush()
84
- return deflated
85
-
86
-
87
- def decompress_response(response: HTTPResponse, /) -> bytes:
88
- data = response.read()
89
- content_encoding = response.headers.get("content-encoding")
90
- match content_encoding:
91
- case "gzip":
92
- data = decompress_gzip(data)
93
- case "deflate":
94
- data = decompress_deflate(data)
95
- case "br":
96
- from brotli import decompress as decompress_br # type: ignore
97
- data = decompress_br(data)
98
- case "zstd":
99
- from zstandard import decompress as decompress_zstd
100
- data = decompress_zstd(data)
101
- return data
102
-
103
-
104
256
  def urlopen(
105
257
  url: string | SupportsGeturl | URL | Request,
106
258
  method: string = "GET",
@@ -112,7 +264,7 @@ def urlopen(
112
264
  follow_redirects: bool = True,
113
265
  proxies: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
114
266
  context: None | SSLContext = None,
115
- cookies: None | CookieJar = None,
267
+ cookies: None | CookieJar | SimpleCookie = None,
116
268
  timeout: None | Undefined | float = undefined,
117
269
  opener: None | OpenerDirector = _opener,
118
270
  **_,
@@ -140,18 +292,51 @@ def urlopen(
140
292
  headers_ = request.headers
141
293
  if opener is None:
142
294
  handlers: list[BaseHandler] = []
295
+ if cookies is None:
296
+ cookies = CookieJar()
143
297
  else:
144
298
  handlers = list(map(copy, getattr(opener, "handlers")))
145
299
  if cookies is None:
146
300
  cookies = getattr(opener, "cookies", None)
147
301
  if cookies and "cookie" not in headers_:
148
- headers_["cookie"] = cookies_dict_to_str(cookies)
149
- if context is not None:
150
- handlers.append(HTTPSHandler(context=context))
151
- elif opener is None:
152
- handlers.append(HTTPSHandler(context=_create_unverified_context()))
153
- if cookies is not None and (opener is None or all(
154
- h.cookiejar is not cookies
302
+ headers_["cookie"] = cookies_to_str(cookies)
303
+ if context is None:
304
+ if opener is None:
305
+ handlers.append(copy(_https_handler))
306
+ else:
307
+ for i, handler in enumerate(handlers):
308
+ if isinstance(handler, KeepAliveHTTPSHandler):
309
+ break
310
+ elif isinstance(handler, HTTPSHandler):
311
+ handlers[i] = copy(_https_handler)
312
+ break
313
+ else:
314
+ handlers.append(copy(_https_handler))
315
+ else:
316
+ https_handler = KeepAliveHTTPSHandler(context=context)
317
+ https_handler.pool = _pool
318
+ if opener is None:
319
+ handlers.append(https_handler)
320
+ else:
321
+ for i, handler in enumerate(handlers):
322
+ if isinstance(handler, HTTPSHandler):
323
+ handlers[i] = https_handler
324
+ break
325
+ else:
326
+ handlers.append(https_handler)
327
+ if opener is None:
328
+ handlers.append(copy(_http_handler))
329
+ else:
330
+ for i, handler in enumerate(handlers):
331
+ if isinstance(handler, KeepAliveHTTPHandler):
332
+ break
333
+ elif isinstance(handler, HTTPHandler):
334
+ handlers[i] = copy(_http_handler)
335
+ break
336
+ else:
337
+ handlers.append(copy(_http_handler))
338
+ if cookies and (opener is None or all(
339
+ h.cookies is not cookies
155
340
  for h in getattr(opener, "handlers") if isinstance(h, HTTPCookieProcessor)
156
341
  )):
157
342
  handlers.append(HTTPCookieProcessor(cookies))
@@ -167,7 +352,7 @@ def urlopen(
167
352
  if timeout is undefined:
168
353
  response = opener.open(request)
169
354
  else:
170
- response = opener.open(request, timeout=cast(None|float, timeout))
355
+ response = opener.open(request, timeout=cast(None | float, timeout))
171
356
  setattr(response, "opener", opener)
172
357
  setattr(response, "cookies", response_cookies)
173
358
  return response
@@ -189,6 +374,7 @@ def request(
189
374
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
190
375
  follow_redirects: bool = True,
191
376
  raise_for_status: bool = True,
377
+ cookies: None | CookieJar | SimpleCookie = None,
192
378
  *,
193
379
  parse: None | EllipsisType = None,
194
380
  **request_kwargs,
@@ -205,6 +391,7 @@ def request(
205
391
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
206
392
  follow_redirects: bool = True,
207
393
  raise_for_status: bool = True,
394
+ cookies: None | CookieJar | SimpleCookie = None,
208
395
  *,
209
396
  parse: Literal[False],
210
397
  **request_kwargs,
@@ -221,6 +408,7 @@ def request(
221
408
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
222
409
  follow_redirects: bool = True,
223
410
  raise_for_status: bool = True,
411
+ cookies: None | CookieJar | SimpleCookie = None,
224
412
  *,
225
413
  parse: Literal[True],
226
414
  **request_kwargs,
@@ -237,6 +425,7 @@ def request[T](
237
425
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
238
426
  follow_redirects: bool = True,
239
427
  raise_for_status: bool = True,
428
+ cookies: None | CookieJar | SimpleCookie = None,
240
429
  *,
241
430
  parse: Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T],
242
431
  **request_kwargs,
@@ -252,6 +441,7 @@ def request[T](
252
441
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
253
442
  follow_redirects: bool = True,
254
443
  raise_for_status: bool = True,
444
+ cookies: None | CookieJar | SimpleCookie = None,
255
445
  *,
256
446
  parse: None | EllipsisType| bool | Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T] = None,
257
447
  **request_kwargs,
@@ -266,6 +456,7 @@ def request[T](
266
456
  files=files,
267
457
  headers=headers,
268
458
  follow_redirects=follow_redirects,
459
+ cookies=cookies,
269
460
  **request_kwargs,
270
461
  )
271
462
  except HTTPError as e:
@@ -279,7 +470,7 @@ def request[T](
279
470
  return response
280
471
  with response:
281
472
  if isinstance(parse, bool):
282
- data = decompress_response(response)
473
+ data = decompress_response(response.read(), response)
283
474
  if parse:
284
475
  return parse_response(response, data)
285
476
  return data
@@ -287,8 +478,8 @@ def request[T](
287
478
  if ac == 1:
288
479
  return cast(Callable[[HTTPResponse], T], parse)(response)
289
480
  else:
290
- return cast(Callable[[HTTPResponse, bytes], T], parse)(
291
- response, decompress_response(response))
481
+ data = decompress_response(response.read(), response)
482
+ return cast(Callable[[HTTPResponse, bytes], T], parse)(response, data)
292
483
 
293
484
 
294
485
  def download(
@@ -373,7 +564,7 @@ def download(
373
564
  return file
374
565
  elif content_length is not None and filesize > content_length:
375
566
  raise OSError(
376
- errno.EIO,
567
+ 5, # errno.EIO
377
568
  f"file {file!r} is larger than url {url!r}: {filesize} > {content_length} (in bytes)",
378
569
  )
379
570
  reporthook_close: None | Callable = None
@@ -394,7 +585,10 @@ def download(
394
585
  response.close()
395
586
  response = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **request_kwargs)
396
587
  if not is_range_request(response):
397
- raise OSError(errno.EIO, f"range request failed: {url!r}")
588
+ raise OSError(
589
+ 5, # errno.EIO
590
+ f"range request failed: {url!r}",
591
+ )
398
592
  if reporthook is not None:
399
593
  reporthook(filesize)
400
594
  elif resume:
File without changes
File without changes