python-urlopen 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-urlopen
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Python urlopen wrapper.
5
5
  Home-page: https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
6
6
  License: MIT
@@ -20,12 +20,12 @@ Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Classifier: Topic :: Software Development
21
21
  Classifier: Topic :: Software Development :: Libraries
22
22
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
- Requires-Dist: http_response (>=0.0.6)
23
+ Requires-Dist: http_response (>=0.0.8)
24
24
  Requires-Dist: python-argtools (>=0.0.2)
25
- Requires-Dist: python-cookietools (>=0.0.8)
25
+ Requires-Dist: python-cookietools (>=0.0.11)
26
26
  Requires-Dist: python-dicttools (>=0.0.2)
27
27
  Requires-Dist: python-filewrap (>=0.2.8)
28
- Requires-Dist: python-http_request (>=0.1.0)
28
+ Requires-Dist: python-http_request (>=0.1.3)
29
29
  Requires-Dist: python-undefined (>=0.0.3)
30
30
  Requires-Dist: yarl
31
31
  Project-URL: Repository, https://github.com/ChenyangGao/python-modules/tree/main/python-urlopen
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "python-urlopen"
3
- version = "0.1.3"
3
+ version = "0.1.4"
4
4
  description = "Python urlopen wrapper."
5
5
  authors = ["ChenyangGao <wosiwujm@gmail.com>"]
6
6
  license = "MIT"
@@ -27,12 +27,12 @@ include = [
27
27
 
28
28
  [tool.poetry.dependencies]
29
29
  python = "^3.12"
30
- http_response = ">=0.0.6"
30
+ http_response = ">=0.0.8"
31
31
  python-argtools = ">=0.0.2"
32
- python-cookietools = ">=0.0.8"
32
+ python-cookietools = ">=0.0.11"
33
33
  python-dicttools = ">=0.0.2"
34
34
  python-filewrap = ">=0.2.8"
35
- python-http_request = ">=0.1.0"
35
+ python-http_request = ">=0.1.3"
36
36
  python-undefined = ">=0.0.3"
37
37
  yarl = "*"
38
38
 
@@ -2,32 +2,32 @@
2
2
  # coding: utf-8
3
3
 
4
4
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 1, 3)
5
+ __version__ = (0, 1, 4)
6
6
  __all__ = ["urlopen", "request", "download"]
7
7
 
8
- import errno
9
-
10
- from collections import UserString
8
+ from collections import defaultdict, deque, UserString
11
9
  from collections.abc import Buffer, Callable, Generator, Iterable, Mapping
12
10
  from copy import copy
13
- from http.client import HTTPResponse
11
+ from http.client import HTTPConnection, HTTPSConnection, HTTPResponse
14
12
  from http.cookiejar import CookieJar
13
+ from http.cookies import SimpleCookie
15
14
  from inspect import isgenerator
16
15
  from os import fsdecode, fstat, makedirs, PathLike
17
16
  from os.path import abspath, dirname, isdir, join as joinpath
18
17
  from shutil import COPY_BUFSIZE # type: ignore
19
- from socket import getdefaulttimeout, setdefaulttimeout
18
+ from socket import socket
20
19
  from ssl import SSLContext, _create_unverified_context
21
20
  from types import EllipsisType
22
21
  from typing import cast, overload, Any, Literal
23
- from urllib.error import HTTPError
22
+ from urllib.error import HTTPError, URLError
23
+ from urllib.parse import urlsplit, ParseResult, SplitResult
24
24
  from urllib.request import (
25
- build_opener, BaseHandler, HTTPCookieProcessor, HTTPSHandler,
26
- HTTPRedirectHandler, OpenerDirector, Request,
25
+ build_opener, AbstractHTTPHandler, BaseHandler, HTTPHandler,
26
+ HTTPSHandler, HTTPRedirectHandler, OpenerDirector, Request,
27
27
  )
28
28
 
29
29
  from argtools import argcount
30
- from cookietools import cookies_dict_to_str
30
+ from cookietools import cookies_to_str, extract_cookies, update_cookies
31
31
  from dicttools import iter_items
32
32
  from filewrap import bio_skip_iter, bio_chunk_iter, SupportsRead, SupportsWrite
33
33
  from http_request import normalize_request_args, SupportsGeturl
@@ -35,27 +35,220 @@ from http_response import (
35
35
  decompress_response, get_filename, get_length, is_chunked, is_range_request,
36
36
  parse_response,
37
37
  )
38
+ from property import locked_cacheproperty
38
39
  from yarl import URL
39
40
  from undefined import undefined, Undefined
40
41
 
41
42
 
42
43
  type string = Buffer | str | UserString
43
44
 
45
+ if "__del__" not in HTTPConnection.__dict__:
46
+ setattr(HTTPConnection, "__del__", HTTPConnection.close)
47
+ if "__del__" not in HTTPSConnection.__dict__:
48
+ setattr(HTTPSConnection, "__del__", HTTPSConnection.close)
44
49
  if "__del__" not in HTTPResponse.__dict__:
45
50
  setattr(HTTPResponse, "__del__", HTTPResponse.close)
46
51
  if "__del__" not in OpenerDirector.__dict__:
47
52
  setattr(OpenerDirector, "__del__", OpenerDirector.close)
48
53
 
49
- _cookies = CookieJar()
50
- _opener: OpenerDirector = build_opener(HTTPSHandler(context=_create_unverified_context()), HTTPCookieProcessor(_cookies))
51
- setattr(_opener, "cookies", _cookies)
54
+ def _close_conn(self, /):
55
+ fp = self.fp
56
+ self.fp = None
57
+ pool = getattr(self, "pool", None)
58
+ conn = getattr(self, "connection", None)
59
+ if pool and conn:
60
+ try:
61
+ pool.return_connection(conn)
62
+ except NameError:
63
+ pass
64
+ else:
65
+ fp.close()
66
+
67
+ setattr(HTTPResponse, "_close_conn", _close_conn)
68
+
69
+
70
+ def is_ipv6(host: str, /) -> bool:
71
+ from ipaddress import _BaseV6, AddressValueError
72
+ try:
73
+ _BaseV6._ip_int_from_string(host) # type: ignore
74
+ return True
75
+ except AddressValueError:
76
+ return False
77
+
78
+
79
+ class HTTPCookieProcessor(BaseHandler):
80
+
81
+ def __init__(
82
+ self,
83
+ /,
84
+ cookies: None | CookieJar | SimpleCookie = None,
85
+ ):
86
+ if cookies is None:
87
+ cookies = CookieJar()
88
+ self.cookies = cookies
89
+
90
+ def http_request(self, request):
91
+ cookies = self.cookies
92
+ if cookies:
93
+ if isinstance(cookies, SimpleCookie):
94
+ cookies = update_cookies(CookieJar(), cookies)
95
+ cookies.add_cookie_header(request)
96
+ return request
97
+
98
+ def http_response(self, request, response):
99
+ extract_cookies(self.cookies, request.full_url, response) # type: ignore
100
+ return response
101
+
102
+ https_request = http_request
103
+ https_response = http_response
104
+
105
+
106
+ class ConnectionPool:
107
+
108
+ def __init__(
109
+ self,
110
+ /,
111
+ pool: None | defaultdict[str, deque[HTTPConnection] | deque[HTTPSConnection]] = None,
112
+ ):
113
+ if pool is None:
114
+ pool = defaultdict(deque)
115
+ self.pool = pool
116
+
117
+ def __del__(self, /):
118
+ for dq in self.pool.values():
119
+ for con in dq:
120
+ con.close()
121
+
122
+ def __repr__(self, /) -> str:
123
+ cls = type(self)
124
+ return f"{cls.__module__}.{cls.__qualname__}({self.pool!r})"
125
+
126
+ def get_connection(
127
+ self,
128
+ /,
129
+ url: str | ParseResult | SplitResult,
130
+ timeout: None | float = None,
131
+ ) -> HTTPConnection | HTTPSConnection:
132
+ if isinstance(url, str):
133
+ url = urlsplit(url)
134
+ assert url.scheme, "not a complete URL"
135
+ host = url.hostname or "localhost"
136
+ if is_ipv6(host):
137
+ host = f"[{host}]"
138
+ port = url.port or (443 if url.scheme == 'https' else 80)
139
+ origin = f"{url.scheme}://{host}:{port}"
140
+ dq = self.pool[origin]
141
+ while True:
142
+ try:
143
+ con = dq.popleft()
144
+ except IndexError:
145
+ break
146
+ sock = con.sock
147
+ if not sock or getattr(sock, "_closed"):
148
+ con.connect()
149
+ else:
150
+ sock.setblocking(False)
151
+ try:
152
+ if socket.recv(sock, 1):
153
+ con.connect()
154
+ except BlockingIOError:
155
+ pass
156
+ finally:
157
+ sock.setblocking(True)
158
+ con.timeout = timeout
159
+ return con
160
+ if url.scheme == "https":
161
+ return HTTPSConnection(url.hostname or "localhost", url.port, timeout=timeout)
162
+ else:
163
+ return HTTPConnection(url.hostname or "localhost", url.port, timeout=timeout)
164
+
165
+ def return_connection(
166
+ self,
167
+ con: HTTPConnection | HTTPSConnection,
168
+ /,
169
+ ) -> str:
170
+ if isinstance(con, HTTPSConnection):
171
+ scheme = "https"
172
+ else:
173
+ scheme = "http"
174
+ host = con.host
175
+ if is_ipv6(host):
176
+ host = f"[{host}]"
177
+ origin = f"{scheme}://{host}:{con.port}"
178
+ self.pool[origin].append(con) # type: ignore
179
+ return origin
180
+
52
181
 
182
+ class KeepAliveBaseHTTPHandler(AbstractHTTPHandler):
53
183
 
54
- if getdefaulttimeout() is None:
55
- setdefaulttimeout(60)
184
+ @locked_cacheproperty
185
+ def pool(self, /) -> ConnectionPool:
186
+ return ConnectionPool()
187
+
188
+ def do_open(self, /, http_class, req, **http_conn_args) -> HTTPResponse:
189
+ host = req.host
190
+ if not host:
191
+ raise URLError("no host given")
192
+ pool = self.pool
193
+ if issubclass(http_class, HTTPSHandler):
194
+ origin = "https://" + host
195
+ else:
196
+ origin = "http://" + host
197
+ h = pool.get_connection(origin, timeout=req.timeout)
198
+ h.set_debuglevel(self._debuglevel) # type: ignore
199
+ headers = dict(req.unredirected_hdrs)
200
+ headers.update({k: v for k, v in req.headers.items()
201
+ if k not in headers})
202
+ headers.setdefault("connection", "keep-alive")
203
+ if req._tunnel_host:
204
+ tunnel_headers = {}
205
+ proxy_auth_hdr = "Proxy-Authorization"
206
+ if proxy_auth_hdr in headers:
207
+ tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
208
+ del headers[proxy_auth_hdr]
209
+ h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
210
+ try:
211
+ try:
212
+ h.request(req.get_method(), req.selector, req.data, headers,
213
+ encode_chunked=req.has_header('Transfer-encoding'))
214
+ except OSError as err:
215
+ raise URLError(err)
216
+ r = h.getresponse()
217
+ except:
218
+ pool.return_connection(h)
219
+ raise
220
+ r.url = req.get_full_url()
221
+ r.msg = r.reason
222
+ if headers.get("connection") == "keep-alive":
223
+ setattr(r, "pool", pool)
224
+ setattr(r, "connection", h)
225
+ return r
226
+
227
+
228
+ class KeepAliveHTTPHandler(HTTPHandler, KeepAliveBaseHTTPHandler):
229
+ pass
230
+
231
+
232
+ class KeepAliveHTTPSHandler(HTTPSHandler, KeepAliveBaseHTTPHandler):
233
+ pass
234
+
235
+
236
+ _pool = ConnectionPool()
237
+ _http_handler = KeepAliveHTTPHandler()
238
+ _http_handler.pool = _pool
239
+ _https_handler = KeepAliveHTTPSHandler(context=_create_unverified_context())
240
+ _https_handler.pool = _pool
241
+ _cookies = CookieJar()
242
+ _opener: OpenerDirector = build_opener(
243
+ _http_handler,
244
+ _https_handler,
245
+ HTTPCookieProcessor(_cookies),
246
+ )
247
+ setattr(_opener, "cookies", _cookies)
56
248
 
57
249
 
58
250
  class NoRedirectHandler(HTTPRedirectHandler):
251
+
59
252
  def redirect_request(self, /, *args, **kwds):
60
253
  return None
61
254
 
@@ -71,7 +264,7 @@ def urlopen(
71
264
  follow_redirects: bool = True,
72
265
  proxies: None | Mapping[str, str] | Iterable[tuple[str, str]] = None,
73
266
  context: None | SSLContext = None,
74
- cookies: None | CookieJar = None,
267
+ cookies: None | CookieJar | SimpleCookie = None,
75
268
  timeout: None | Undefined | float = undefined,
76
269
  opener: None | OpenerDirector = _opener,
77
270
  **_,
@@ -99,18 +292,51 @@ def urlopen(
99
292
  headers_ = request.headers
100
293
  if opener is None:
101
294
  handlers: list[BaseHandler] = []
295
+ if cookies is None:
296
+ cookies = CookieJar()
102
297
  else:
103
298
  handlers = list(map(copy, getattr(opener, "handlers")))
104
299
  if cookies is None:
105
300
  cookies = getattr(opener, "cookies", None)
106
301
  if cookies and "cookie" not in headers_:
107
- headers_["cookie"] = cookies_dict_to_str(cookies)
108
- if context is not None:
109
- handlers.append(HTTPSHandler(context=context))
110
- elif opener is None:
111
- handlers.append(HTTPSHandler(context=_create_unverified_context()))
112
- if cookies is not None and (opener is None or all(
113
- h.cookiejar is not cookies
302
+ headers_["cookie"] = cookies_to_str(cookies)
303
+ if context is None:
304
+ if opener is None:
305
+ handlers.append(copy(_https_handler))
306
+ else:
307
+ for i, handler in enumerate(handlers):
308
+ if isinstance(handler, KeepAliveHTTPSHandler):
309
+ break
310
+ elif isinstance(handler, HTTPSHandler):
311
+ handlers[i] = copy(_https_handler)
312
+ break
313
+ else:
314
+ handlers.append(copy(_https_handler))
315
+ else:
316
+ https_handler = KeepAliveHTTPSHandler(context=context)
317
+ https_handler.pool = _pool
318
+ if opener is None:
319
+ handlers.append(https_handler)
320
+ else:
321
+ for i, handler in enumerate(handlers):
322
+ if isinstance(handler, HTTPSHandler):
323
+ handlers[i] = https_handler
324
+ break
325
+ else:
326
+ handlers.append(https_handler)
327
+ if opener is None:
328
+ handlers.append(copy(_http_handler))
329
+ else:
330
+ for i, handler in enumerate(handlers):
331
+ if isinstance(handler, KeepAliveHTTPHandler):
332
+ break
333
+ elif isinstance(handler, HTTPHandler):
334
+ handlers[i] = copy(_http_handler)
335
+ break
336
+ else:
337
+ handlers.append(copy(_http_handler))
338
+ if cookies and (opener is None or all(
339
+ h.cookies is not cookies
114
340
  for h in getattr(opener, "handlers") if isinstance(h, HTTPCookieProcessor)
115
341
  )):
116
342
  handlers.append(HTTPCookieProcessor(cookies))
@@ -126,7 +352,7 @@ def urlopen(
126
352
  if timeout is undefined:
127
353
  response = opener.open(request)
128
354
  else:
129
- response = opener.open(request, timeout=cast(None|float, timeout))
355
+ response = opener.open(request, timeout=cast(None | float, timeout))
130
356
  setattr(response, "opener", opener)
131
357
  setattr(response, "cookies", response_cookies)
132
358
  return response
@@ -148,6 +374,7 @@ def request(
148
374
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
149
375
  follow_redirects: bool = True,
150
376
  raise_for_status: bool = True,
377
+ cookies: None | CookieJar | SimpleCookie = None,
151
378
  *,
152
379
  parse: None | EllipsisType = None,
153
380
  **request_kwargs,
@@ -164,6 +391,7 @@ def request(
164
391
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
165
392
  follow_redirects: bool = True,
166
393
  raise_for_status: bool = True,
394
+ cookies: None | CookieJar | SimpleCookie = None,
167
395
  *,
168
396
  parse: Literal[False],
169
397
  **request_kwargs,
@@ -180,6 +408,7 @@ def request(
180
408
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
181
409
  follow_redirects: bool = True,
182
410
  raise_for_status: bool = True,
411
+ cookies: None | CookieJar | SimpleCookie = None,
183
412
  *,
184
413
  parse: Literal[True],
185
414
  **request_kwargs,
@@ -196,6 +425,7 @@ def request[T](
196
425
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
197
426
  follow_redirects: bool = True,
198
427
  raise_for_status: bool = True,
428
+ cookies: None | CookieJar | SimpleCookie = None,
199
429
  *,
200
430
  parse: Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T],
201
431
  **request_kwargs,
@@ -211,6 +441,7 @@ def request[T](
211
441
  headers: None | Mapping[string, string] | Iterable[tuple[string, string]] = None,
212
442
  follow_redirects: bool = True,
213
443
  raise_for_status: bool = True,
444
+ cookies: None | CookieJar | SimpleCookie = None,
214
445
  *,
215
446
  parse: None | EllipsisType| bool | Callable[[HTTPResponse, bytes], T] | Callable[[HTTPResponse], T] = None,
216
447
  **request_kwargs,
@@ -225,6 +456,7 @@ def request[T](
225
456
  files=files,
226
457
  headers=headers,
227
458
  follow_redirects=follow_redirects,
459
+ cookies=cookies,
228
460
  **request_kwargs,
229
461
  )
230
462
  except HTTPError as e:
@@ -332,7 +564,7 @@ def download(
332
564
  return file
333
565
  elif content_length is not None and filesize > content_length:
334
566
  raise OSError(
335
- errno.EIO,
567
+ 5, # errno.EIO
336
568
  f"file {file!r} is larger than url {url!r}: {filesize} > {content_length} (in bytes)",
337
569
  )
338
570
  reporthook_close: None | Callable = None
@@ -353,7 +585,10 @@ def download(
353
585
  response.close()
354
586
  response = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize}, **request_kwargs)
355
587
  if not is_range_request(response):
356
- raise OSError(errno.EIO, f"range request failed: {url!r}")
588
+ raise OSError(
589
+ 5, # errno.EIO
590
+ f"range request failed: {url!r}",
591
+ )
357
592
  if reporthook is not None:
358
593
  reporthook(filesize)
359
594
  elif resume:
File without changes
File without changes