konnect.http 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.1
2
+ Name: konnect.http
3
+ Version: 0.1.0
4
+ Summary: Pythonic, asynchronous HTTP client
5
+ Author-email: Dom Sekotill <dom.sekotill@kodo.org.uk>
6
+ Requires-Python: ~=3.11
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
10
+ Classifier: Topic :: Internet
11
+ Requires-Dist: anyio ~=4.0
12
+ Requires-Dist: konnect.curl ~=0.1.0
@@ -0,0 +1,6 @@
1
+ # Copyright 2023-2024 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ from .request import Method as Method
4
+ from .request import Request as Request
5
+ from .response import Response as Response
6
+ from .session import Session as Session
@@ -0,0 +1,39 @@
1
+ # Copyright 2023 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Authentication handlers for adding auth data to requests and implementing auth flows
5
+
6
+ The two entrypoints for all concrete authentication handler classes are
7
+ `AuthHandler.prepare_request()` for up-front modifications to requests and pre-request
8
+ authentication flows, and `AuthHandler.process_response()` for post-request authentication
9
+ flows.
10
+ """
11
+
12
+ from typing import Protocol
13
+
14
+ from .request import CurlRequest
15
+ from .response import Response
16
+
17
+
18
class AuthHandler(Protocol):
    """
    Abstract definition of authentication handlers' entrypoints

    Both coroutine methods are awaited around a request when a handler is registered
    for the request's service (see `CurlRequest.get_response()` in the request module).
    """

    async def prepare_request(self, request: CurlRequest) -> None:
        """
        Process a request instance before the request is enacted

        This method can be used by handlers to modify requests (such as adding headers or
        adding session cookies); it is a coroutine to allow handlers to inject an auth-flow
        before the request. Any such flow SHOULD use the request's session.
        """

    async def process_response(self, request: CurlRequest, response: Response) -> Response:
        """
        Examine a response to a request and perform any follow-up actions

        This method may return the passed response if the request was authenticated and no
        further actions need to be taken; or further requests can be made if necessary,
        after which a new successful response to an identical request must be returned.
        """
@@ -0,0 +1,4 @@
1
+ # Copyright 2023 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ from .cookie import Cookie as Cookie
4
+ from .cookie import check_cookie as check_cookie
@@ -0,0 +1,137 @@
1
+ # Copyright 2023 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Module containing the core Cookie class and checking functions
5
+ """
6
+
7
+ from datetime import datetime as DateTime
8
+ from datetime import timedelta as TimeDelta
9
+ from ipaddress import IPv4Address
10
+ from ipaddress import IPv6Address
11
+ from ipaddress import ip_address
12
+ from typing import Self
13
+ from urllib.parse import urlparse
14
+
15
+ from .set_cookie_parser import TokenType
16
+ from .set_cookie_parser import tokenise
17
+
18
+
19
+ class Cookie:
20
+ """
21
+ Client-side cookie data class
22
+ """
23
+
24
+ def __init__(
25
+ self,
26
+ name: str, value: bytes,
27
+ expires: DateTime|TimeDelta|None,
28
+ domain: str, path: str, *,
29
+ secure: bool = False, httponly: bool = False, exactdomain: bool = True,
30
+ ):
31
+ self.name = name
32
+ self.value = value
33
+ self.expires = (
34
+ None if expires is None else
35
+ expires if isinstance(expires, DateTime) else
36
+ (DateTime.now() + expires)
37
+ )
38
+ self.domain = domain
39
+ self.path = path
40
+ self.secure = secure
41
+ self.httponly = httponly
42
+ self.exactdomain = exactdomain
43
+
44
+ def __bytes__(self) -> bytes:
45
+ return b"=".join((self.name.encode("ascii"), self.value))
46
+
47
+ @classmethod
48
+ def from_header(cls, header: bytes, domain: str, path: str) -> Self:
49
+ """
50
+ Return a `Cookie` from an HTTP "Set-Cookie:" header string
51
+ """
52
+ self = cls("", b"", None, domain, path)
53
+ for token in tokenise(header):
54
+ if token[0] is TokenType.NAME:
55
+ self.name = token[1]
56
+ elif token[0] is TokenType.VALUE:
57
+ self.value = token[1]
58
+ elif token[0] is TokenType.DOMAIN:
59
+ # TODO: check cookie domain allowed
60
+ self.domain = token[1]
61
+ self.exactdomain = False
62
+ elif token[0] is TokenType.PATH:
63
+ # TODO: check cookie path allowed
64
+ self.path = token[1]
65
+ elif token[0] is TokenType.EXPIRES:
66
+ self.expires = token[1]
67
+ elif token[0] is TokenType.MAX_AGE:
68
+ self.expires = DateTime.now() + token[1]
69
+ elif token[0] is TokenType.SECURE:
70
+ self.secure = token[1]
71
+ elif token[0] is TokenType.HTTP_ONLY:
72
+ self.httponly = token[1]
73
+ assert self.name != ""
74
+ return self
75
+
76
+ def as_header(self, header_name: str = "Cookie") -> bytes:
77
+ """
78
+ Return the cookie formatted as an HTTP header
79
+ """
80
+ raise NotImplementedError
81
+
82
+
83
+ def check_cookie(cookie: Cookie, url: str) -> bool:
84
+ """
85
+ Return whether the cookie should be sent with the request
86
+ """
87
+ if cookie.expires and cookie.expires <= DateTime.now():
88
+ return False
89
+
90
+ parts = urlparse(url)
91
+ if not parts.hostname:
92
+ raise ValueError("URLs must be absolute")
93
+
94
+ cookie_host = normalencode_host(cookie.domain)
95
+ url_host = normalencode_host(parts.hostname)
96
+ if cookie_host != url_host:
97
+ if not isinstance(cookie_host, bytes) or not isinstance(url_host, bytes):
98
+ return False
99
+ if cookie.domain[-1] == ".": # Treat trailing `.` as marker for exact match
100
+ return False
101
+ if not url_host.endswith(b"." + cookie_host):
102
+ return False
103
+
104
+ cookie_path = normalise_path(cookie.path)
105
+ url_path = normalise_path(parts.path)
106
+ if cookie_path != url_path:
107
+ prefix, _, suffix = url_path.partition(cookie_path)
108
+ if prefix:
109
+ return False
110
+ if suffix and suffix[0] != "/":
111
+ return False
112
+
113
+ return True
114
+
115
+
116
+ def normalencode_host(host: str) -> bytes|IPv4Address|IPv6Address:
117
+ """
118
+ Turn a host name or address string into a normalised host name or IP address object
119
+
120
+ Host name normalisation includes case lowering, and encoding unicode labels according to
121
+ IDNA standards.
122
+ """
123
+ try:
124
+ return ip_address(host)
125
+ except ValueError:
126
+ return host.strip(".").lower().encode("idna")
127
+
128
+
129
+ def normalise_path(path: str) -> str:
130
+ """
131
+ Normalise a cookie path by stripping leaf components and replacing invalid paths with /
132
+
133
+ Note that the output path *always* ends with a "/".
134
+ """
135
+ if not path or path[0] != "/":
136
+ return "/"
137
+ return path.rsplit("/", 1)[0] + "/"
@@ -0,0 +1,89 @@
1
+ # Copyright 2023 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Parsing of timestamps used for the "expires" attribute of "Set-Cookie:" headers
5
+
6
+ For historical reasons, although the current standards prefer RFC822#section-5 dates
7
+ (updated by RFC1123#section-5.2.14) as stated in RFC2616#section-3.3.1, user-agents must be
8
+ able to parse a wide variety of formats. This module provides an implementation of the
9
+ algorithm described in RFC6265#section-5.1.1
10
+ """
11
+
12
+ import re
13
+ from datetime import datetime as DateTime
14
+ from typing import TypedDict
15
+
16
+ __all__ = ["dateparse"]
17
+
18
MONTHS = [
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
]

DATETIME_RE = re.compile(
    r"""(?ix)
    (?:^|[\t\x20-\x2f\x3b-\x40\x5b-\x60\x7b-\x7e]+)  # Start or leading delimiter chars
    (?P<match>
        (?P<month>jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec) |
        (?P<hour>[0-9]{1,2}):(?P<min>[0-9]{1,2}):(?P<sec>[0-9]{1,2}) (?=[^0-9]*) |
        (?P<number>[0-9]{1,4}) (?=[^0-9]*)
    )
    """,
)


class DateTimeArgs(TypedDict):
    """
    A dictionary of keyword arguments to pass to `DateTime`
    """

    year: int
    month: int
    day: int
    hour: int
    minute: int
    second: int


def dateparse(date: str) -> DateTime:
    """
    Parse a "Set-Cookie:" expiration date string

    Follows the permissive tokenising algorithm of RFC6265#section-5.1.1: the first
    time-of-day, day-of-month, month name, and year found are used regardless of their
    order; words the tokeniser does not recognise (e.g. weekday names, "GMT") are
    skipped.  Raises `ValueError` for out-of-range components, duplicated or
    unexpected values, and incomplete dates.
    """
    vals = DateTimeArgs(year=0, month=0, day=0, hour=0, minute=0, second=0)
    found_time = found_day = found_month = found_year = False

    for token in DATETIME_RE.finditer(date):
        groups = token.groupdict()
        sh, sm, ss = groups["hour"], groups["min"], groups["sec"]
        number = groups["number"]
        month = groups["month"]

        if sh is not None and sm is not None and ss is not None and not found_time:
            hour, minute, second = int(sh), int(sm), int(ss)
            if not (0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60):
                raise ValueError(f"Invalid time: {sh}:{sm}:{ss}")
            found_time = True
            vals["hour"] = hour
            vals["minute"] = minute
            vals["second"] = second
        elif number is not None and not found_day and 1 <= len(number) <= 2:
            day = int(number)
            if not 0 < day <= 31:
                raise ValueError(f"Invalid day of month: {number}")
            found_day = True
            vals["day"] = day
        elif month is not None and not found_month:
            found_month = True
            vals["month"] = 1 + MONTHS.index(month.lower())
        elif number is not None and not found_year and 2 <= len(number) <= 4:
            found_year = True
            year = int(number)
            # RFC 6265 two-digit year windowing; years 100-1600 fall through invalid
            if year < 70:
                year += 2000
            elif year < 100:
                year += 1900
            elif year < 1601:
                raise ValueError("Dates before year 1601 are invalid")
            vals["year"] = year
        else:
            raise ValueError(f"Unexpected value: {token.group('match')}")

    if not (found_time and found_day and found_month and found_year):
        raise ValueError(f"Incomplete date-time: {date} ({vals})")
    return DateTime(**vals)
@@ -0,0 +1,134 @@
1
+ # Copyright 2023 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Tokenising of cookies and attributes from "Set-Cookie:" HTTP headers
5
+ """
6
+
7
+ import re
8
+ from collections.abc import Iterator
9
+ from datetime import datetime as DateTime
10
+ from datetime import timedelta as TimeDelta
11
+ from enum import Enum
12
+ from enum import auto
13
+ from typing import Literal
14
+ from typing import TypeAlias
15
+ from typing import Union
16
+ from urllib.parse import unquote as urldecode
17
+
18
+ from .dates import dateparse
19
+
20
+ __all__ = ["tokenise"]
21
+
22
+ DOT = ord(".")
23
+ SLASH = ord("/")
24
+
25
+ HEADER_RE = re.compile(
26
+ rb"""(?ix)
27
+ ^Set-Cookie:
28
+ \s* (?P<name>[^][()<>@,;:\\"/?={} \t]*)
29
+ \s* (?P<value>[=][^;]*)? # value includes "="
30
+ | [;] \s*
31
+ (?P<attr_name> expires | max-age | domain | path | secure | httpOnly ) \s*
32
+ (?:[=] (?P<attr_value>[^;]*))?
33
+ \s*
34
+ """,
35
+ )
36
+
37
+
38
+ class TokenType(Enum):
39
+
40
+ NAME = auto()
41
+ VALUE = auto()
42
+ DOMAIN = auto()
43
+ PATH = auto()
44
+ EXPIRES = auto()
45
+ MAX_AGE = auto()
46
+ SECURE = auto()
47
+ HTTP_ONLY = auto()
48
+
49
+
50
+ Token: TypeAlias = Union[
51
+ tuple[Literal[TokenType.NAME, TokenType.DOMAIN, TokenType.PATH], str],
52
+ tuple[Literal[TokenType.VALUE], bytes],
53
+ tuple[Literal[TokenType.EXPIRES], DateTime],
54
+ tuple[Literal[TokenType.MAX_AGE], TimeDelta],
55
+ tuple[Literal[TokenType.SECURE, TokenType.HTTP_ONLY], bool],
56
+ ]
57
+
58
+
59
+ def tokenise(header: bytes) -> Iterator[Token]:
60
+ """
61
+ Yield tokenised parts of a "Set-Cookie:" header
62
+
63
+ Yields token (name, value) tuples; the type of a token value is token dependant.
64
+ The algorithm used is the more permissive one for user agents in RFC6265#section-5.1
65
+
66
+ This parser is guaranteed to yield the following tokens or raise `ValueError`:
67
+
68
+ - name: a non-empty raw byte string
69
+ - value: a raw byte string that may be empty
70
+ - secure: boolean
71
+ - http-only: boolean
72
+
73
+ The parser may optionally yield the following tokens:
74
+
75
+ - domain: a normalised, decoded domain name (str)
76
+ - path: a decoded, absolute path value (str)
77
+ - expires: a `datetime.datetime` instance
78
+ - max-age: a `datetime.timedelta` instance
79
+ """
80
+ if not header.startswith(b"Set-Cookie:"):
81
+ raise ValueError(f"Not a Set-Cookie header: {header!r}")
82
+
83
+ secure = http_only = False
84
+
85
+ for match in HEADER_RE.finditer(header):
86
+ name, value = match.group("name", "value")
87
+ if name is None:
88
+ name, value = match.group("attr_name", "attr_value")
89
+ assert name is not None
90
+ elif name == b"":
91
+ raise ValueError("Cookies require a name")
92
+ elif value is None:
93
+ raise ValueError("Cookies require a value")
94
+ else:
95
+ yield TokenType.NAME, name.strip().decode("ascii")
96
+ yield TokenType.VALUE, value.strip()[1:] # strip leading "="
97
+ continue
98
+
99
+ match (name.strip().lower(), value):
100
+ case [b"expires", bytes(expires)]:
101
+ yield TokenType.EXPIRES, dateparse(expires.decode("ascii"))
102
+ case [b"max-age", bytes(max_age)]:
103
+ yield TokenType.MAX_AGE, TimeDelta(seconds=int(max_age, 10))
104
+ case [b"domain", bytes(domain)]:
105
+ domain = domain.strip()
106
+ if domain[-1] == DOT:
107
+ continue # domains ending with "." MUST be ignored
108
+ if domain[0] == DOT:
109
+ domain = domain.lstrip(b".")
110
+ # TODO: Additional check for disallowed chars?
111
+ yield TokenType.DOMAIN, domain.decode("idna").lower()
112
+ case [b"path", bytes(path)]:
113
+ path = path.strip()
114
+ if path[0] != SLASH:
115
+ continue # relative paths MUST be ignored
116
+ yield TokenType.PATH, urldecode(path)
117
+ case [b"secure", _]:
118
+ secure = True
119
+ yield TokenType.SECURE, True
120
+ case [b"httponly", _]:
121
+ http_only = True
122
+ yield TokenType.HTTP_ONLY, True
123
+ case [b"expires" | b"max-age" | b"domain" | b"path", None]:
124
+ raise ValueError(f"Cookie attribute {name.decode()!r} requires a value")
125
+ case _: # pragma: no-cover
126
+ raise RuntimeError(
127
+ f"Unhandled attribute or missing value"
128
+ f" ({match.group(0)!r} -> {match.groupdict()})",
129
+ )
130
+
131
+ if not secure:
132
+ yield TokenType.SECURE, False
133
+ if not http_only:
134
+ yield TokenType.HTTP_ONLY, False
@@ -0,0 +1,11 @@
1
+ # Copyright 2023 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Publicly exposed exception classes used in the package
5
+ """
6
+
7
+
8
class UnsupportedSchemeError(ValueError):
    """
    An exception for non-HTTP URL schemes

    Raised when a URL passed to the package's request helpers uses a scheme other
    than "http" or "https".  Subclasses `ValueError`, so callers already treating
    malformed URLs as value errors need no extra handling.
    """
File without changes
@@ -0,0 +1,375 @@
1
+ # Copyright 2023-2024 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Module providing a `konnect.curl.Request` implementation for HTTP requests
5
+
6
+ Using the `Request` class directly allows for finer-grained control of a request, including
7
+ asynchronously sending chunked data.
8
+
9
+ For many uses, there is a simple interface supplied by the `Session` class which does not
10
+ require users to interact directly with the classes supplied in this module.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from enum import Enum
16
+ from enum import Flag
17
+ from enum import auto
18
+ from ipaddress import IPv4Address
19
+ from ipaddress import IPv6Address
20
+ from pathlib import Path
21
+ from typing import TYPE_CHECKING
22
+ from typing import Literal
23
+ from typing import Mapping
24
+ from typing import TypeAlias
25
+ from typing import Union
26
+ from urllib.parse import urlparse
27
+
28
+ from konnect.curl import MILLISECONDS
29
+ from pycurl import *
30
+
31
+ from .cookies import check_cookie
32
+ from .exceptions import UnsupportedSchemeError
33
+ from .response import ReadStream
34
+ from .response import Response
35
+
36
+ if TYPE_CHECKING:
37
+ from .authenticators import AuthHandler
38
+ from .session import Session
39
+
40
+ ServiceIdentifier: TypeAlias = tuple[Literal["http", "https"], str]
41
+ TransportInfo: TypeAlias = Union[
42
+ tuple[IPv4Address | IPv6Address | str, int],
43
+ Path,
44
+ ]
45
+
46
+
47
+ __all__ = [
48
+ "Method",
49
+ "Transport",
50
+ "Request",
51
+ ]
52
+
53
+
54
class Method(Enum):
    """
    HTTP methods supported by konnect.http

    Member values are opaque; `CurlRequest.configure_handle()` maps each member onto
    the appropriate curl options.
    """

    GET = auto()
    HEAD = auto()    # headers only: curl NOBODY is set
    PUT = auto()     # streamed upload via the read callback
    POST = auto()    # streamed upload via the read callback
    PATCH = auto()   # streamed upload with a custom request verb
    DELETE = auto()  # custom request verb, no body
+
66
+
67
class Transport(Flag):
    """
    Transport layer types

    NOTE(review): not referenced by the rest of this module; presumably intended for
    user-facing transport configuration — confirm against the session module.
    """

    TCP = auto()
    UNIX = auto()
    TLS = auto()
+
76
+
77
class Phase(Enum):
    """
    Progress states of a `CurlRequest` transfer

    Advanced by the curl header/body callbacks and by `response()` as parts of the
    HTTP response arrive.
    """

    INITIAL = auto()      # request not yet started
    HEADERS = auto()      # status line seen, header fields being collected
    BODY_START = auto()   # blank line seen; response ready, body not yet consumed
    BODY_CHUNKS = auto()  # body data being streamed to the consumer
    TRAILERS = auto()     # fields arriving after the body has started
+
85
+
86
class Request:
    """
    This class provides the user-callable API for requests

    Every operation is delegated to an internal `CurlRequest`, which holds the
    actual transfer state and implements the `konnect.curl.Request` interface.
    """

    def __init__(self, session: Session, method: Method, url: str):
        self._request = CurlRequest(session, method, url)

    def __repr__(self) -> str:
        inner = self._request
        return "<Request {} {}>".format(inner.method.name, inner.url)

    @property
    def session(self) -> Session:
        """
        The session the underlying request was created with
        """
        return self._request.session

    @property
    def method(self) -> Method:
        """
        The HTTP method of the underlying request
        """
        return self._request.method

    @property
    def url(self) -> str:
        """
        The URL of the underlying request
        """
        return self._request.url

    async def write(self, data: bytes, /) -> None:
        """
        Write data to an upload request

        Signal an EOF by writing b""
        """
        await self._request.write(data)

    async def get_response(self) -> Response:
        """
        Progress the request far enough to create a `Response` object and return it
        """
        return await self._request.get_response()
131
+
132
+
133
class CurlRequest:
    """
    This class provides the `konnect.curl.Request` interface, callbacks and internal API

    It is not intended to be used directly by users.
    """

    def __init__(self, session: Session, method: Method, url: str):
        self.session = session
        self.method = method
        self.url = url
        # Curl easy handle; assigned in configure_handle() when the transfer starts
        self._handle: Curl|None = None
        # Created by _process_header() when the status line arrives
        self._response: Response|None = None
        self._phase = Phase.INITIAL
        # Set once the caller signals upload EOF by writing b""
        self._upcomplete = False
        # Byte buffer: upload bytes queued by write(), then downloaded body bytes
        self._data = b""

    def configure_handle(self, handle: Curl) -> None:
        """
        Configure a konnect.curl.Curl handle for this request

        This is part of the `konnect.curl.Request` interface.
        """
        self._handle = handle

        handle.setopt(URL, self.url)

        # Map the HTTP method onto curl options; PUT/POST/PATCH stream their bodies
        # through _process_input()
        match self.method:
            case Method.HEAD:
                handle.setopt(NOBODY, True)
            case Method.PUT:
                handle.setopt(UPLOAD, True)
                handle.setopt(INFILESIZE, -1)
                handle.setopt(READFUNCTION, self._process_input)
            case Method.POST:
                handle.setopt(POST, True)
                handle.setopt(READFUNCTION, self._process_input)
            case Method.PATCH:
                handle.setopt(CUSTOMREQUEST, "PATCH")
                handle.setopt(UPLOAD, True)
                handle.setopt(INFILESIZE, -1)
                handle.setopt(READFUNCTION, self._process_input)
            case Method.DELETE:
                handle.setopt(CUSTOMREQUEST, "DELETE")

        # Route the connection over a Unix socket or an explicit host:port override
        match get_transport(self.session.transports, self.url):
            case Path() as path:
                handle.setopt(UNIX_SOCKET_PATH, path.as_posix())
            case [(IPv4Address() | IPv6Address() | str()) as host, int(port)]:
                handle.setopt(CONNECT_TO, [f"::{host}:{port}"])
            case transport:
                raise TypeError(f"Unknown transport: {transport!r}")

        handle.setopt(COOKIE, self.get_cookies())

        handle.setopt(VERBOSE, 0)
        handle.setopt(NOPROGRESS, 1)

        handle.setopt(TIMEOUT_MS, self.session.timeout // MILLISECONDS)
        handle.setopt(CONNECTTIMEOUT_MS, self.session.connect_timeout // MILLISECONDS)

        handle.setopt(PIPEWAIT, 1)
        handle.setopt(DEFAULT_PROTOCOL, "https")
        # handle.setopt(PROTOCOLS_STR, "http,https")
        # handle.setopt(REDIR_PROTOCOLS_STR, "http,https")
        handle.setopt(PROTOCOLS, PROTO_HTTP|PROTO_HTTPS)
        handle.setopt(REDIR_PROTOCOLS, PROTO_HTTP|PROTO_HTTPS)
        handle.setopt(HEADERFUNCTION, self._process_header)
        handle.setopt(WRITEFUNCTION, self._process_body)

    def has_response(self) -> bool:
        """
        Return whether calling `response()` will return a value or raise `LookupError`

        This is part of the `konnect.curl.Request` interface.
        """
        match self._phase:
            case Phase.BODY_START:
                assert self._response is not None
                # 1xx interim responses are not surfaced to callers
                return self._response.code >= 200
            case Phase.BODY_CHUNKS:
                return self._data != b""
        return False

    def response(self) -> Response|bytes:
        """
        Return a waiting response or raise `LookupError` if there is none

        See `has_response()` for checking for waiting responses.

        This is part of the `konnect.curl.Request` interface.
        """
        if self._phase == Phase.BODY_START:
            # First call after the headers completed: hand over the Response object
            self._phase = Phase.BODY_CHUNKS
            assert self._response is not None
            if self._response.code < 200:
                raise LookupError
            return self._response
        if self._phase != Phase.BODY_CHUNKS or not self._data:
            raise LookupError
        # Subsequent calls drain buffered body bytes
        data, self._data = self._data, b""
        return data

    def completed(self) -> bytes:
        """
        Complete the transfer by returning the final stream bytes

        This is part of the `konnect.curl.Request` interface.
        """
        assert self._phase == Phase.BODY_CHUNKS
        data, self._data = self._data, b""
        return data

    async def write(self, data: bytes, /) -> None:
        """
        Write data to an upload request

        Signal an EOF by writing b""
        """
        # TODO: apply back-pressure when self._data reaches a certain length
        # TODO: use a nicer buffer implementation than just appending
        if data == b"":
            self._upcomplete = True
        elif self._data:
            self._data += data
        else:
            self._data = data
        if self._handle:
            # Wake curl up in case the read callback previously paused the upload
            self._handle.pause(PAUSE_CONT)

    def _process_input(self, size: int) -> bytes|int:
        # curl read callback: feed up to `size` queued upload bytes, signal EOF with
        # b"", or pause the transfer until write() supplies more data
        if self._data:
            data, self._data = self._data[:size], self._data[size:]
            return data
        if self._upcomplete:
            return b""
        return READFUNC_PAUSE

    def _process_header(self, data: bytes) -> None:
        # curl header callback: one raw line per call, including the status line and
        # the blank line terminating each header block
        if data.startswith(b"HTTP/"):
            self._phase = Phase.HEADERS
            stream = ReadStream(self)
            self._response = Response(data.decode("ascii"), stream)
            return
        assert self._response is not None
        if data == b"\r\n":
            self._phase = Phase.BODY_START
            return
        if self._phase not in (Phase.HEADERS, Phase.TRAILERS):
            # A field arriving after the body started must be a trailer
            self._phase = Phase.TRAILERS
        self._response.headers.append(self._split_field(data))

    def _split_field(self, field: bytes) -> tuple[str, bytes]:
        # Split a raw header line into a lower-cased name and stripped value;
        # continuation (multi-line) fields are rejected as non-compliant
        assert self._response is not None
        name, has_sep, value = field.partition(b":")
        if has_sep:
            # TODO: test performance of str.lower() vs. bytes.lower()
            return name.lower().decode("ascii"), value.strip()
        try:
            lname = self._response.headers[-1][0]
        except IndexError:
            raise ValueError("Non-field value when reading HTTP message fields")
        else:
            raise ValueError(f"Non-compliant multi-line field: {lname}")

    def _process_body(self, data: bytes) -> None:
        # curl write callback: buffer body bytes until the multi loop hands them on
        self._data += data

    async def get_response(self) -> Response:
        """
        Progress the request far enough to create a `Response` object and return it
        """
        if self._phase != Phase.INITIAL:
            raise RuntimeError("get_response() can only be called on an unstarted request")
        auth = get_authenticator(self.session.auth, self.url)
        if auth is not None:
            await auth.prepare_request(self)
        resp = await self.session.multi.process(self)
        assert isinstance(resp, Response)
        if auth is not None:
            # The handler may perform follow-up requests and substitute a new response
            resp = await auth.process_response(self, resp)
        return resp

    async def get_data(self) -> bytes:
        """
        Return chunks of received data from the body of the response to the request
        """
        if self._phase != Phase.BODY_CHUNKS:
            raise RuntimeError("get_data() can only be called after get_response()")
        data = await self.session.multi.process(self)
        assert isinstance(data, bytes), repr(data)
        return data

    def get_cookies(self) -> bytes:
        """
        Return the encoded cookie values to be sent with the request
        """
        return b"; ".join(
            bytes(cookie)
            for cookie in self.session.cookies
            if check_cookie(cookie, self.url)
        )
335
+
336
+
337
def get_transport(
    transports: Mapping[ServiceIdentifier, TransportInfo],
    url: str,
) -> TransportInfo:
    """
    For a given http:// or https:// URL, return suitable transport layer information

    An explicit entry in `transports` takes precedence; otherwise the URL's own host
    and port (or the scheme's default port) are returned.

    Raises `ValueError` for relative URLs and `UnsupportedSchemeError` for non-HTTP
    schemes.
    """
    parts = urlparse(url)
    if parts.hostname is None:
        raise ValueError("An absolute URL is required")
    scheme = parts.scheme
    if scheme == "https":
        default_port = 443
    elif scheme == "http":
        default_port = 80
    else:
        raise UnsupportedSchemeError(url)
    key = (scheme, parts.netloc)
    if key in transports:
        return transports[key]
    return parts.hostname, parts.port or default_port
358
+
359
+
360
def get_authenticator(
    authenticators: Mapping[ServiceIdentifier, AuthHandler],
    url: str,
) -> AuthHandler|None:
    """
    For a given http:// or https:// URL, return any `AuthHandler` associated with it

    Returns `None` when no handler is registered for the URL's scheme and host.
    Raises `ValueError` for relative URLs and `UnsupportedSchemeError` for non-HTTP
    schemes.
    """
    parts = urlparse(url)
    if parts.hostname is None:
        raise ValueError("An absolute URL is required")
    scheme = parts.scheme
    if scheme != "http" and scheme != "https":
        raise UnsupportedSchemeError(url)
    return authenticators.get((scheme, parts.netloc))  # type: ignore[arg-type]
@@ -0,0 +1,191 @@
1
+ # Copyright 2023-2024 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Response classes for HTTP requests
5
+
6
+ Instances of the classes contained in this module are not created directly by users, instead
7
+ they are returned from `konnect.http.Session` methods. If needed for typing, they are
8
+ exported from `konnect.http`.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from asyncio import IncompleteReadError
14
+ from asyncio import LimitOverrunError
15
+ from collections.abc import AsyncIterator
16
+ from http import HTTPStatus
17
+ from typing import TYPE_CHECKING
18
+
19
+ from anyio import EndOfStream
20
+
21
+ if TYPE_CHECKING:
22
+ from .request import CurlRequest
23
+
24
+
25
class ReadStream:
    """
    A readable stream for response bodies

    This class implements the methods for `asyncio.StreamReader` and
    `anyio.abc.ByteReceiveStream`, allowing it to be passed to library functions that may
    require either of those interfaces.
    """

    def __init__(self, request: CurlRequest) -> None:
        # Set to None once EOF is reached or the stream is closed
        self.request: CurlRequest|None = request
        # Bytes received but not yet handed to the caller
        self._buffer = b""

    async def __aiter__(self) -> AsyncIterator[bytes]:
        try:
            while (chunk := await self.receive()):
                yield chunk
        except EndOfStream:
            return

    async def aclose(self) -> None:
        """
        Close the stream

        Implements `anyio.abc.AsyncResource.aclose()`
        """
        self.request = None

    async def _receive(self) -> bytes:
        # Wait until a chunk is available, and return it. Raise EndOfStream if indicated
        # with an empty byte chunk.
        if self._buffer:
            data, self._buffer = self._buffer, b""
            return data
        if self.request is None:
            raise EndOfStream
        if (data := await self.request.get_data()) == b"":
            self.request = None
            raise EndOfStream
        return data

    async def receive(self, max_bytes: int = 65536, /) -> bytes:
        """
        Read and return up to `max_bytes` bytes from the stream

        Implements `anyio.abc.ByteReceiveStream.receive()`
        """
        data = await self._receive()
        if max_bytes >= 0:
            # Excess bytes are kept for the next read
            data, self._buffer = data[:max_bytes], data[max_bytes:]
        return data

    async def readuntil(self, separator: bytes = b'\n') -> bytes:
        """
        Read and return up-to and including the first instance of `separator` in the stream

        If an EOF occurs before encountering the separator `IncompleteReadError` is raised.
        If the separator is not encountered within the configured buffer size limit for the
        stream, `LimitOverrunError` is raised and the buffer left intact.

        Implements `asyncio.StreamReader.readuntil()`
        """
        # NOTE(review): a multi-byte separator spanning two received chunks is not
        # detected by the per-chunk find() below — confirm whether that can occur here
        chunks = list[bytes]()
        length = 0
        split = -1
        while split < 0:
            try:
                data = await self._receive()
            except EndOfStream:
                raise IncompleteReadError(b''.join(chunks), None)
            if (split := data.find(separator)) >= 0:
                split += len(separator)
                assert len(data) >= split
                data, self._buffer = data[:split], data[split:]
            chunks.append(data)
            length += len(data)
        return b''.join(chunks)

    async def read(self, max_size: int = -1, /) -> bytes:
        """
        Read and return up to `max_size` bytes from the stream

        Be cautious about calling this with a non-positive `max_size` as the entire stream
        will be stored in memory.

        Implements `asyncio.StreamReader.read()`
        """
        if max_size >= 0:
            try:
                return await self.receive(max_size)
            except EndOfStream:
                return b""
        # Collect ALL THE DATA and return it
        chunks = list[bytes]()
        try:
            while (chunk := await self.receive()):
                chunks.append(chunk)
        except EndOfStream:
            pass
        return b''.join(chunks)

    async def readline(self) -> bytes:
        r"""
        Read and return one '\n' terminated line from the stream

        Unlike `readuntil()` an incomplete line will be returned if an EOF occurs, and
        `ValueError` is raised instead of `LimitOverrunError`. In the event of
        a `LimitOverrunError` the buffer is also cleared.

        This implementation differs very slightly from Asyncio's, as the behaviour described
        there is a hot mess. It is *highly* recommended you use `readuntil` instead.

        Implements `asyncio.StreamReader.readline()`
        """
        try:
            return await self.readuntil(b'\n')
        except IncompleteReadError as exc:
            return exc.partial
        except LimitOverrunError as exc:
            self._buffer = b""
            raise ValueError(exc.args[0])

    async def readexactly(self, size: int, /) -> bytes:
        """
        Read and return exactly `size` bytes from the stream

        Raises `asyncio.IncompleteReadError` (with the partial data attached) if the
        stream ends before `size` bytes have been read.

        Implements `asyncio.StreamReader.readexactly()`
        """
        chunks = list[bytes]()
        try:
            while size > 0:
                chunks.append(chunk := await self.receive(size))
                size -= len(chunk)
            assert size == 0, "ReadStream.receive() returned too many bytes"
        except EndOfStream:
            # Fix: the exception was previously constructed but never raised, silently
            # returning short data on EOF
            raise IncompleteReadError(b''.join(chunks), size)
        return b''.join(chunks)

    def at_eof(self) -> bool:
        """
        Return `True` if the buffer is empty and an end-of-file has been indicated
        """
        return not self._buffer and self.request is None
168
+
169
+
170
class Response:
    """
    A class for response details, and header and body accessors

    Parses an HTTP status line (e.g. "HTTP/1.1 200 OK") into the `version`, `code`,
    and `status` attributes; `headers` is appended to by the request's header
    callback, and `stream` exposes the response body.
    """

    def __init__(self, response: str, stream: ReadStream):
        fields = response.split(maxsplit=2)
        if len(fields) == 3:
            version, code, status = fields
            self.version = version
            self.code = HTTPStatus(int(code))
            self.status = status.strip()
        elif len(fields) == 2:
            version, code = fields
            self.version = version
            self.code = HTTPStatus(int(code))
            # No reason phrase on the wire; fall back to the standard phrase
            self.status = self.code.phrase
        else:
            raise ValueError
        self.stream = stream
        self.headers = list[tuple[str, bytes]]()

    def __repr__(self) -> str:
        return f"<Response {self.code} {self.status}>"
@@ -0,0 +1,180 @@
1
+ # Copyright 2023 Dom Sekotill <dom.sekotill@kodo.org.uk>
2
+
3
+ """
4
+ Sessions are the primary entrypoint for users
5
+
6
+ Sessions handle global, prepared, shared state for requests. They are also the primary
7
+ entrypoint for users, abstracting away request generation and scheduling, and yielding
8
+ responses for users to consume.
9
+
10
+ > **Note:**
11
+ > Unlike the `requests` package, there are no top-level functions for generating requests
12
+ > and producing responses, as they would have to be synchronous.
13
+
14
+ The `Session` class has several request methods which return `Response` objects. These are
15
+ conveniences for creating `Request` objects, writing data to them (if appropriate for the
16
+ HTTP method), and awaiting a response from them.
17
+ """
18
+
19
+ from typing import Self
20
+ from urllib.parse import urlparse
21
+
22
+ from konnect.curl import SECONDS
23
+ from konnect.curl import Multi
24
+ from konnect.curl import Time
25
+ from konnect.curl.scalars import Quantity
26
+
27
+ from .authenticators import AuthHandler
28
+ from .cookies import Cookie
29
+ from .exceptions import UnsupportedSchemeError
30
+ from .request import Method
31
+ from .request import Request
32
+ from .request import ServiceIdentifier
33
+ from .request import TransportInfo
34
+ from .response import Response
35
+
36
+
37
class Session:
    """
    A shared request state class

    Users *should* use a `Session` instance as an asynchronous context manager.

    Users can provide a shared `Multi` object to allow connections to be shared between
    sessions (or even different protocol clients), otherwise a new `Multi` object is
    created; either option is safe in a single threaded environment but `Multi` objects
    must not be shared between threads.

    Users may also inject a subclass of `Request` to be used by the various methods that
    return `Response` objects; the return object is the result of calling
    `Request.get_response()`.
    """

    # TODO: cookiejars
    # TODO: proxies

    def __init__(
        self, *,
        multi: Multi|None = None,
        request_class: type[Request] = Request,
    ) -> None:
        self.multi = multi or Multi()
        self.request_class = request_class
        # 0 seconds disables the overall transfer timeout
        self.timeout: Quantity[Time] = 0 @ SECONDS
        self.connect_timeout: Quantity[Time] = 300 @ SECONDS
        # Per-service state, keyed by (scheme, netloc) service identifiers
        self.transports = dict[ServiceIdentifier, TransportInfo]()
        self.auth = dict[ServiceIdentifier, AuthHandler]()
        self.cookies = set[Cookie]()

    async def __aenter__(self) -> Self:
        # For future use; likely downloading PAC files if used for proxies
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        return

    @staticmethod
    def _service_key(url: str) -> ServiceIdentifier:
        """
        Parse a URL base and return its (scheme, netloc) service identifier

        Raises `UnsupportedSchemeError` for any scheme other than "http" or "https".
        """
        parts = urlparse(url)
        if parts.scheme not in ("http", "https"):
            raise UnsupportedSchemeError(url)
        return parts.scheme, parts.netloc  # type: ignore[return-value]

    async def _request_with_body(self, method: Method, url: str, data: bytes) -> Response:
        """
        Create a request with an in-memory body and await its response
        """
        req = self.request_class(self, method, url)
        await req.write(data)
        await req.write(b"")  # an empty write marks the end of the request body
        return await req.get_response()

    async def head(self, url: str) -> Response:
        """
        Perform an HTTP HEAD request
        """
        req = self.request_class(self, Method.HEAD, url)
        return await req.get_response()

    async def get(self, url: str) -> Response:
        """
        Perform an HTTP GET request
        """
        req = self.request_class(self, Method.GET, url)
        return await req.get_response()

    async def put(self, url: str, data: bytes) -> Response:
        """
        Perform a simple HTTP PUT request with in-memory data
        """
        return await self._request_with_body(Method.PUT, url, data)

    async def post(self, url: str, data: bytes) -> Response:
        """
        Perform a simple HTTP POST request with in-memory data
        """
        return await self._request_with_body(Method.POST, url, data)

    async def patch(self, url: str, data: bytes) -> Response:
        """
        Perform a simple HTTP PATCH request with in-memory data
        """
        return await self._request_with_body(Method.PATCH, url, data)

    async def delete(self, url: str) -> Response:
        """
        Perform an HTTP DELETE request
        """
        req = self.request_class(self, Method.DELETE, url)
        return await req.get_response()

    def add_redirect(self, url: str, target: TransportInfo) -> None:
        """
        Add a redirect for a URL base to a target address/port

        The URL base should be a scheme and 'hostname[:port]' only,
        e.g. `"http://example.com"`; anything else will be ignored but may have an
        effect in future releases.
        """
        self.transports[self._service_key(url)] = target

    def remove_redirect(self, url: str) -> None:
        """
        Remove a redirect for a URL base

        See `add_redirect()` for the format of the URL base.
        """
        del self.transports[self._service_key(url)]

    def add_authentication(self, url: str, authenticator: AuthHandler) -> None:
        """
        Add an authentication handler to use when accessing URLs under the given URL base

        The URL base should be a scheme and 'hostname[:port]' only,
        e.g. `"http://example.com"`; anything else will be ignored but may have an
        effect in future releases.
        """
        self.auth[self._service_key(url)] = authenticator

    def remove_authentication(self, url: str) -> None:
        """
        Remove an authentication handler for a URL base

        See `add_authentication()` for the format of the URL base
        """
        del self.auth[self._service_key(url)]

    def add_cookie(self, url: str, name: str, value: bytes) -> None:
        """
        Add a cookie for the given URL base

        Raises `ValueError` if the URL does not include a hostname.
        """
        parts = urlparse(url)
        if parts.hostname is None:
            raise ValueError(f"a hostname is required in URL: {url}")
        cookie = Cookie(
            name, value, None, parts.hostname, parts.path,
            secure=(parts.scheme == "https"),
        )
        self.cookies.add(cookie)
@@ -0,0 +1,88 @@
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [[project.authors]]
6
+ name = "Dom Sekotill"
7
+ email = "dom.sekotill@kodo.org.uk"
8
+
9
+ [project]
10
+ name = "konnect.http"
11
+ version = "0.1.0"
12
+ description = "Pythonic, asynchronous HTTP client"
13
+
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
18
+ "Topic :: Internet",
19
+ ]
20
+
21
+ requires-python = "~=3.11"
22
+ dependencies = [
23
+ "anyio ~=4.0",
24
+ "konnect.curl ~=0.1.0",
25
+ ]
26
+
27
+ [tool.isort]
28
+ force_single_line = true
29
+ line_length = 92
30
+
31
+ [tool.unimport]
32
+ ignore-init = true
33
+
34
+
35
+ [tool.flakeheaven]
36
+ base = "https://code.kodo.org.uk/dom/project-templates/-/raw/main/.flakerules.toml"
37
+ colored = true
38
+ max_line_length = 92
39
+ max_doc_length = 92
40
+
41
+ [tool.flakeheaven.plugins]
42
+ flake8-return = ["-R504"]
43
+
44
+ [tool.flakeheaven.exceptions."examples/"]
45
+ flake8-print = ["-*"]
46
+
47
+
48
+ [tool.mypy]
49
+ strict = true
50
+ namespace_packages = true
51
+ explicit_package_bases = true
52
+ allow_redefinition = true
53
+ warn_unused_configs = true
54
+ warn_unreachable = true
55
+
56
+
57
+ [tool.coverage.run]
58
+ data_file = "results/coverage.db"
59
+ branch = true
60
+ source = ["konnect"]
61
+
62
+ [tool.coverage.report]
63
+ precision = 2
64
+ skip_empty = true
65
+ exclude_lines = [
66
+ "pragma: no-cover",
67
+ "if .*\\b__name__\\b",
68
+ "if .*\\bTYPE_CHECKING\\b",
69
+ "class .*(.*\\bProtocol\\b.*):",
70
+ "def __repr__",
71
+ "@overload",
72
+ "@(abc\\.)abstractmethod",
73
+ ]
74
+ partial_branches = [
75
+ "pragma: no-branch",
76
+ "if .*\\b__debug__\\b",
77
+ ]
78
+
79
+ [tool.coverage.json]
80
+ output = "results/coverage.json"
81
+ show_contexts = true
82
+
83
+ [tool.coverage.xml]
84
+ output = "results/coverage.xml"
85
+
86
+ [tool.coverage.html]
87
+ directory = "results/coverage"
88
+ show_contexts = true