amazon-polly-streaming 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amazon_polly_streaming/__init__.py +20 -0
- amazon_polly_streaming/_buffered_stream.py +102 -0
- amazon_polly_streaming/_connection_pool.py +229 -0
- amazon_polly_streaming/_event_signer.py +129 -0
- amazon_polly_streaming/_eventstream.py +155 -0
- amazon_polly_streaming/_http2.py +275 -0
- amazon_polly_streaming/client.py +295 -0
- amazon_polly_streaming/exceptions.py +57 -0
- amazon_polly_streaming/py.typed +0 -0
- amazon_polly_streaming-1.0.0.dist-info/METADATA +84 -0
- amazon_polly_streaming-1.0.0.dist-info/RECORD +15 -0
- amazon_polly_streaming-1.0.0.dist-info/WHEEL +5 -0
- amazon_polly_streaming-1.0.0.dist-info/licenses/LICENSE +201 -0
- amazon_polly_streaming-1.0.0.dist-info/licenses/NOTICE +2 -0
- amazon_polly_streaming-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Amazon Polly bidirectional streaming over HTTP/2 with SigV4."""
|
|
2
|
+
|
|
3
|
+
from amazon_polly_streaming.client import PollyStreamingClient
|
|
4
|
+
from amazon_polly_streaming.exceptions import (
|
|
5
|
+
ServiceException,
|
|
6
|
+
ServiceFailureException,
|
|
7
|
+
ServiceQuotaExceededException,
|
|
8
|
+
ThrottlingException,
|
|
9
|
+
ValidationException,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"PollyStreamingClient",
|
|
14
|
+
"ServiceException",
|
|
15
|
+
"ServiceFailureException",
|
|
16
|
+
"ServiceQuotaExceededException",
|
|
17
|
+
"ThrottlingException",
|
|
18
|
+
"ValidationException",
|
|
19
|
+
]
|
|
20
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Writable, non-blocking byte stream used as HTTP/2 request body channel.
|
|
2
|
+
|
|
3
|
+
`awscrt`'s HTTP/2 client reads the request body from a file-like object on its
|
|
4
|
+
own I/O thread, calling `read(size)` repeatedly until the stream signals EOF.
|
|
5
|
+
For bidirectional streaming we want to write events to the body channel
|
|
6
|
+
incrementally from the application's asyncio thread, after the request has
|
|
7
|
+
already been opened.
|
|
8
|
+
|
|
9
|
+
`BufferableByteStream` exposes that pattern by behaving as a non-blocking
|
|
10
|
+
file-like object: `read` raises `BlockingIOError` when no data is currently
|
|
11
|
+
available, signalling `awscrt` to retry later; once `end_stream` is called and
|
|
12
|
+
all buffered chunks have been read, `read` returns `b""` (EOF) and the HTTP/2
|
|
13
|
+
stream is closed cleanly.
|
|
14
|
+
|
|
15
|
+
Pattern adapted from the `amazon-transcribe-streaming-sdk` Python package
|
|
16
|
+
(Apache 2.0), which uses the same approach with `awscrt`.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from collections import deque
from io import BufferedIOBase
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BufferableByteStream(BufferedIOBase):
    """Non-blocking, append-only byte buffer for HTTP/2 request body channels.

    Chunks are queued by `write` and handed out from the head of the queue by
    `read`. The stream has three states: open (reads raise `BlockingIOError`
    when the queue is empty), ended (`end_stream`: queued data is still
    readable, then reads return ``b""``), and closed (`close`: queued data is
    discarded and reads return ``b""``).
    """

    def __init__(self) -> None:
        """Initialize an empty, open stream."""
        # A deque gives O(1) removal and re-insertion at the head, whereas a
        # list shifts every remaining chunk on each `pop(0)` / `insert(0, ...)`.
        self._chunks: deque[bytes] = deque()
        self._done: bool = False
        self._closed: bool = False

    def read(self, size: int | None = -1) -> bytes:
        """Return up to `size` bytes; raise `BlockingIOError` if no data is buffered.

        Args:
            size: Maximum number of bytes to return. ``-1`` or ``None`` means
                "the next chunk in full".

        Returns:
            Bytes consumed from the head of the buffer. At most one queued
            chunk is consumed per call. ``b""`` once the stream has been
            ended (`end_stream`) or closed and the buffer is empty.

        Raises:
            BlockingIOError: when the stream is still open but no data is
                currently buffered. The caller is expected to retry later.
        """
        # EAFP: a single atomic `popleft` instead of "check emptiness, then
        # pop", so a `close()` landing between the two steps cannot surface
        # as an IndexError to the reader.
        try:
            chunk = self._chunks.popleft()
        except IndexError:
            if self._done or self._closed:
                return b""
            msg = "no data buffered yet"
            raise BlockingIOError(msg) from None

        if size is None or size < 0 or size >= len(chunk):
            return chunk
        # Partial read: put the unread tail back at the head so ordering is
        # preserved for the next call.
        self._chunks.appendleft(chunk[size:])
        return chunk[:size]

    def read1(self, size: int = -1) -> bytes:
        """Read at most `size` bytes; same semantics as `read`."""
        return self.read(size)

    def write(self, b: bytes) -> int:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Append `b` to the buffer; return the number of bytes accepted.

        Args:
            b: Bytes to append. Non-bytes input raises `TypeError`.

        Returns:
            ``len(b)``. Empty input is accepted but not queued.

        Raises:
            TypeError: if `b` is not a `bytes` instance.
            OSError: if the stream has already been ended or closed.
        """
        if not isinstance(b, bytes):  # pyright: ignore[reportUnnecessaryIsInstance]
            msg = f"BufferableByteStream.write requires bytes, got {type(b).__name__}"
            raise TypeError(msg)
        if self._done or self._closed:
            msg = "stream is closed"
            raise OSError(msg)
        if b:
            self._chunks.append(b)
        return len(b)

    def end_stream(self) -> None:
        """Mark the stream as ended; future writes raise, future reads drain then return `b""`."""
        self._done = True

    def close(self) -> None:
        """Close the stream and discard any buffered data."""
        # Flip the flags before dropping the data so a write racing with
        # close is rejected rather than queued after the clear.
        self._done = True
        self._closed = True
        self._chunks.clear()

    @property
    def closed(self) -> bool:
        """True if `close` has been called."""
        return self._closed
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""HTTP/2 connection pool for `amazon-polly-streaming` with multi-connection lease semantics.
|
|
2
|
+
|
|
3
|
+
Eliminates the TLS handshake, ALPN h2 negotiation, and HTTP/2 SETTINGS exchange
|
|
4
|
+
between subsequent calls to the same Polly endpoint, while supporting fan-out:
|
|
5
|
+
multiple concurrent leases on the same ``(host, port)`` get distinct underlying
|
|
6
|
+
``HttpClientConnection`` instances, up to ``max_size_per_key``.
|
|
7
|
+
|
|
8
|
+
The Polly bidirectional streaming endpoint advertises one active stream per
|
|
9
|
+
HTTP/2 connection, so a single shared connection cannot serve more than one
|
|
10
|
+
synthesis call at a time. The pool therefore keeps a per-key list of
|
|
11
|
+
connections: idle entries are reused by subsequent acquires (preserving the
|
|
12
|
+
TLS/H2 cache benefit), and concurrent leases each get their own connection.
|
|
13
|
+
When the per-key cap is reached, a further acquire waits on a Condition until
|
|
14
|
+
a release frees a slot.
|
|
15
|
+
|
|
16
|
+
The underlying AWS Common Runtime resources (event loop group, host resolver,
|
|
17
|
+
client bootstrap, TLS context) are shared across all entries and allocated
|
|
18
|
+
lazily on the first connect.
|
|
19
|
+
|
|
20
|
+
Pattern inspired by the AWS-maintained ``amazon-transcribe-streaming-sdk``
|
|
21
|
+
(``AwsCrtHttpSessionManager._connections`` in
|
|
22
|
+
``amazon_transcribe/httpsession.py``).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false
|
|
26
|
+
# pyright: reportUnknownArgumentType=false, reportUnknownParameterType=false
|
|
27
|
+
# pyright: reportAttributeAccessIssue=false, reportArgumentType=false
|
|
28
|
+
# pyright: reportUnnecessaryComparison=false
|
|
29
|
+
# Rationale: awscrt ships no type stubs and its inline annotations are
|
|
30
|
+
# incomplete; suppressions are scoped to this module.
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import asyncio
|
|
34
|
+
from contextlib import asynccontextmanager
|
|
35
|
+
from typing import TYPE_CHECKING
|
|
36
|
+
|
|
37
|
+
from awscrt import http, io
|
|
38
|
+
from awscrt.http import HttpClientConnection
|
|
39
|
+
|
|
40
|
+
if TYPE_CHECKING:
|
|
41
|
+
from collections.abc import AsyncGenerator
|
|
42
|
+
|
|
43
|
+
_DEFAULT_MAX_SIZE_PER_KEY = 8
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
async def _connect(
    host: str,
    port: int,
    *,
    bootstrap: io.ClientBootstrap,
    tls_ctx: io.ClientTlsContext,
) -> HttpClientConnection:
    """Establish a TLS connection to ``host:port`` and require HTTP/2 on it.

    The caller supplies the bootstrap and TLS context; this function only
    derives the per-connection TLS options from ``tls_ctx`` (server name set
    to ``host``, ALPN list restricted to ``h2``).

    Args:
        host: Hostname to connect to; also used as the TLS server name.
        port: TCP port.
        bootstrap: ``ClientBootstrap`` passed through to awscrt.
        tls_ctx: ``ClientTlsContext`` used to create the connection options.

    Returns:
        The opened ``HttpClientConnection`` whose negotiated version is HTTP/2.

    Raises:
        RuntimeError: if the connection is not open after the connect future
            resolves, or if a protocol other than HTTP/2 was negotiated (the
            connection is closed before raising in that case).
    """
    tls_options = tls_ctx.new_connection_options()
    tls_options.set_server_name(host)
    tls_options.set_alpn_list(["h2"])

    pending = HttpClientConnection.new(
        host_name=host,
        port=port,
        bootstrap=bootstrap,
        socket_options=io.SocketOptions(),
        tls_connection_options=tls_options,
    )
    # awscrt hands back a concurrent.futures.Future; bridge it into asyncio.
    conn = await asyncio.wrap_future(pending)

    if not conn.is_open():
        msg = f"Could not open connection to {host}:{port}"
        raise RuntimeError(msg)

    if conn.version is http.HttpVersion.Http2:
        return conn

    # Wrong protocol negotiated: do not leave the socket dangling.
    conn.close()
    msg = f"HTTP/2 could not be negotiated: got {conn.version!r}"
    raise RuntimeError(msg)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class _ConnectionPool:
    """Bounded pool of HTTP/2 connections keyed on ``(host, port)``, with leases.

    Each acquire grants exclusive use of one connection (a "lease") for the
    duration of an ``async with acquire_connection(...)`` block. Idle
    connections are reused by later acquires; when none is idle, a new
    connection is opened as long as fewer than ``max_size_per_key`` leases are
    outstanding for that key, otherwise the acquire waits for a release.

    A single ``asyncio.Condition`` guards the in-memory bookkeeping and wakes
    waiters when a slot frees up. ``_connect`` runs outside the lock, so
    connects for different acquires can proceed concurrently.

    The awscrt resources (event loop group, host resolver, client bootstrap,
    TLS context) are created on the first connect and shared afterwards.
    """

    def __init__(self, *, max_size_per_key: int = _DEFAULT_MAX_SIZE_PER_KEY) -> None:
        """Create an empty pool.

        Args:
            max_size_per_key: Maximum number of concurrent leases per
                ``(host, port)`` key. Must be at least 1.

        Raises:
            ValueError: if ``max_size_per_key`` is smaller than 1. Such a
                value would make every acquire wait forever, because no slot
                could ever be granted.
        """
        if max_size_per_key < 1:
            msg = f"max_size_per_key must be >= 1, got {max_size_per_key}"
            raise ValueError(msg)
        self._max_size_per_key = max_size_per_key
        # Connections currently not leased, per key (used as a LIFO stack).
        self._idle: dict[tuple[str, int], list[HttpClientConnection]] = {}
        # Number of outstanding leases (including in-flight connects), per key.
        self._in_use: dict[tuple[str, int], int] = {}
        self._cond = asyncio.Condition()
        self._closed = False
        self._bootstrap: io.ClientBootstrap | None = None
        self._tls_ctx: io.ClientTlsContext | None = None
        # Hold refs to event loop group and resolver so they are not GC'd
        # while the bootstrap is alive.
        self._elg: io.EventLoopGroup | None = None
        self._resolver: io.DefaultHostResolver | None = None

    def _ensure_resources(self) -> tuple[io.ClientBootstrap, io.ClientTlsContext]:
        """Lazily build the shared awscrt resources on first use."""
        if self._bootstrap is None or self._tls_ctx is None:
            elg = io.EventLoopGroup(1)
            resolver = io.DefaultHostResolver(elg)
            self._elg = elg
            self._resolver = resolver
            self._bootstrap = io.ClientBootstrap(elg, resolver)
            self._tls_ctx = io.ClientTlsContext(io.TlsContextOptions())
        return self._bootstrap, self._tls_ctx

    @asynccontextmanager
    async def acquire_connection(
        self, *, host: str, port: int
    ) -> AsyncGenerator[HttpClientConnection]:
        """Acquire an exclusive lease on an HTTP/2 connection for ``(host, port)``.

        On entry, yields either a cached idle connection or a freshly opened
        one. If ``max_size_per_key`` leases are already outstanding for the
        key, the call waits until one of them is released. On exit, the
        connection goes back to the idle list, or is closed if it is no
        longer open or the pool has been closed in the meantime.

        Args:
            host: Hostname.
            port: TCP port.

        Yields:
            An open ``HttpClientConnection``.

        Raises:
            RuntimeError: propagated from ``_connect`` when a fresh
                connection cannot be opened or HTTP/2 is not negotiated.
        """
        key = (host, port)
        connection = await self._acquire(key)
        try:
            yield connection
        finally:
            await self._release(key, connection)

    async def _acquire(self, key: tuple[str, int]) -> HttpClientConnection:
        """Reserve a slot for ``key`` and return an open connection."""
        async with self._cond:
            while True:
                # Prefer an idle connection; discard any that went stale
                # while sitting in the pool.
                idle_list = self._idle.get(key, [])
                while idle_list:
                    candidate = idle_list.pop()
                    if candidate.is_open():
                        self._in_use[key] = self._in_use.get(key, 0) + 1
                        return candidate
                    candidate.close()
                # Nothing reusable: reserve a slot for a new connection if
                # the cap allows, otherwise wait for a release.
                count = self._in_use.get(key, 0)
                if count < self._max_size_per_key:
                    self._in_use[key] = count + 1
                    break
                await self._cond.wait()
        # The slot is reserved; connect without holding the lock.
        try:
            bootstrap, tls_ctx = self._ensure_resources()
            return await _connect(key[0], key[1], bootstrap=bootstrap, tls_ctx=tls_ctx)
        except BaseException:
            # Give the reserved slot back (also on cancellation) and wake
            # waiters so they can try for it.
            async with self._cond:
                self._in_use[key] = max(0, self._in_use.get(key, 0) - 1)
                self._cond.notify_all()
            raise

    async def _release(self, key: tuple[str, int], connection: HttpClientConnection) -> None:
        """Return a leased connection to idle, or close it on stale/closed-pool."""
        async with self._cond:
            self._in_use[key] = max(0, self._in_use.get(key, 0) - 1)
            if self._closed or not connection.is_open():
                connection.close()
            else:
                self._idle.setdefault(key, []).append(connection)
            self._cond.notify_all()

    async def close_all(self) -> None:
        """Close every idle connection and mark the pool as closed.

        Connections currently leased are not touched here; when their lease
        is released they are closed instead of being returned to idle,
        because the pool is flagged closed.
        """
        async with self._cond:
            self._closed = True
            idle_snapshot = list(self._idle.items())
            self._idle.clear()
            self._cond.notify_all()
        # Close outside the lock; the snapshot is no longer reachable from
        # the pool, so nobody else can lease these connections.
        for _key, conns in idle_snapshot:
            for connection in conns:
                connection.close()
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
_default_pool: _ConnectionPool | None = None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def get_default_pool() -> _ConnectionPool:
    """Return the shared module-level pool, building it on the first call.

    Later calls return the same ``_ConnectionPool`` instance stored in the
    module global ``_default_pool``.
    """
    global _default_pool  # noqa: PLW0603
    pool = _default_pool
    if pool is None:
        pool = _ConnectionPool()
        _default_pool = pool
    return pool
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Rolling AWS event-stream chunk signer for HTTP/2 bidirectional streaming.
|
|
2
|
+
|
|
3
|
+
Each event sent on the request body channel must be wrapped in a signed
|
|
4
|
+
envelope. The wrapper carries two headers, `:date` and `:chunk-signature`,
|
|
5
|
+
where `:chunk-signature` is computed as an HMAC-SHA256 of an event-stream-
|
|
6
|
+
specific string-to-sign. The signature of the previous wrapped event (or, for
|
|
7
|
+
the first event, the SigV4 signature of the initial HTTP request) participates
|
|
8
|
+
in the string-to-sign, producing a rolling chain.
|
|
9
|
+
|
|
10
|
+
The signing string follows the format documented at
|
|
11
|
+
https://docs.aws.amazon.com/transcribe/latest/dg/streaming-setting-up.html
|
|
12
|
+
under "AWS4-HMAC-SHA256-PAYLOAD". The same algorithm is used by Amazon Polly
|
|
13
|
+
bidirectional streaming and by the open-source `amazon-transcribe-streaming-
|
|
14
|
+
sdk` Python package, both based on the AWS Common Runtime event-stream signing
|
|
15
|
+
contract.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import hashlib
|
|
21
|
+
import hmac
|
|
22
|
+
import struct
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
from typing import TYPE_CHECKING
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
import datetime
|
|
28
|
+
|
|
29
|
+
from amazon_polly_streaming._eventstream import HeaderValue
|
|
30
|
+
|
|
31
|
+
_TIMESTAMP_FMT = "%Y%m%dT%H%M%SZ"
|
|
32
|
+
_HEADER_TYPE_TIMESTAMP = 8
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class EventSignerCredentials:
    """Subset of AWS credentials needed to derive the signing key.

    Instances are immutable. The ``repr`` redacts the secret key and the
    session token so that credentials do not end up in logs or tracebacks.
    """

    # Public access key identifier.
    access_key_id: str
    # Secret key used as the root of the signing-key derivation.
    secret_access_key: str
    # Session token for temporary credentials; ``None`` when absent.
    session_token: str | None

    def __repr__(self) -> str:
        """Return a representation with secret material masked."""
        # `dataclass` keeps a `__repr__` defined in the class body, so this
        # replaces the generated one that would print every field verbatim.
        token = None if self.session_token is None else "***"
        return (
            f"{type(self).__name__}("
            f"access_key_id={self.access_key_id!r}, "
            f"secret_access_key='***', "
            f"session_token={token!r})"
        )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class EventSigner:
    """Compute rolling `:chunk-signature` headers for event-stream wrappers."""

    def __init__(self, *, signing_name: str, region: str) -> None:
        """Bind the signer to a service signing name and a region."""
        self._signing_name = signing_name
        self._region = region

    def sign(
        self,
        *,
        payload: bytes,
        prior_signature: bytes,
        credentials: EventSignerCredentials,
        now: datetime.datetime,
    ) -> dict[str, HeaderValue]:
        """Return wrapper headers (`:date`, `:chunk-signature`) for `payload`.

        Args:
            payload: Inner event-stream message bytes (already encoded with its
                own headers and payload).
            prior_signature: Raw signature bytes of the previous wrapped
                event, or the SigV4 signature of the initial HTTP request for
                the first event. Its hex form goes into the string-to-sign.
            credentials: AWS credentials used to derive the signing key.
            now: Timezone-aware datetime used both as the `:date` header
                value and as the timestamp embedded in the string-to-sign.
                Any timezone is accepted; the timestamp text is always
                rendered in UTC.

        Returns:
            A dict with two entries: `:date` (the input `now`, unchanged) and
            `:chunk-signature` (the raw HMAC-SHA256 digest).

        Raises:
            ValueError: if `now` is naive (no `tzinfo`, or a `tzinfo` that
                reports no UTC offset).
        """
        # Local import: at module level `datetime` is imported for type
        # checking only, so it is not available at runtime here.
        from datetime import timezone

        if now.tzinfo is None or now.utcoffset() is None:
            msg = "EventSigner.sign requires a timezone-aware `now`"
            raise ValueError(msg)

        # The format ends in a literal "Z", so the wall-clock fields must be
        # UTC. Without the conversion an aware non-UTC `now` would produce a
        # timestamp (and scope date / signing key) that disagrees with the
        # epoch-based `:date` header.
        timestamp = now.astimezone(timezone.utc).strftime(_TIMESTAMP_FMT)
        date_header_bytes = _encode_date_header_value(now)
        string_to_sign = "\n".join(
            [
                "AWS4-HMAC-SHA256-PAYLOAD",
                timestamp,
                self._scope(timestamp),
                prior_signature.hex(),
                hashlib.sha256(date_header_bytes).hexdigest(),
                hashlib.sha256(payload).hexdigest(),
            ]
        )
        signing_key = self._derive_signing_key(
            secret_access_key=credentials.secret_access_key,
            timestamp=timestamp,
        )
        chunk_signature = hmac.new(
            signing_key, string_to_sign.encode("utf-8"), hashlib.sha256
        ).digest()
        return {":date": now, ":chunk-signature": chunk_signature}

    def _scope(self, timestamp: str) -> str:
        """Build the credential scope; the first 8 chars of `timestamp` are the date."""
        return f"{timestamp[:8]}/{self._region}/{self._signing_name}/aws4_request"

    def _derive_signing_key(self, *, secret_access_key: str, timestamp: str) -> bytes:
        """Derive the signing key by chaining HMACs over date, region, service, terminator."""
        date = timestamp[:8].encode("utf-8")
        k_date = _hmac(b"AWS4" + secret_access_key.encode("utf-8"), date)
        k_region = _hmac(k_date, self._region.encode("utf-8"))
        k_service = _hmac(k_region, self._signing_name.encode("utf-8"))
        return _hmac(k_service, b"aws4_request")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _hmac(key: bytes, msg: bytes) -> bytes:
    """Return the raw HMAC-SHA256 digest of `msg` keyed with `key`."""
    mac = hmac.new(key, msg, digestmod=hashlib.sha256)
    return mac.digest()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _encode_date_header_value(when: datetime.datetime) -> bytes:
    """Serialize the `:date` header the way it is hashed into the string-to-sign.

    Layout: one length byte for the name, the name ``:date``, one type byte
    (timestamp), then the time as a big-endian signed 64-bit count of
    milliseconds derived from ``when.timestamp()``.
    """
    header_name = b":date"
    # Keep this exact float-then-truncate conversion: the hashed bytes must
    # equal whatever the message encoder puts on the wire for the same value.
    millis = int(when.timestamp() * 1000)
    return (
        struct.pack(">B", len(header_name))
        + header_name
        + struct.pack(">Bq", _HEADER_TYPE_TIMESTAMP, millis)
    )
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""AWS event-stream binary format parser and encoder.
|
|
2
|
+
|
|
3
|
+
This module provides three interfaces:
|
|
4
|
+
|
|
5
|
+
- `EventStreamParser`: stateful, sync-friendly. Accumulates bytes via `feed(...)`
|
|
6
|
+
and returns the list of complete messages decoded so far.
|
|
7
|
+
- `parse_stream(...)`: async iterator wrapper. Consumes an async iterator of
|
|
8
|
+
byte chunks and yields each complete message.
|
|
9
|
+
- `encode_message(...)` / `encode_messages(...)`: build the binary on-the-wire
|
|
10
|
+
representation of one or more event-stream messages.
|
|
11
|
+
|
|
12
|
+
Parsing reuses `botocore.eventstream.EventStreamBuffer`. Encoding is
|
|
13
|
+
implemented directly with `struct` and `binascii.crc32` because `awscrt`
|
|
14
|
+
exposes the encoder only via its connection-oriented RPC client, which is
|
|
15
|
+
heavier than what we need here.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import binascii
|
|
21
|
+
import datetime
|
|
22
|
+
import struct
|
|
23
|
+
from typing import TYPE_CHECKING
|
|
24
|
+
|
|
25
|
+
from botocore.eventstream import EventStreamBuffer
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from collections.abc import AsyncIterator, Mapping, Sequence
|
|
29
|
+
|
|
30
|
+
from botocore.eventstream import EventStreamMessage
|
|
31
|
+
|
|
32
|
+
# Header value type codes per the AWS event-stream binary spec.
|
|
33
|
+
_HEADER_TYPE_BYTE_ARRAY = 6
|
|
34
|
+
_HEADER_TYPE_STRING = 7
|
|
35
|
+
_HEADER_TYPE_TIMESTAMP = 8
|
|
36
|
+
# Each prelude is 12 bytes: total_length (u32) + headers_length (u32) + crc (u32).
|
|
37
|
+
_PRELUDE_LENGTH = 12
|
|
38
|
+
# The trailing message CRC is 4 bytes.
|
|
39
|
+
_MESSAGE_CRC_LENGTH = 4
|
|
40
|
+
|
|
41
|
+
HeaderValue = str | bytes | datetime.datetime
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class EventStreamParser:
    """Stateful, synchronous decoder for AWS event-stream framed bytes.

    Thin wrapper around `botocore.eventstream.EventStreamBuffer`: bytes go in
    through `feed`, fully received messages come out.
    """

    def __init__(self) -> None:
        """Start with a fresh, empty `EventStreamBuffer`."""
        self._buffer = EventStreamBuffer()

    def feed(self, data: bytes) -> list[EventStreamMessage]:
        """Append `data` to the buffer and drain every message it now yields.

        Args:
            data: Raw bytes to add; may hold part of a message, exactly one
                message, or several messages back to back.

        Returns:
            The messages produced by iterating the buffer after `data` was
            added, in order. Empty when iteration yields nothing.
        """
        buffer = self._buffer
        buffer.add_data(data)
        return [message for message in buffer]

    def reset(self) -> None:
        """Drop all buffered state by swapping in a new `EventStreamBuffer`."""
        self._buffer = EventStreamBuffer()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
async def parse_stream(
    chunks: AsyncIterator[bytes],
) -> AsyncIterator[EventStreamMessage]:
    """Decode event-stream messages from an async source of byte chunks.

    Args:
        chunks: Async iterator of byte chunks; chunk boundaries need not
            align with message boundaries.

    Yields:
        Every `EventStreamMessage` that becomes complete after each chunk is
        fed to an internal `EventStreamParser`, in arrival order.
    """
    decoder = EventStreamParser()
    async for piece in chunks:
        completed = decoder.feed(piece)
        for message in completed:
            yield message
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _encode_header(name: str, value: HeaderValue) -> bytes:
    """Serialize one header as ``name_len (u8) | name | value_type (u8) | value``.

    The value encoding is chosen from the Python type of `value`:

    - `str`: string type, UTF-8 bytes preceded by a big-endian u16 length.
    - `datetime.datetime`: timestamp type, big-endian i64 milliseconds
      derived from ``value.timestamp()``; naive datetimes are rejected.
    - anything else is treated as a byte array: big-endian u16 length
      followed by the value itself.

    Raises:
        ValueError: if `value` is a datetime without `tzinfo`.
    """
    encoded_name = name.encode("utf-8")
    prefix = bytes([len(encoded_name)]) + encoded_name

    if isinstance(value, str):
        text = value.encode("utf-8")
        type_and_length = bytes([_HEADER_TYPE_STRING]) + struct.pack(">H", len(text))
        return prefix + type_and_length + text

    if not isinstance(value, datetime.datetime):
        # Fallback branch: byte-array header.
        return prefix + bytes([_HEADER_TYPE_BYTE_ARRAY]) + struct.pack(">H", len(value)) + value

    if value.tzinfo is None:
        msg = f"Header {name!r} datetime value must be timezone-aware"
        raise ValueError(msg)
    # Same float-then-truncate conversion as the original; other code hashing
    # this header must see identical bytes.
    millis = int(value.timestamp() * 1000)
    return prefix + bytes([_HEADER_TYPE_TIMESTAMP]) + struct.pack(">q", millis)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def encode_message(headers: Mapping[str, HeaderValue], payload: bytes) -> bytes:
    """Serialize a single event-stream message.

    Each header is written by `_encode_header`, which picks the wire type
    from the Python type of the value. Headers are emitted in the mapping's
    iteration order.

    Args:
        headers: Header name to typed value.
        payload: Raw payload bytes; may be empty.

    Returns:
        The framed message: 8-byte prelude (total length, headers length),
        CRC32 of the prelude, header bytes, payload, and finally the CRC32 of
        everything before it. All integers are big-endian u32.
    """
    encoded_headers = b"".join([_encode_header(key, val) for key, val in headers.items()])
    header_size = len(encoded_headers)
    message_size = _PRELUDE_LENGTH + header_size + len(payload) + _MESSAGE_CRC_LENGTH

    prelude = struct.pack(">II", message_size, header_size)
    prelude_crc = struct.pack(">I", binascii.crc32(prelude))

    # The trailing CRC covers prelude, prelude CRC, headers and payload.
    framed = prelude + prelude_crc + encoded_headers + payload
    message_crc = struct.pack(">I", binascii.crc32(framed))
    return framed + message_crc
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def encode_messages(messages: Sequence[tuple[Mapping[str, HeaderValue], bytes]]) -> bytes:
    """Serialize several messages and concatenate them in order.

    Args:
        messages: ``(headers, payload)`` pairs, each encoded with
            `encode_message`.

    Returns:
        The encoded messages joined back to back; ``b""`` for an empty input.
    """
    frames = [encode_message(message_headers, body) for message_headers, body in messages]
    return b"".join(frames)
|