amazon-polly-streaming 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ """Amazon Polly bidirectional streaming over HTTP/2 with SigV4."""
2
+
3
+ from amazon_polly_streaming.client import PollyStreamingClient
4
+ from amazon_polly_streaming.exceptions import (
5
+ ServiceException,
6
+ ServiceFailureException,
7
+ ServiceQuotaExceededException,
8
+ ThrottlingException,
9
+ ValidationException,
10
+ )
11
+
12
# Names re-exported at package level: the streaming client plus the exception
# types imported above from ``amazon_polly_streaming.exceptions``.
__all__ = [
    "PollyStreamingClient",
    "ServiceException",
    "ServiceFailureException",
    "ServiceQuotaExceededException",
    "ThrottlingException",
    "ValidationException",
]
# Version string of this package release.
__version__ = "1.0.0"
@@ -0,0 +1,102 @@
1
+ """Writable, non-blocking byte stream used as HTTP/2 request body channel.
2
+
3
+ `awscrt`'s HTTP/2 client reads the request body from a file-like object on its
4
+ own I/O thread, calling `read(size)` repeatedly until the stream signals EOF.
5
+ For bidirectional streaming we want to write events to the body channel
6
+ incrementally from the application's asyncio thread, after the request has
7
+ already been opened.
8
+
9
+ `BufferableByteStream` exposes that pattern by behaving as a non-blocking
10
+ file-like object: `read` raises `BlockingIOError` when no data is currently
11
+ available, signalling `awscrt` to retry later; once `end_stream` is called and
12
+ all buffered chunks have been read, `read` returns `b""` (EOF) and the HTTP/2
13
+ stream is closed cleanly.
14
+
15
+ Pattern adapted from the `amazon-transcribe-streaming-sdk` Python package
16
+ (Apache 2.0), which uses the same approach with `awscrt`.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from io import BufferedIOBase
22
+
23
+
24
class BufferableByteStream(BufferedIOBase):
    """Non-blocking, append-only byte buffer for HTTP/2 request body channels.

    Chunks are appended with `write` and consumed from the head with `read`.
    The two sides are expected to run on different threads (the application
    writes, the HTTP client reads), so `read` removes the head chunk with a
    single `popleft` call instead of a separate emptiness check followed by a
    pop; a concurrent `close` can therefore never make `read` fail with
    `IndexError`.
    """

    def __init__(self) -> None:
        """Initialize an empty, open stream."""
        # Local import: the module's top-level imports only bring in
        # `BufferedIOBase`, and a deque gives O(1) operations at the head.
        from collections import deque

        self._chunks: deque[bytes] = deque()
        self._done: bool = False
        self._closed: bool = False

    def readable(self) -> bool:
        """Report that the stream supports `read` (the `IOBase` default is False)."""
        return True

    def writable(self) -> bool:
        """Report that the stream supports `write` (the `IOBase` default is False)."""
        return True

    def read(self, size: int | None = -1) -> bytes:
        """Return up to `size` bytes; raise `BlockingIOError` if no data is buffered.

        Args:
            size: Maximum number of bytes to return. ``-1`` or ``None`` means
                "the next chunk in full".

        Returns:
            Bytes consumed from the head of the buffer. ``b""`` once the
            stream has been ended (`end_stream`) or closed and fully drained.

        Raises:
            BlockingIOError: when the stream is still open but no data has
                been written yet. The caller (`awscrt` in production) is
                expected to retry later.
        """
        try:
            # One atomic step: either a chunk is obtained or the buffer was empty.
            chunk = self._chunks.popleft()
        except IndexError:
            if self._done or self._closed:
                return b""
            msg = "no data buffered yet"
            raise BlockingIOError(msg) from None

        if size is None or size < 0 or size >= len(chunk):
            return chunk
        # Partial read: put the unread tail back at the head of the buffer.
        self._chunks.appendleft(chunk[size:])
        return chunk[:size]

    def read1(self, size: int = -1) -> bytes:
        """Read at most `size` bytes; same semantics as `read`."""
        return self.read(size)

    def write(self, b: bytes) -> int:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Append `b` to the buffer; return the number of bytes accepted.

        Args:
            b: Bytes to append. Non-bytes input raises `TypeError`.

        Returns:
            ``len(b)``. Empty input is accepted but not stored.

        Raises:
            TypeError: if `b` is not a `bytes` instance.
            OSError: if the stream has already been ended or closed.
        """
        if not isinstance(b, bytes):  # pyright: ignore[reportUnnecessaryIsInstance]
            msg = f"BufferableByteStream.write requires bytes, got {type(b).__name__}"
            raise TypeError(msg)
        if self._done or self._closed:
            msg = "stream is closed"
            raise OSError(msg)
        if b:
            self._chunks.append(b)
        return len(b)

    def end_stream(self) -> None:
        """Mark the stream as ended; future writes raise, future reads drain then return `b""`."""
        self._done = True

    def close(self) -> None:
        """Close the stream and discard any buffered data."""
        self._chunks.clear()
        self._done = True
        self._closed = True

    @property
    def closed(self) -> bool:
        """True if `close` has been called."""
        return self._closed
@@ -0,0 +1,229 @@
1
+ """HTTP/2 connection pool for `amazon-polly-streaming` with multi-connection lease semantics.
2
+
3
+ Eliminates the TLS handshake, ALPN h2 negotiation, and HTTP/2 SETTINGS exchange
4
+ between subsequent calls to the same Polly endpoint, while supporting fan-out:
5
+ multiple concurrent leases on the same ``(host, port)`` get distinct underlying
6
+ ``HttpClientConnection`` instances, up to ``max_size_per_key``.
7
+
8
+ The Polly bidirectional streaming endpoint advertises one active stream per
9
+ HTTP/2 connection, so a single shared connection cannot serve more than one
10
+ synthesis call at a time. The pool therefore keeps a per-key list of
11
+ connections: idle entries are reused by subsequent acquires (preserving the
12
+ TLS/H2 cache benefit), and concurrent leases each get their own connection.
13
+ When the per-key cap is reached, a further acquire waits on a Condition until
14
+ a release frees a slot.
15
+
16
+ The underlying AWS Common Runtime resources (event loop group, host resolver,
17
+ client bootstrap, TLS context) are shared across all entries and allocated
18
+ lazily on the first connect.
19
+
20
+ Pattern inspired by the AWS-maintained ``amazon-transcribe-streaming-sdk``
21
+ (``AwsCrtHttpSessionManager._connections`` in
22
+ ``amazon_transcribe/httpsession.py``).
23
+ """
24
+
25
+ # pyright: reportUnknownMemberType=false, reportUnknownVariableType=false
26
+ # pyright: reportUnknownArgumentType=false, reportUnknownParameterType=false
27
+ # pyright: reportAttributeAccessIssue=false, reportArgumentType=false
28
+ # pyright: reportUnnecessaryComparison=false
29
+ # Rationale: awscrt ships no type stubs and its inline annotations are
30
+ # incomplete; suppressions are scoped to this module.
31
+ from __future__ import annotations
32
+
33
+ import asyncio
34
+ from contextlib import asynccontextmanager
35
+ from typing import TYPE_CHECKING
36
+
37
+ from awscrt import http, io
38
+ from awscrt.http import HttpClientConnection
39
+
40
+ if TYPE_CHECKING:
41
+ from collections.abc import AsyncGenerator
42
+
43
# Default value of ``_ConnectionPool``'s ``max_size_per_key`` argument, i.e. the
# default cap on simultaneously leased connections per ``(host, port)`` key.
_DEFAULT_MAX_SIZE_PER_KEY = 8
44
+
45
+
46
async def _connect(
    host: str,
    port: int,
    *,
    bootstrap: io.ClientBootstrap,
    tls_ctx: io.ClientTlsContext,
) -> HttpClientConnection:
    """Establish an HTTP/2 connection to ``host:port`` on shared awscrt resources.

    The caller supplies the bootstrap and the TLS context so they can be
    reused across connections. This function only derives the
    per-connection TLS options (server name and the ``h2`` ALPN entry) from
    the context before dialing.

    Args:
        host: Hostname to connect to; also used as the TLS server name.
        port: TCP port (typically ``443``).
        bootstrap: Pre-built ``ClientBootstrap`` passed to the connection.
        tls_ctx: Pre-built ``ClientTlsContext`` that spawns the
            per-connection TLS options.

    Returns:
        An open ``HttpClientConnection`` whose negotiated version is HTTP/2.

    Raises:
        RuntimeError: if the connection is not open after the connect
            completes, or if a version other than HTTP/2 was negotiated (the
            connection is closed before raising in that case).
    """
    tls_options = tls_ctx.new_connection_options()
    tls_options.set_server_name(host)
    tls_options.set_alpn_list(["h2"])

    pending = HttpClientConnection.new(
        host_name=host,
        port=port,
        bootstrap=bootstrap,
        socket_options=io.SocketOptions(),
        tls_connection_options=tls_options,
    )
    # awscrt returns a concurrent future; bridge it into the running loop.
    conn = await asyncio.wrap_future(pending)

    if not conn.is_open():
        msg = f"Could not open connection to {host}:{port}"
        raise RuntimeError(msg)
    if conn.version is http.HttpVersion.Http2:
        return conn

    # Anything other than HTTP/2 is unusable here: drop the connection.
    conn.close()
    msg = f"HTTP/2 could not be negotiated: got {conn.version!r}"
    raise RuntimeError(msg)
93
+
94
+
95
class _ConnectionPool:
    """Bounded pool of HTTP/2 connections keyed on ``(host, port)``, with leases.

    The pool allows up to ``max_size_per_key`` simultaneous leases per key.
    Each acquire grants exclusive use of one connection (a "lease") for the
    duration of an ``async with acquire_connection(...)`` block. Idle
    connections are reused by subsequent acquires; concurrent acquires on the
    same key each get their own connection (opening fresh ones up to the cap).

    A single ``asyncio.Condition`` serializes pool state mutations and signals
    waiters when a slot frees up. The lock is held only over in-memory state
    updates; ``_connect`` runs outside the lock so different keys (and even
    different acquires on the same key) connect in parallel.

    The underlying awscrt resources (event loop group, host resolver,
    client bootstrap, TLS context) are shared across every cached
    connection and allocated lazily on the first connect.
    """

    def __init__(self, *, max_size_per_key: int = _DEFAULT_MAX_SIZE_PER_KEY) -> None:
        """Create an empty pool.

        Args:
            max_size_per_key: Maximum number of simultaneous leases per
                ``(host, port)`` key. Must be at least 1.

        Raises:
            ValueError: if ``max_size_per_key`` is smaller than 1. With such a
                cap no slot could ever be reserved, so an acquire for a key
                without idle connections would wait forever.
        """
        if max_size_per_key < 1:
            msg = f"max_size_per_key must be at least 1, got {max_size_per_key!r}"
            raise ValueError(msg)
        self._max_size_per_key = max_size_per_key
        # Connections currently not leased, per key; `_acquire` pops from the end.
        self._idle: dict[tuple[str, int], list[HttpClientConnection]] = {}
        # Number of outstanding leases per key, including slots reserved for
        # connects that are still in flight.
        self._in_use: dict[tuple[str, int], int] = {}
        self._cond = asyncio.Condition()
        self._closed = False
        self._bootstrap: io.ClientBootstrap | None = None
        self._tls_ctx: io.ClientTlsContext | None = None
        # Hold refs to event loop group and resolver so they are not GC'd
        # while the bootstrap is alive.
        self._elg: io.EventLoopGroup | None = None
        self._resolver: io.DefaultHostResolver | None = None

    def _ensure_resources(self) -> tuple[io.ClientBootstrap, io.ClientTlsContext]:
        """Lazily build the shared awscrt resources on first use.

        Returns:
            The shared ``(bootstrap, tls_context)`` pair, created on the first
            call and reused afterwards.
        """
        if self._bootstrap is None or self._tls_ctx is None:
            elg = io.EventLoopGroup(1)
            resolver = io.DefaultHostResolver(elg)
            self._elg = elg
            self._resolver = resolver
            self._bootstrap = io.ClientBootstrap(elg, resolver)
            self._tls_ctx = io.ClientTlsContext(io.TlsContextOptions())
        return self._bootstrap, self._tls_ctx

    @asynccontextmanager
    async def acquire_connection(
        self, *, host: str, port: int
    ) -> AsyncGenerator[HttpClientConnection]:
        """Acquire an exclusive lease on an HTTP/2 connection for ``(host, port)``.

        On entry, returns either a cached idle connection or a freshly opened
        one (up to ``max_size_per_key`` leases per key). If the cap is reached,
        the call waits until a concurrent lease releases its connection. On
        exit, the connection returns to the idle list (or is closed if it has
        gone stale or the pool was closed in the meantime).

        Args:
            host: Hostname.
            port: TCP port.

        Yields:
            An open ``HttpClientConnection`` ready to accept a new HTTP/2
            stream via ``connection.request(...)``.

        Raises:
            RuntimeError: if no idle connection is available and a fresh
                connect cannot be opened.
        """
        key = (host, port)
        connection = await self._acquire(key)
        try:
            yield connection
        finally:
            # Always hand the slot back, even when the lease body raised.
            await self._release(key, connection)

    async def _acquire(self, key: tuple[str, int]) -> HttpClientConnection:
        """Reserve a slot for ``key`` and return an open connection.

        Idle connections are preferred; stale ones found on the way are closed
        and skipped. Without a usable idle connection a slot is reserved (or
        awaited when the cap is reached) and a fresh connection is opened
        outside the lock. If that connect fails or is cancelled, the reserved
        slot is returned and waiters are notified before re-raising.
        """
        async with self._cond:
            while True:
                idle_list = self._idle.get(key, [])
                while idle_list:
                    candidate = idle_list.pop()
                    if candidate.is_open():
                        self._in_use[key] = self._in_use.get(key, 0) + 1
                        return candidate
                    # Connection died while idle: drop it and try the next one.
                    candidate.close()
                count = self._in_use.get(key, 0)
                if count < self._max_size_per_key:
                    # Reserve the slot now so concurrent acquires see it taken
                    # while the connect below is still in flight.
                    self._in_use[key] = count + 1
                    break
                await self._cond.wait()
        try:
            bootstrap, tls_ctx = self._ensure_resources()
            return await _connect(key[0], key[1], bootstrap=bootstrap, tls_ctx=tls_ctx)
        except BaseException:
            # BaseException so that cancellation also returns the reserved slot.
            async with self._cond:
                self._in_use[key] = max(0, self._in_use.get(key, 0) - 1)
                self._cond.notify_all()
            raise

    async def _release(self, key: tuple[str, int], connection: HttpClientConnection) -> None:
        """Return a leased connection to idle, or close it on stale/closed-pool."""
        async with self._cond:
            self._in_use[key] = max(0, self._in_use.get(key, 0) - 1)
            if self._closed or not connection.is_open():
                connection.close()
            else:
                self._idle.setdefault(key, []).append(connection)
            # A slot (and possibly an idle connection) is available again.
            self._cond.notify_all()

    async def close_all(self) -> None:
        """Close every idle connection and mark the pool as closed.

        Connections currently leased survive until their lease releases; on
        release they are closed (not returned to idle) because the pool is
        flagged closed.
        """
        async with self._cond:
            self._closed = True
            idle_snapshot = list(self._idle.items())
            self._idle.clear()
            self._cond.notify_all()
        # Close outside the lock; the snapshot is no longer reachable via the pool.
        for _key, conns in idle_snapshot:
            for connection in conns:
                connection.close()
220
+
221
# Lazily created process-wide pool; populated by ``get_default_pool``.
_default_pool: _ConnectionPool | None = None


def get_default_pool() -> _ConnectionPool:
    """Return the shared module-level ``_ConnectionPool``, building it on first use."""
    global _default_pool  # noqa: PLW0603
    pool = _default_pool
    if pool is None:
        pool = _ConnectionPool()
        _default_pool = pool
    return pool
@@ -0,0 +1,129 @@
1
+ """Rolling AWS event-stream chunk signer for HTTP/2 bidirectional streaming.
2
+
3
+ Each event sent on the request body channel must be wrapped in a signed
4
+ envelope. The wrapper carries two headers, `:date` and `:chunk-signature`,
5
+ where `:chunk-signature` is computed as an HMAC-SHA256 of an event-stream-
6
+ specific string-to-sign. The signature of the previous wrapped event (or, for
7
+ the first event, the SigV4 signature of the initial HTTP request) participates
8
+ in the string-to-sign, producing a rolling chain.
9
+
10
+ The signing string follows the format documented at
11
+ https://docs.aws.amazon.com/transcribe/latest/dg/streaming-setting-up.html
12
+ under "AWS4-HMAC-SHA256-PAYLOAD". The same algorithm is used by Amazon Polly
13
+ bidirectional streaming and by the open-source `amazon-transcribe-streaming-
14
+ sdk` Python package, both based on the AWS Common Runtime event-stream signing
15
+ contract.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import hashlib
21
+ import hmac
22
+ import struct
23
+ from dataclasses import dataclass
24
+ from typing import TYPE_CHECKING
25
+
26
+ if TYPE_CHECKING:
27
+ import datetime
28
+
29
+ from amazon_polly_streaming._eventstream import HeaderValue
30
+
31
# ``strftime`` pattern of the timestamp placed in the string-to-sign; its first
# eight characters (YYYYMMDD) are reused for the scope and the signing key.
_TIMESTAMP_FMT = "%Y%m%dT%H%M%SZ"
# Event-stream header value type tag written in front of the ``:date`` value.
_HEADER_TYPE_TIMESTAMP = 8
33
+
34
+
35
@dataclass(frozen=True)
class EventSignerCredentials:
    """Immutable bundle of the AWS credential fields handed to the event signer.

    Attributes:
        access_key_id: AWS access key id.
        secret_access_key: AWS secret access key; the signing key is derived
            from this value.
        session_token: Session token, or ``None`` when there is none.
    """

    access_key_id: str
    secret_access_key: str
    session_token: str | None
42
+
43
+
44
class EventSigner:
    """Compute rolling `:chunk-signature` headers for event-stream wrappers."""

    def __init__(self, *, signing_name: str, region: str) -> None:
        """Bind the signer to a service signing name and a region."""
        self._signing_name = signing_name
        self._region = region

    def sign(
        self,
        *,
        payload: bytes,
        prior_signature: bytes,
        credentials: EventSignerCredentials,
        now: datetime.datetime,
    ) -> dict[str, HeaderValue]:
        """Return wrapper headers (`:date`, `:chunk-signature`) for `payload`.

        Args:
            payload: Inner event-stream message bytes (already encoded with its
                own headers and payload).
            prior_signature: Raw signature of the previous wrapped event, or
                the SigV4 signature of the initial HTTP request for the first
                event. Its hex form enters the string-to-sign.
            credentials: AWS credentials used to derive the signing key.
            now: Timezone-aware datetime used both as the `:date` header value
                and as the timestamp embedded in the string-to-sign. Any
                aware datetime is accepted; it is converted to UTC before
                being formatted.

        Returns:
            A dict with two entries: `:date` (the input `now`) and
            `:chunk-signature` (32 raw HMAC-SHA256 bytes).

        Raises:
            ValueError: if `now` is naive (no usable UTC offset).
        """
        # Local import: the module imports `datetime` for type checking only.
        from datetime import timezone

        if now.utcoffset() is None:
            msg = "EventSigner.sign requires a timezone-aware `now`"
            raise ValueError(msg)

        # The timestamp format ends in a literal "Z", so the value must be
        # rendered in UTC. Formatting an aware non-UTC datetime directly would
        # yield a shifted timestamp (and possibly a wrong scope date).
        timestamp = now.astimezone(timezone.utc).strftime(_TIMESTAMP_FMT)
        # The `:date` header encodes an absolute instant, so the original
        # `now` can be used as-is here and in the returned headers.
        date_header_bytes = _encode_date_header_value(now)
        string_to_sign = "\n".join(
            [
                "AWS4-HMAC-SHA256-PAYLOAD",
                timestamp,
                self._scope(timestamp),
                prior_signature.hex(),
                hashlib.sha256(date_header_bytes).hexdigest(),
                hashlib.sha256(payload).hexdigest(),
            ]
        )
        signing_key = self._derive_signing_key(
            secret_access_key=credentials.secret_access_key,
            timestamp=timestamp,
        )
        chunk_signature = hmac.new(
            signing_key, string_to_sign.encode("utf-8"), hashlib.sha256
        ).digest()
        return {":date": now, ":chunk-signature": chunk_signature}

    def _scope(self, timestamp: str) -> str:
        """Build the credential scope ``<YYYYMMDD>/<region>/<signing name>/aws4_request``."""
        return f"{timestamp[:8]}/{self._region}/{self._signing_name}/aws4_request"

    def _derive_signing_key(self, *, secret_access_key: str, timestamp: str) -> bytes:
        """Derive the signing key by chaining HMACs over date, region, name and terminator."""
        date = timestamp[:8].encode("utf-8")
        k_date = _hmac(b"AWS4" + secret_access_key.encode("utf-8"), date)
        k_region = _hmac(k_date, self._region.encode("utf-8"))
        k_service = _hmac(k_region, self._signing_name.encode("utf-8"))
        return _hmac(k_service, b"aws4_request")
113
+
114
+
115
+ def _hmac(key: bytes, msg: bytes) -> bytes:
116
+ return hmac.new(key, msg, hashlib.sha256).digest()
117
+
118
+
119
def _encode_date_header_value(when: datetime.datetime) -> bytes:
    """Serialize the `:date` header the way it appears on the wire, for hashing.

    The string-to-sign contains the SHA256 of these bytes: a one-byte name
    length, the name ``:date``, the timestamp type tag, and the instant as a
    big-endian signed 64-bit count of milliseconds since the UTC epoch. The
    result has to match what the event-stream encoder emits for the same
    header, so the millisecond computation must not diverge from it.
    """
    header_name = b":date"
    millis = int(when.timestamp() * 1000)
    parts = (
        bytes([len(header_name)]),
        header_name,
        bytes([_HEADER_TYPE_TIMESTAMP]),
        struct.pack(">q", millis),
    )
    return b"".join(parts)
@@ -0,0 +1,155 @@
1
+ """AWS event-stream binary format parser and encoder.
2
+
3
+ This module provides three interfaces:
4
+
5
+ - `EventStreamParser`: stateful, sync-friendly. Accumulates bytes via `feed(...)`
6
+ and returns the list of complete messages decoded so far.
7
+ - `parse_stream(...)`: async iterator wrapper. Consumes an async iterator of
8
+ byte chunks and yields each complete message.
9
+ - `encode_message(...)` / `encode_messages(...)`: build the binary on-the-wire
10
+ representation of one or more event-stream messages.
11
+
12
+ Parsing reuses `botocore.eventstream.EventStreamBuffer`. Encoding is
13
+ implemented directly with `struct` and `binascii.crc32` because `awscrt`
14
+ exposes the encoder only via its connection-oriented RPC client, which is
15
+ heavier than what we need here.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import binascii
21
+ import datetime
22
+ import struct
23
+ from typing import TYPE_CHECKING
24
+
25
+ from botocore.eventstream import EventStreamBuffer
26
+
27
+ if TYPE_CHECKING:
28
+ from collections.abc import AsyncIterator, Mapping, Sequence
29
+
30
+ from botocore.eventstream import EventStreamMessage
31
+
32
# Header value type codes per the AWS event-stream binary spec.
_HEADER_TYPE_BYTE_ARRAY = 6  # raw bytes, written with a u16 length prefix
_HEADER_TYPE_STRING = 7  # UTF-8 text, written with a u16 length prefix
_HEADER_TYPE_TIMESTAMP = 8  # i64 milliseconds since the UTC epoch
# Each prelude is 12 bytes: total_length (u32) + headers_length (u32) + crc (u32).
_PRELUDE_LENGTH = 12
# The trailing message CRC is 4 bytes.
_MESSAGE_CRC_LENGTH = 4

# Python types the encoder in this module accepts as header values; each maps
# to one of the wire type codes above.
HeaderValue = str | bytes | datetime.datetime
42
+
43
+
44
class EventStreamParser:
    """Stateful, synchronous decoder for AWS event-stream framed bytes."""

    def __init__(self) -> None:
        """Start with a fresh, empty ``EventStreamBuffer``."""
        self._buffer = EventStreamBuffer()

    def feed(self, data: bytes) -> list[EventStreamMessage]:
        """Append ``data`` to the buffer and drain every message that is now complete.

        Args:
            data: Raw bytes from the network: a fragment of a message, exactly
                one message, or several messages back to back.

        Returns:
            The `EventStreamMessage` objects the buffer yields after this
            chunk was added, in order. Empty when no full message is
            available yet.
        """
        self._buffer.add_data(data)
        decoded: list[EventStreamMessage] = []
        decoded.extend(self._buffer)
        return decoded

    def reset(self) -> None:
        """Discard any buffered bytes by swapping in a new empty buffer."""
        self._buffer = EventStreamBuffer()
68
+
69
+
70
async def parse_stream(
    chunks: AsyncIterator[bytes],
) -> AsyncIterator[EventStreamMessage]:
    """Decode event-stream messages from an async source of byte chunks.

    Args:
        chunks: Async iterator producing byte chunks from a streaming HTTP body.

    Yields:
        Every complete `EventStreamMessage`, in order, as soon as the chunk
        that completes it has been fed to the parser.
    """
    decoder = EventStreamParser()
    async for data in chunks:
        completed = decoder.feed(data)
        for message in completed:
            yield message
86
+
87
+
88
def _encode_header(name: str, value: HeaderValue) -> bytes:
    """Serialize one header into event-stream wire format.

    The wire type is chosen from the Python type of ``value``:
    - `str` -> type 7 (UTF-8 string), u16 length prefix.
    - `datetime.datetime` -> type 8 (timestamp), i64 ms since UTC epoch.
    - anything else is written as type 6 (byte array), u16 length prefix.

    Layout: name_len (u8) | name | value_type (u8) | value-encoded-bytes.

    Raises:
        ValueError: if a `datetime.datetime` value is naive.
    """
    encoded_name = name.encode("utf-8")
    # Built first so that an over-long name fails before the value is examined.
    prefix = bytes([len(encoded_name)]) + encoded_name

    if isinstance(value, str):
        text = value.encode("utf-8")
        type_code = _HEADER_TYPE_STRING
        encoded_value = struct.pack(">H", len(text)) + text
    elif isinstance(value, datetime.datetime):
        if value.tzinfo is None:
            msg = f"Header {name!r} datetime value must be timezone-aware"
            raise ValueError(msg)
        type_code = _HEADER_TYPE_TIMESTAMP
        encoded_value = struct.pack(">q", int(value.timestamp() * 1000))
    else:
        type_code = _HEADER_TYPE_BYTE_ARRAY
        encoded_value = struct.pack(">H", len(value)) + value

    return prefix + bytes([type_code]) + encoded_value
115
+
116
+
117
def encode_message(headers: Mapping[str, HeaderValue], payload: bytes) -> bytes:
    """Serialize a single event-stream message.

    Each header is written with the wire type that `_encode_header` picks for
    its Python type: string (type 7), byte_array (type 6) or timestamp
    (type 8). The Polly inbound protocol uses string headers for inner events
    (`:message-type`, `:event-type`, `:content-type`) and bytes/timestamp for
    the rolling-signature wrapper (`:date`, `:chunk-signature`).

    Args:
        headers: Mapping of header name to typed value, emitted in iteration
            order.
        payload: Raw payload bytes (already JSON-encoded for ``TextEvent`` or
            empty for ``CloseStreamEvent``).

    Returns:
        Binary message: prelude (with prelude CRC) + headers + payload +
        trailing message CRC.
    """
    encoded_headers = b"".join(
        _encode_header(header_name, header_value)
        for header_name, header_value in headers.items()
    )
    message_size = (
        _PRELUDE_LENGTH + len(encoded_headers) + len(payload) + _MESSAGE_CRC_LENGTH
    )

    # Prelude: both lengths, followed by a CRC over just those eight bytes.
    lengths = struct.pack(">II", message_size, len(encoded_headers))
    prelude = lengths + struct.pack(">I", binascii.crc32(lengths))

    # The trailing CRC covers everything that precedes it.
    framed = prelude + encoded_headers + payload
    trailer = struct.pack(">I", binascii.crc32(framed))
    return framed + trailer
144
+
145
+
146
def encode_messages(messages: Sequence[tuple[Mapping[str, HeaderValue], bytes]]) -> bytes:
    """Serialize several ``(headers, payload)`` pairs back to back.

    Args:
        messages: Ordered messages to encode.

    Returns:
        The encoded messages concatenated in input order; ``b""`` when
        ``messages`` is empty.
    """
    wire = bytearray()
    for message_headers, message_payload in messages:
        wire += encode_message(message_headers, message_payload)
    return bytes(wire)