s2-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
s2_sdk/__init__.py ADDED
@@ -0,0 +1,129 @@
1
+ import logging
2
+ from importlib.metadata import version
3
+
4
+ from s2_sdk._append_session import AppendSession, BatchSubmitTicket
5
+ from s2_sdk._batching import append_inputs, append_record_batches
6
+ from s2_sdk._exceptions import (
7
+ AppendConditionError,
8
+ FencingTokenMismatchError,
9
+ ReadUnwrittenError,
10
+ S2ClientError,
11
+ S2Error,
12
+ S2ServerError,
13
+ SeqNumMismatchError,
14
+ )
15
+ from s2_sdk._ops import S2, S2Basin, S2Stream
16
+ from s2_sdk._producer import Producer, RecordSubmitTicket
17
+ from s2_sdk._types import (
18
+ AccessTokenInfo,
19
+ AccessTokenScope,
20
+ AccountMetricSet,
21
+ Accumulation,
22
+ AppendAck,
23
+ AppendInput,
24
+ AppendRetryPolicy,
25
+ BasinConfig,
26
+ BasinInfo,
27
+ BasinMetricSet,
28
+ BasinScope,
29
+ Batching,
30
+ CommandRecord,
31
+ Compression,
32
+ Endpoints,
33
+ ExactMatch,
34
+ Gauge,
35
+ IndexedAppendAck,
36
+ Label,
37
+ MetricUnit,
38
+ Operation,
39
+ OperationGroupPermissions,
40
+ Page,
41
+ Permission,
42
+ PrefixMatch,
43
+ ReadBatch,
44
+ ReadLimit,
45
+ Record,
46
+ Retry,
47
+ Scalar,
48
+ SeqNum,
49
+ SequencedRecord,
50
+ StorageClass,
51
+ StreamConfig,
52
+ StreamInfo,
53
+ StreamMetricSet,
54
+ StreamPosition,
55
+ TailOffset,
56
+ Timeout,
57
+ TimeseriesInterval,
58
+ Timestamp,
59
+ Timestamping,
60
+ TimestampingMode,
61
+ metered_bytes,
62
+ )
63
+
64
# Package version, read from the installed distribution metadata of "s2-sdk".
__version__ = version("s2-sdk")

# Attach a no-op handler to the package logger so the library itself configures
# no log output; applications opt in by configuring logging themselves.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Public API of the package. Entries are grouped by the private module they
# are imported from at the top of this file.
__all__ = [
    # s2_sdk._ops
    "S2",
    "S2Basin",
    "S2Stream",
    # s2_sdk._append_session
    "AppendSession",
    "BatchSubmitTicket",
    # s2_sdk._producer
    "Producer",
    "RecordSubmitTicket",
    # s2_sdk._batching
    "append_record_batches",
    "append_inputs",
    # s2_sdk._types
    "Retry",
    "Timeout",
    "Compression",
    "AppendRetryPolicy",
    "Batching",
    "Endpoints",
    "Record",
    "AppendInput",
    "AppendAck",
    "IndexedAppendAck",
    "StreamPosition",
    "SeqNum",
    "Timestamp",
    "TailOffset",
    "ReadBatch",
    "ReadLimit",
    "SequencedRecord",
    "Page",
    "CommandRecord",
    "metered_bytes",
    "StorageClass",
    "TimestampingMode",
    "Timestamping",
    "StreamConfig",
    "BasinConfig",
    "BasinInfo",
    "BasinScope",
    "StreamInfo",
    "ExactMatch",
    "PrefixMatch",
    "Permission",
    "Operation",
    "OperationGroupPermissions",
    "AccessTokenScope",
    "AccessTokenInfo",
    "MetricUnit",
    "TimeseriesInterval",
    "AccountMetricSet",
    "BasinMetricSet",
    "StreamMetricSet",
    "Scalar",
    "Accumulation",
    "Gauge",
    "Label",
    # s2_sdk._exceptions
    "S2Error",
    "S2ClientError",
    "S2ServerError",
    "AppendConditionError",
    "FencingTokenMismatchError",
    "SeqNumMismatchError",
    "ReadUnwrittenError",
]
@@ -0,0 +1,216 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from collections import deque
5
+ from typing import AsyncIterable, NamedTuple, Self
6
+
7
+ from s2_sdk._client import HttpClient
8
+ from s2_sdk._exceptions import S2ClientError
9
+ from s2_sdk._s2s._append_session import run_append_session
10
+ from s2_sdk._types import (
11
+ AppendAck,
12
+ AppendInput,
13
+ Compression,
14
+ Retry,
15
+ metered_bytes,
16
+ )
17
+ from s2_sdk._validators import validate_append_input
18
+
19
+
20
class _UnackedBatch(NamedTuple):
    """Bookkeeping entry for a submitted batch that has not been acknowledged yet."""

    # Future that AppendSession completes with the batch's AppendAck, or fails
    # with the session error.
    ack_fut: asyncio.Future[AppendAck]
    # Metered size of the batch; AppendSession releases this amount from its
    # permits once the batch is resolved or failed.
    metered_bytes: int
23
+
24
+
25
+ class AppendSession:
26
+ """Session for high-throughput appending with backpressure control.
27
+
28
+ Supports pipelining multiple :class:`AppendInput`\\ s while preserving
29
+ submission order.
30
+
31
+ Caution:
32
+ Returned by :meth:`S2Stream.append_session`. Do not instantiate directly.
33
+ """
34
+
35
+ __slots__ = (
36
+ "_closed",
37
+ "_client",
38
+ "_compression",
39
+ "_error",
40
+ "_permits",
41
+ "_queue",
42
+ "_retry",
43
+ "_stream_name",
44
+ "_task",
45
+ "_unacked",
46
+ )
47
+
48
+ def __init__(
49
+ self,
50
+ client: HttpClient,
51
+ stream_name: str,
52
+ retry: Retry,
53
+ compression: Compression,
54
+ max_unacked_bytes: int,
55
+ max_unacked_batches: int | None,
56
+ ) -> None:
57
+ self._client = client
58
+ self._stream_name = stream_name
59
+ self._retry = retry
60
+ self._compression = compression
61
+ self._permits = _AppendPermits(max_unacked_bytes, max_unacked_batches)
62
+
63
+ self._queue: asyncio.Queue[AppendInput | None] = asyncio.Queue()
64
+ self._unacked: deque[_UnackedBatch] = deque()
65
+ self._closed = False
66
+ self._error: BaseException | None = None
67
+
68
+ self._task = asyncio.get_running_loop().create_task(self._run())
69
+
70
+ async def submit(self, inp: AppendInput) -> BatchSubmitTicket:
71
+ """Submit a batch of records for appending.
72
+
73
+ Waits when backpressure limits are reached.
74
+ """
75
+ self._check_ready()
76
+ batch_bytes = metered_bytes(inp.records)
77
+ validate_append_input(len(inp.records), batch_bytes)
78
+
79
+ await self._permits.acquire(batch_bytes)
80
+ # Re-check after potentially waiting on backpressure.
81
+ try:
82
+ self._check_ready()
83
+ except BaseException:
84
+ self._permits.release(batch_bytes)
85
+ raise
86
+
87
+ ack_fut: asyncio.Future[AppendAck] = asyncio.get_running_loop().create_future()
88
+ self._unacked.append(_UnackedBatch(ack_fut, batch_bytes))
89
+ await self._queue.put(inp)
90
+ return BatchSubmitTicket(ack_fut)
91
+
92
+ def _check_ready(self) -> None:
93
+ if self._closed:
94
+ raise S2ClientError("AppendSession is closed")
95
+ if self._error is not None:
96
+ raise self._error
97
+
98
+ async def close(self) -> None:
99
+ """Close the session and wait for all submitted batches to be appended."""
100
+ if self._closed:
101
+ return
102
+ self._closed = True
103
+ await self._queue.put(None)
104
+ await self._task
105
+ if self._error is not None:
106
+ raise self._error
107
+
108
+ async def __aenter__(self) -> Self:
109
+ return self
110
+
111
+ async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
112
+ await self.close()
113
+ return False
114
+
115
+ async def _run(self) -> None:
116
+ try:
117
+ async for ack in run_append_session(
118
+ self._client,
119
+ self._stream_name,
120
+ self._input_iter(),
121
+ retry=self._retry,
122
+ compression=self._compression,
123
+ ack_timeout=self._client._request_timeout,
124
+ ):
125
+ self._resolve_next(ack)
126
+ except BaseException as e:
127
+ # Unwrap single-exception ExceptionGroups so callers see
128
+ # the original exception type (e.g. S2ServerError, SeqNumMismatchError).
129
+ exc = e
130
+ while isinstance(exc, BaseExceptionGroup) and len(exc.exceptions) == 1:
131
+ exc = exc.exceptions[0]
132
+ self._fail_all(exc)
133
+
134
+ async def _input_iter(self) -> AsyncIterable[AppendInput]:
135
+ while True:
136
+ item = await self._queue.get()
137
+ if item is None:
138
+ return
139
+ yield item
140
+
141
+ def _resolve_next(self, ack: AppendAck) -> None:
142
+ unacked = self._unacked.popleft()
143
+ self._permits.release(unacked.metered_bytes)
144
+ unacked.ack_fut.set_result(ack)
145
+
146
+ def _fail_all(self, error: BaseException) -> None:
147
+ self._error = error
148
+ for unacked in self._unacked:
149
+ self._permits.release(unacked.metered_bytes)
150
+ if not unacked.ack_fut.done():
151
+ unacked.ack_fut.set_exception(error)
152
+ self._unacked.clear()
153
+ # Drain queue
154
+ while not self._queue.empty():
155
+ try:
156
+ self._queue.get_nowait()
157
+ except asyncio.QueueEmpty:
158
+ break
159
+
160
+
161
class BatchSubmitTicket:
    """Awaitable handle for one submitted batch.

    Awaiting the ticket yields the batch's :class:`AppendAck`, or raises the
    exception set on the underlying future.
    """

    __slots__ = ("_ack_fut",)

    def __init__(self, ack_fut: asyncio.Future[AppendAck]) -> None:
        # The future is owned and completed by the session that issued the ticket.
        self._ack_fut = ack_fut

    def __await__(self):
        # Delegate directly to the future so ``await ticket`` behaves like
        # ``await future``.
        pending_ack = self._ack_fut
        return pending_ack.__await__()
171
+
172
+
173
class _Semaphore:
    """Counting semaphore whose acquire and release take an arbitrary amount."""

    __slots__ = ("_event", "_lock", "_value")

    def __init__(self, value: int) -> None:
        self._lock = asyncio.Lock()
        # The event starts set so the first acquirer goes straight to the check.
        wakeup = asyncio.Event()
        wakeup.set()
        self._event = wakeup
        self._value = value

    async def acquire(self, n: int) -> None:
        """Wait until ``n`` units are available, then take them."""
        acquired = False
        while not acquired:
            async with self._lock:
                if self._value < n:
                    # Not enough capacity: arm the event before sleeping on it.
                    self._event.clear()
                else:
                    self._value -= n
                    acquired = True
            if not acquired:
                await self._event.wait()

    def release(self, n: int) -> None:
        """Return ``n`` units and wake every waiter so each re-checks capacity."""
        self._value += n
        self._event.set()
194
+
195
+
196
class _AppendPermits:
    """Backpressure permits combining a byte budget with an optional batch-count budget."""

    __slots__ = ("_bytes", "_count")

    def __init__(self, max_bytes: int, max_count: int | None = None) -> None:
        self._bytes = _Semaphore(max_bytes)
        # No count semaphore at all when the batch-count limit is disabled.
        self._count = None if max_count is None else _Semaphore(max_count)

    async def acquire(self, n_bytes: int) -> None:
        """Take one batch slot (when limited) and then ``n_bytes`` of byte budget."""
        count = self._count
        if count is None:
            await self._bytes.acquire(n_bytes)
            return

        await count.acquire(1)
        try:
            await self._bytes.acquire(n_bytes)
        except BaseException:
            # Give the batch slot back if the byte wait fails or is cancelled.
            count.release(1)
            raise

    def release(self, n_bytes: int) -> None:
        """Return ``n_bytes`` of byte budget and, when limited, one batch slot."""
        self._bytes.release(n_bytes)
        count = self._count
        if count is not None:
            count.release(1)
s2_sdk/_batching.py ADDED
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from typing import AsyncIterable
5
+
6
+ from s2_sdk._types import AppendInput, Batching, Record, metered_bytes
7
+ from s2_sdk._validators import validate_batching
8
+
9
+
10
class BatchAccumulator:
    """Collects records for one batch and tracks its record count and metered size."""

    __slots__ = ("_batching", "_bytes", "_records")

    def __init__(self, batching: Batching) -> None:
        self._batching = batching
        self._records: list[Record] = []
        self._bytes = 0

    def add(self, record: Record) -> None:
        """Append ``record`` to the current batch and account for its metered size."""
        self._records.append(record)
        self._bytes += metered_bytes((record,))

    def fits(self, record: Record) -> bool:
        """Return whether ``record`` can join the current batch within the limits.

        An empty batch accepts any record, so a single record larger than
        ``max_bytes`` still forms a batch of its own instead of stalling.
        """
        if not self._records:
            return True
        return (
            len(self._records) < self._batching.max_records
            and self._bytes + metered_bytes((record,)) <= self._batching.max_bytes
        )

    def take(self) -> list[Record]:
        """Return the accumulated records and reset the accumulator."""
        records = list(self._records)
        self._records.clear()
        self._bytes = 0
        return records

    def is_full(self) -> bool:
        """Return whether the record-count or byte limit has been reached."""
        return (
            len(self._records) >= self._batching.max_records
            or self._bytes >= self._batching.max_bytes
        )

    def is_empty(self) -> bool:
        """Return whether no records are currently accumulated."""
        return not self._records

    @property
    def linger(self) -> float:
        """Configured linger duration in seconds."""
        return self._batching.linger.total_seconds()


async def append_record_batches(
    records: AsyncIterable[Record],
    *,
    batching: Batching | None = None,
) -> AsyncIterable[list[Record]]:
    """Group records into batches based on count, bytes, and linger time.

    A batch is emitted when it reaches ``max_records`` or ``max_bytes``, when
    the next record would push it past those limits, when no further record
    arrives within the linger time after the previous one, or when ``records``
    is exhausted. With a non-positive linger there is no time limit: a
    non-empty batch waits until it fills up or the input ends.

    Args:
        records: Source of records to group.
        batching: Batching limits; defaults to ``Batching()``.

    Yields:
        Non-empty lists of records, in input order.

    Raises:
        Whatever ``validate_batching`` raises for the given limits, and any
        exception raised by ``records`` itself.
    """
    if batching is None:
        batching = Batching()
    validate_batching(batching.max_records, batching.max_bytes)
    acc = BatchAccumulator(batching)
    linger_secs = batching.linger.total_seconds()
    source = records.__aiter__()

    # In-flight fetch of the next record. It deliberately survives a linger
    # timeout: asyncio.wait_for() would cancel it, and cancelling ``anext`` on
    # an async generator terminates the generator, silently ending the input.
    pending: asyncio.Future[Record] | None = None
    try:
        while True:
            if pending is None:
                pending = asyncio.ensure_future(anext(source))

            # Only a non-empty batch lingers; an empty one waits indefinitely.
            if linger_secs > 0 and not acc.is_empty():
                timeout = linger_secs
            else:
                timeout = None
            done, _ = await asyncio.wait((pending,), timeout=timeout)
            if not done:
                # Linger expired: flush, and keep waiting on the same fetch.
                yield acc.take()
                continue

            fetch, pending = pending, None
            try:
                record = fetch.result()
            except StopAsyncIteration:
                break

            # Flush first if this record would overshoot the batch limits.
            if not acc.fits(record):
                yield acc.take()
            acc.add(record)
            if acc.is_full():
                yield acc.take()

        if not acc.is_empty():
            yield acc.take()
    finally:
        # The consumer stopped early or an error occurred: drop the fetch.
        if pending is not None:
            pending.cancel()
80
+
81
+
82
async def append_inputs(
    records: AsyncIterable[Record],
    *,
    match_seq_num: int | None = None,
    fencing_token: str | None = None,
    batching: Batching | None = None,
) -> AsyncIterable[AppendInput]:
    """Batch ``records`` and wrap each batch in an :class:`AppendInput`.

    Batches are formed by :func:`append_record_batches` (count, bytes, and
    linger time). Every input carries ``fencing_token``. When
    ``match_seq_num`` is given, the first input uses it and each later input
    uses the previous value advanced by the size of the preceding batch.
    """
    effective_batching = Batching() if batching is None else batching
    next_seq_num = match_seq_num

    async for batch in append_record_batches(records, batching=effective_batching):
        if len(batch) == 0:
            continue
        wrapped = AppendInput(
            records=batch,
            match_seq_num=next_seq_num,
            fencing_token=fencing_token,
        )
        # Advance before yielding so the count reflects the batch as emitted.
        if next_seq_num is not None:
            next_seq_num = next_seq_num + len(batch)
        yield wrapped