omextra 0.0.0.dev513__py3-none-any.whl → 0.0.0.dev515__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,263 +0,0 @@
1
- # ruff: noqa: UP007 UP045
2
- # @omlish-lite
3
- import abc
4
- import typing as ta
5
-
6
- from omlish.lite.abstract import Abstract
7
-
8
-
9
- BytesLike = ta.Union[bytes, bytearray, memoryview] # ta.TypeAlias
10
-
11
-
12
- ##
13
-
14
-
15
class BytesViewLike(Abstract):
    """
    Common read-side interface for byte containers whose contents may be held as one or more segments.

    Implementations expose their readable bytes through `__len__()`, `peek()` and `segments()`. Both `BytesView` and
    `BytesBuffer` derive from this base.
    """

    @abc.abstractmethod
    def __len__(self) -> int:
        """
        Return the number of readable bytes.

        This is expected to be O(1). Many drivers and codecs use `len(buf)` in tight loops to decide whether more data
        is needed before attempting to parse a frame.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def peek(self) -> memoryview:
        """
        Return a contiguous, read-only `memoryview` of the first available bytes.

        This is the "next chunk" fast-path: for segmented views, the returned memoryview may represent only the first
        segment (and thus may be shorter than `len(self)`), but it must be non-copying. This is the fast-path for codecs
        that can parse headers from an initial contiguous region.

        The returned view should be treated as ephemeral: callers must assume it may be invalidated by subsequent buffer
        mutations (advance/write/reserve/commit), depending on the implementation.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def segments(self) -> ta.Sequence[memoryview]:
        """
        Return the readable contents as an ordered sequence of non-copying `memoryview` segments.

        This method is required because efficient operations in pure Python typically depend on delegating work to
        CPython's optimized implementations for searching/slicing within contiguous regions. By exposing
        already-contiguous segments, the buffer enables implementations of `find/rfind` and higher-level framing to
        avoid Python-level per-byte iteration.

        The returned segments must:
          - collectively represent exactly the readable bytes, in order,
          - be 1-D, byte-oriented views (itemsize 1),
          - be non-copying views of the underlying storage.

        Callers must assume that the returned views may be invalidated by subsequent mutations of the originating
        buffer/view (e.g., advancing, writing, reserving, committing), depending on the implementation's rules.
        """

        raise NotImplementedError
62
-
63
-
64
class BytesView(BytesViewLike, Abstract):
    """
    A read-only, possibly non-contiguous view of bytes.

    This is the result type of operations like `BytesBuffer.split_to()`: it represents a *logical* byte sequence without
    requiring a copy. A `BytesView` is intentionally minimal: it is not a general-purpose container API, not a
    random-access sequence, and not intended for arbitrary indexing/slicing-heavy use.

    `BytesView` exists to make copy boundaries explicit:
      - Use `segments()` / `peek()` to access data without copying.
      - Use `tobytes()` (or `bytes(view)`) to intentionally materialize a contiguous `bytes` object.

    NOTE(review): this interface declares `tobytes()` only; whether `bytes(view)` works depends on the concrete
    implementation providing `__bytes__` or the buffer protocol - confirm before relying on it.

    Implementations may be backed by one or many `memoryview` segments; the semantics are defined as if all readable
    bytes were concatenated in order.
    """

    @abc.abstractmethod
    def tobytes(self) -> bytes:
        """
        Materialize this view as a contiguous `bytes` object (copying).

        This is the explicit copy boundary: callers should prefer `peek()` / `segments()` for zero-copy-ish access when
        feasible, and use `tobytes()` only when a contiguous owned `bytes` is required.
        """

        raise NotImplementedError
90
-
91
-
92
class BytesBuffer(BytesViewLike, Abstract):
    """
    An incremental, consumption-oriented byte accumulator intended for protocol parsing.

    A `BytesBuffer` is a *stream buffer*: bytes are appended by a driver/transport and then consumed by codecs via
    peeking, searching, splitting, and advancing, without forcing repeated concatenation or reallocation. It is
    explicitly designed to support segmented storage (to avoid "a huge buffer pinned by a tiny tail") and to enable
    low-copy pipeline-style decoding (Netty/Tokio-inspired).

    What it is for:
      - buffering raw bytes between I/O and protocol codecs,
      - framing (delimiters/length-prefixed) using split/advance,
      - efficient searching over buffered bytes using C-accelerated primitives via `memoryview` segments.

    What it is *not* for:
      - a general-purpose replacement for `bytes`/`bytearray`,
      - a `collections.abc.Sequence` or random-access container abstraction,
      - arbitrary indexing/slicing-heavy workloads (use `bytes`/`bytearray`/`memoryview` directly).

    `BytesBuffer` deliberately exposes `memoryview` at its boundary. This is foundational: it allows both immutable
    (`bytes`) and mutable (`bytearray`) internal storage to be viewed in O(1) without copying. It also avoids relying
    on `io.BytesIO` as a core backing store: while `BytesIO.getbuffer()` can expose a view, exported views pin the
    underlying buffer against resizing, which makes it awkward as a general-purpose buffer substrate.

    Semantics note:
      Many methods describe behavior in terms of the *conceptual concatenation* of readable bytes, even if the buffer
      is physically segmented. This is what "stream-correct" means here: results must be correct regardless of how the
      buffered bytes are chunked internally.
    """

    @abc.abstractmethod
    def advance(self, n: int, /) -> None:
        """
        Consume (discard) exactly `n` readable bytes from the front of the buffer.

        This operation must not copy remaining bytes unnecessarily. For segmented buffers, this typically adjusts a head
        offset and drops exhausted segments.

        Implementations must raise if `n` is negative or greater than `len(self)`.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def split_to(self, n: int, /) -> BytesView:
        """
        Split off and return a read-only view of the first `n` readable bytes, consuming them from this buffer.

        This is the core "low-copy framing" primitive:
          - codecs can `split_to(frame_len)` to obtain a view of an entire frame without copying,
          - then immediately continue parsing subsequent frames from the remaining bytes.

        Implementations should strive for O(1) or amortized O(1) behavior, returning a view that references underlying
        segments rather than materializing a new contiguous `bytes`.

        Implementations must raise if `n` is negative or greater than `len(self)`.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def coalesce(self, n: int, /) -> memoryview:
        """
        Ensure the first `n` readable bytes are available contiguously and return a view of them.

        Semantics:
          - Non-consuming: does not advance.
          - May restructure internal segments (content-preserving) to make the prefix contiguous.
          - Returns a read-only-ish `memoryview` (callers must not mutate readable bytes).

        Copying behavior:
          - If `peek()` already exposes >= n contiguous bytes, this is zero-copy.
          - Otherwise, it copies exactly the first `n` bytes into a new contiguous segment and rewrites the internal
            segment list so that segment[0] contains that prefix.

        Reserve interaction:
          - Disallowed while an outstanding reservation exists, since reserve() hands out a view that must not be
            invalidated by internal reshaping. (`reserve()` is declared on `MutableBytesBuffer`, so this only applies
            to writable buffers.)
        """

        raise NotImplementedError

    @abc.abstractmethod
    def find(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
        """
        Find the first occurrence of `sub` within the readable bytes and return its offset, or -1 if not found.

        The returned offset is relative to the start of the readable region.

        This operation is "stream-correct": it must behave as if searching within the conceptual concatenation of all
        readable bytes, even if the buffer is physically segmented. In particular, matches that span segment boundaries
        must be detected.

        `start` and `end` are offsets into the readable region, matching the semantics of `bytes.find()`:
          - `start` defaults to 0 (the beginning of readable bytes),
          - `end` defaults to `len(self)`.

        Rationale for being part of the core interface:
          In pure Python, higher-level codecs cannot efficiently implement correct cross-segment searching byte-by-byte.
          Keeping `find` near the owning storage allows implementations to exploit contiguous segments and CPython's
          optimized search within each segment while still providing correct stream semantics.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def rfind(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
        """
        Find the last occurrence of `sub` within the readable bytes and return its offset, or -1 if not found.

        The returned offset is relative to the start of the readable region.

        This operation is also stream-correct and matches `bytes.rfind()` semantics for `start`/`end`, interpreted as
        offsets into the readable region of this buffer.
        """

        raise NotImplementedError
205
-
206
-
207
class MutableBytesBuffer(BytesBuffer, Abstract):
    """
    A writable `BytesBuffer`: supports appending bytes and (optionally) reserving writable space.

    `MutableBytesBuffer` is the primary target for drivers/transports feeding data into protocol pipelines, and for
    encoders building outbound byte sequences. It intentionally does not imply any particular I/O model (blocking,
    asyncio, custom reactors); it is simply the mutable byte substrate.

    Implementations may be linear (single `bytearray` + indices), segmented (multiple chunks), or adaptive.
    """

    @abc.abstractmethod
    def write(self, data: BytesLike, /) -> None:
        """
        Append `data` to the end of the readable region (after any existing unread bytes).

        Implementations should avoid needless copying; e.g., segmented buffers may store large `bytes` chunks directly,
        while linear buffers may copy into a `bytearray`.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def reserve(self, n: int, /) -> memoryview:
        """
        Reserve writable space for at least `n` bytes and return a writable `memoryview` into that space.

        This method exists to support "close to the metal" drivers that can fill buffers directly (e.g., `recv_into`,
        `readinto`) without allocating temporary `bytes` objects.

        The returned view represents capacity that is not yet part of the readable region. The caller must write into
        some prefix of the view and then call `commit(written)` to make those bytes readable. Because the view may be
        longer than `n`, the caller must track how many bytes it actually wrote.

        Implementations should document their rules regarding outstanding reservations; a simple and robust rule is:
          - only one active reservation may exist at a time,
          - mutations that would reallocate storage are forbidden while a reservation is outstanding.
        """

        raise NotImplementedError

    @abc.abstractmethod
    def commit(self, n: int, /) -> None:
        """
        Commit `n` bytes from the most recent reservation, making them readable.

        Conceptually, `reserve()` may provide more capacity than the caller actually uses; `commit(n)` "shrinks" that
        over-reservation by only publishing the first `n` bytes as readable.

        Implementations must validate:
          - that a reservation is outstanding,
          - that `0 <= n <= reserved_length`.

        After commit, the reservation is considered consumed; subsequent reads and searches must include the committed
        bytes as part of the readable region.
        """

        raise NotImplementedError
@@ -1,76 +0,0 @@
1
- # ruff: noqa: UP006 UP045
2
- # @omlish-lite
3
- import typing as ta
4
-
5
- from .types import BytesView
6
- from .types import BytesViewLike
7
-
8
-
9
- ##
10
-
11
-
12
def _norm_slice(length: int, start: int, end: ta.Optional[int]) -> ta.Tuple[int, int]:
    """
    Resolve a `(start, end)` pair against a sequence of `length` items into concrete, ordered bounds.

    Negative indices count back from `length`; each bound is then clamped into `[0, length]`. A missing `end` means
    `length`. If the resolved `end` falls before `start`, it is raised to `start`, so the result always describes an
    empty or forward range.
    """

    def clamp(idx: int) -> int:
        # Negative indices are relative to the end, then the result is pinned into [0, length].
        if idx < 0:
            idx += length
        return min(max(idx, 0), length)

    lo = clamp(start)
    hi = length if end is None else clamp(end)

    # Never report an end that precedes the start.
    return lo, (hi if hi >= lo else lo)
34
-
35
-
36
- ##
37
-
38
-
39
def can_bytes(obj: ta.Any) -> bool:
    """Report whether `obj` is a builtin bytes-like object (`bytes`/`bytearray`/`memoryview`) or a `BytesViewLike`."""

    builtin_kinds = (bytes, bytearray, memoryview)
    return isinstance(obj, builtin_kinds) or isinstance(obj, BytesViewLike)
41
-
42
-
43
def iter_bytes_segments(obj: ta.Any) -> ta.Iterator[memoryview]:
    """
    Yield the contents of `obj` as `memoryview` segments.

    A `memoryview` is yielded as-is, `bytes`/`bytearray` are wrapped in a single `memoryview`, and a `BytesViewLike`
    contributes each of its `segments()` in order. Any other type raises `TypeError` - because this is a generator,
    the error surfaces on first iteration rather than at call time.
    """

    if isinstance(obj, memoryview):
        yield obj
        return

    if isinstance(obj, (bytes, bytearray)):
        yield memoryview(obj)
        return

    if not isinstance(obj, BytesViewLike):
        raise TypeError(obj)

    yield from obj.segments()
52
-
53
-
54
def to_bytes(obj: ta.Any) -> bytes:
    """
    Materialize `obj` as a `bytes` object.

    A `bytes` instance is returned unchanged (no copy); `bytearray`, `memoryview` and `BytesView` are copied via their
    native conversion; any other `BytesViewLike` is assembled by joining copies of its segments. Raises `TypeError`
    for anything else.
    """

    if isinstance(obj, bytes):
        return obj

    if isinstance(obj, bytearray):
        return bytes(obj)

    if isinstance(obj, memoryview):
        return obj.tobytes()

    # A full `BytesView` knows how to materialize itself; check it before the more general `BytesViewLike`.
    if isinstance(obj, BytesView):
        return obj.tobytes()

    if isinstance(obj, BytesViewLike):
        pieces = [bytes(seg) for seg in obj.segments()]
        return b''.join(pieces)

    raise TypeError(obj)
67
-
68
-
69
def bytes_len(obj: ta.Any) -> int:
    """
    Return the number of bytes held by `obj`, or 0 if `obj` is not bytes-like.

    Supported inputs are `bytes`, `bytearray`, `memoryview` and `BytesViewLike`. For a `BytesViewLike` the result is
    the total size of its `segments()`.

    Memoryviews are measured with `nbytes` rather than `len()`: `len(memoryview)` counts elements along the first
    dimension, which under-reports views with a multi-byte item format or more than one dimension. Using `nbytes`
    keeps the result equal to the length of the bytes the view would materialize to.
    """

    if isinstance(obj, memoryview):
        return obj.nbytes

    if isinstance(obj, (bytes, bytearray)):
        return len(obj)

    if isinstance(obj, BytesViewLike):
        return sum(seg.nbytes for seg in obj.segments())

    # Not bytes-like: deliberately reported as zero-length rather than raising.
    return 0