omextra 0.0.0.dev513__py3-none-any.whl → 0.0.0.dev515__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,187 +0,0 @@
1
- # ruff: noqa: UP006 UP007 UP045
2
- # @omlish-lite
3
- import typing as ta
4
-
5
- from .errors import BufferTooLarge
6
- from .errors import FrameTooLarge
7
- from .types import BytesBuffer
8
-
9
-
10
- ##
11
-
12
-
13
class LongestMatchDelimiterFramer:
    """
    A delimiter-based framing codec that supports *overlapping* delimiters with longest-match semantics.

    This is intentionally decoupled from any I/O model: it operates purely on a `BytesBuffer`-like object. The methods
    this class actually calls on the buffer are `__len__`, `find`, `split_to` and `advance`.

    Key property:
      Given overlapping delimiters like [b'\\r', b'\\r\\n'], this codec will *not* emit a frame ending at '\\r' unless
      it can prove the next byte is not '\\n' (or the stream is finalized).

    Implementation note:
      This codec relies on `BytesBuffer.find(...)` being stream-correct and C-accelerated over the buffer's underlying
      contiguous segments. In pure Python it is usually better to keep searching near the storage layer than to
      re-implement scanning byte-by-byte in higher-level codecs.
    """

    def __init__(
            self,
            delims: ta.Sequence[bytes],
            *,
            keep_ends: bool = False,
            max_size: ta.Optional[int] = None,
    ) -> None:
        """
        Args:
          delims: Non-empty sequence of non-empty `bytes` / `bytearray` delimiters. They may overlap.
          keep_ends: If true, emitted frames include the delimiter that terminated them.
          max_size: Optional limit on the number of bytes allowed before the next delimiter.

        Raises:
          ValueError: If `delims` is empty or contains an empty delimiter.
          TypeError: If any delimiter is not `bytes` / `bytearray`.
        """

        super().__init__()

        dl = list(delims)
        if not dl:
            raise ValueError('no delimiters')
        if any(not isinstance(d, (bytes, bytearray)) for d in dl):
            raise TypeError(delims)
        if any(len(d) == 0 for d in dl):
            raise ValueError('empty delimiter')

        self._delims = tuple(bytes(d) for d in dl)
        self._keep_ends = keep_ends
        self._max_size = max_size

        # Sort by length descending for "choose longest at same start".
        self._delims_by_len = tuple(sorted(self._delims, key=len, reverse=True))

        # Build prefix relationships for overlap deferral. For each short delimiter, store the longer delimiters that
        # start with it, longest first.
        pref: ta.Dict[bytes, ta.List[bytes]] = {}
        for d in self._delims:
            for e in self._delims:
                if d is e:
                    continue
                if len(e) > len(d) and e.startswith(d):
                    pref.setdefault(d, []).append(e)
        for k, vs in list(pref.items()):
            pref[k] = sorted(vs, key=len, reverse=True)
        self._prefix_longer = pref

        self._max_delim_len = max(len(d) for d in self._delims)

    def decode(self, buf: BytesBuffer, *, final: bool = False) -> ta.List[ta.Any]:
        """
        Consume as many complete frames as possible from `buf` and return them.

        - Frames are whatever `buf.split_to(...)` returns for the frame's byte range.
        - The delimiter is consumed from the buffer; it is retained on the frame if `keep_ends=True`.
        - If `final=True`, the codec will not defer on overlapping delimiter prefixes at the end of the buffer.

        Raises:
          - BufferTooLarge if no delimiter is present, the buffered bytes exceed max_size, and this call has not
            emitted any frame.
          - FrameTooLarge if the next frame payload (bytes before delimiter) exceeds max_size and this call has not
            emitted any frame.

        Note on `max_size`:
          `max_size` is enforced as a limit on the *current* frame (bytes before the next delimiter). If this call
          already emitted at least one frame, it returns those frames rather than raising on subsequent oversized
          data, leaving the remaining bytes buffered; the error is then raised by the next call. This matters because
          emitted frames have already been split out of `buf` - raising at that point would lose them.
        """

        out: ta.List[ta.Any] = []

        while True:
            hit = self._find_next_delim(buf)
            if hit is None:
                if self._max_size is not None and len(buf) > self._max_size and not out:
                    raise BufferTooLarge('buffer exceeded max_size without delimiter')
                return out

            pos, delim = hit

            if self._max_size is not None and pos > self._max_size:
                if out:
                    # Frames already removed from `buf` must reach the caller; the oversized frame is still buffered,
                    # so the next call (with nothing emitted yet) raises.
                    return out
                raise FrameTooLarge('frame exceeded max_size')

            if not final and self._should_defer(buf, pos, delim):
                return out

            if self._keep_ends:
                out.append(buf.split_to(pos + len(delim)))
            else:
                out.append(buf.split_to(pos))
                buf.advance(len(delim))

    def _find_next_delim(self, buf: BytesBuffer) -> ta.Optional[ta.Tuple[int, bytes]]:
        """
        Return (pos, delim) for the earliest delimiter occurrence, or None if no delimiter is buffered. If multiple
        delimiters occur at the same position, choose the longest matching delimiter.
        """

        ln = len(buf)
        if ln == 0:
            return None

        best_pos: ta.Optional[int] = None
        best_delim: ta.Optional[bytes] = None

        # First pass: find the earliest position of any delimiter (cheap, uses buf.find).
        for d in self._delims:
            i = buf.find(d, 0, None)
            if i == -1:
                continue
            if best_pos is None or i < best_pos:
                best_pos = i
                best_delim = d
                if best_pos == 0:
                    # Can't beat position 0; the second pass below still chooses the longest delimiter there.
                    break

        if best_pos is None or best_delim is None:
            return None

        # Second pass: at that position, choose the longest delimiter that actually matches there. (We can't just rely
        # on "which delimiter found it first" when overlaps exist.)
        pos = best_pos
        for d in self._delims_by_len:
            if pos + len(d) > ln:
                continue
            if buf.find(d, pos, pos + len(d)) == pos:
                return pos, d

        # Shouldn't happen: best_pos came from some delimiter occurrence.
        return pos, best_delim

    def _should_defer(self, buf: BytesBuffer, pos: int, matched: bytes) -> bool:
        """
        Return True if we must defer because a longer delimiter could still match starting at `pos` but we don't yet
        have enough bytes to decide.

        We only defer when:
          - the current match ends at the end of the currently buffered bytes, and
          - there exists some longer delimiter that has `matched` as a prefix, and
          - the buffered bytes from pos match the available prefix of that longer delimiter.
        """

        ln = len(buf)
        endpos = pos + len(matched)
        if endpos != ln:
            return False

        longer = self._prefix_longer.get(matched)
        if not longer:
            return False

        avail = ln - pos
        for d2 in longer:
            if avail >= len(d2):
                # If we had enough bytes, we'd have matched d2 in _find_next_delim.
                continue
            # Check whether buffered bytes match the prefix of d2 that we have available.
            # Use stream-correct find on the prefix.
            prefix = d2[:avail]
            if buf.find(prefix, pos, pos + avail) == pos:
                return True

        return False
@@ -1,202 +0,0 @@
1
- # ruff: noqa: UP006 UP007 UP045
2
- # @omlish-lite
3
- import typing as ta
4
-
5
- from .errors import BufferTooLarge
6
- from .errors import NoOutstandingReserve
7
- from .errors import OutstandingReserve
8
- from .segmented import SegmentedBytesView
9
- from .types import BytesLike
10
- from .types import MutableBytesBuffer
11
- from .utils import _norm_slice
12
-
13
-
14
- ##
15
-
16
-
17
class LinearBytesBuffer(MutableBytesBuffer):
    """
    A simple contiguous (bytearray-backed) MutableBytesBuffer implementation.

    Readable bytes are `self._ba[self._rpos:self._wpos]`; `_wpos` always equals `len(self._ba)`.

    Strengths:
      - Fast `find/rfind` and contiguous peeking.
      - Efficient reserve/commit into a single backing store.

    Tradeoffs:
      - `split_to` returns a stable view by copying the split bytes into an owned `bytes` object.
        (A truly zero-copy split view would require pinning the underlying bytearray against compaction.)
    """

    # Consumed-prefix size at which `advance` / `split_to` start considering compaction.
    _COMPACT_THRESHOLD = 65536

    def __init__(
            self,
            *,
            max_bytes: ta.Optional[int] = None,
            initial_capacity: int = 0,
    ) -> None:
        """
        Args:
          max_bytes: Optional cap on the number of readable bytes held at once.
          initial_capacity: Size hint used to pre-size the backing store.

        Raises:
          ValueError: If `initial_capacity` is negative.
          BufferTooLarge: If `initial_capacity` exceeds `max_bytes`.
        """

        super().__init__()

        self._max_bytes = None if max_bytes is None else int(max_bytes)

        if initial_capacity < 0:
            raise ValueError(initial_capacity)
        if self._max_bytes is not None and initial_capacity > self._max_bytes:
            raise BufferTooLarge('buffer exceeded max_bytes')

        # Pre-size the backing store to encourage fewer resizes/copies on trickle-y writes.
        # We immediately clear so readable length remains 0.
        # NOTE(review): whether `clear()` keeps the allocation is an interpreter implementation detail - confirm this
        # has the intended effect.
        if initial_capacity:
            self._ba = bytearray(initial_capacity)
            self._ba.clear()
        else:
            self._ba = bytearray()

    # Read / write offsets into `_ba`.
    _rpos = 0
    _wpos = 0

    # Reservation state: `_resv_start` is None when no reserve is outstanding.
    _resv_start: ta.Optional[int] = None
    _resv_len = 0

    # Temporary buffer handed out by `reserve`; only set while a reserve is outstanding.
    _resv_buf: bytearray

    def __len__(self) -> int:
        """Return the number of readable bytes."""

        return self._wpos - self._rpos

    def peek(self) -> memoryview:
        """Return a view of all readable bytes without consuming them."""

        if self._rpos == self._wpos:
            return memoryview(b'')
        return memoryview(self._ba)[self._rpos:self._wpos]

    def segments(self) -> ta.Sequence[memoryview]:
        """Return the readable bytes as a tuple of zero or one contiguous views."""

        mv = self.peek()
        return (mv,) if len(mv) else ()

    def _check_no_reserve(self) -> None:
        """Raise OutstandingReserve if a `reserve` has not yet been committed."""

        if self._resv_start is not None:
            raise OutstandingReserve('outstanding reserve')

    def _compact_after_consume(self) -> None:
        """Reset or compact the backing store after `_rpos` has moved forward."""

        # Fully consumed: reset.
        if self._rpos == self._wpos:
            self._ba.clear()
            self._rpos = 0
            self._wpos = 0
            return

        # Compact opportunistically (content-preserving, may copy).
        # This avoids "huge buffer pinned by small tail" for contiguous backing.
        # Keep thresholds conservative to avoid excessive churn.
        if self._rpos >= self._COMPACT_THRESHOLD and self._rpos >= (self._wpos // 2):
            del self._ba[:self._rpos]
            self._wpos -= self._rpos
            self._rpos = 0

    def write(self, data: BytesLike, /) -> None:
        """
        Append `data` to the readable bytes.

        Raises:
          OutstandingReserve: If a reserve is outstanding.
          BufferTooLarge: If the write would push the readable length past `max_bytes`.
        """

        self._check_no_reserve()
        if not data:
            return
        # Take an owned, immutable copy of the caller's mutable/view input before appending it.
        if isinstance(data, memoryview):
            data = data.tobytes()
        elif isinstance(data, bytearray):
            data = bytes(data)

        bl = len(data)

        if self._max_bytes is not None and len(self) + bl > self._max_bytes:
            raise BufferTooLarge('buffer exceeded max_bytes')

        # Keep backing store "dense": if we've consumed everything, reset.
        if self._rpos == self._wpos and self._rpos:
            self._ba.clear()
            self._rpos = 0
            self._wpos = 0

        self._ba.extend(data)
        self._wpos += bl

    def reserve(self, n: int, /) -> memoryview:
        """
        Hand out a writable view of `n` bytes; the caller fills it and then calls `commit`.

        Raises:
          ValueError: If `n` is negative.
          OutstandingReserve: If a previous reserve has not been committed.
        """

        if n < 0:
            raise ValueError(n)
        if self._resv_start is not None:
            raise OutstandingReserve('outstanding reserve')

        # Important: do NOT reserve by extending the backing bytearray and returning a view into it. A live exported
        # memoryview pins the bytearray against resizing, and commit() would need to shrink unused reservation space (or
        # otherwise reshape), which would raise BufferError.
        #
        # Instead, reserve returns a view of a temporary bytearray, and commit() appends only what was actually written.
        # This keeps reserve/commit safe and predictable.
        b = bytearray(n)
        self._resv_start = 0
        self._resv_len = n
        self._resv_buf = b
        return memoryview(b)

    def commit(self, n: int, /) -> None:
        """
        Append the first `n` bytes of the outstanding reservation and end the reservation.

        Raises:
          NoOutstandingReserve: If there is no outstanding reserve.
          ValueError: If `n` is negative or larger than the reserved size.
          BufferTooLarge: If appending would exceed `max_bytes` (the reservation is already released by then).
        """

        if self._resv_start is None:
            raise NoOutstandingReserve('no outstanding reserve')
        if n < 0 or n > self._resv_len:
            raise ValueError(n)

        # Release the reservation state first so `write` below does not see an outstanding reserve.
        b = self._resv_buf
        self._resv_start = None
        self._resv_len = 0
        del self._resv_buf

        if not n:
            return

        if self._max_bytes is not None and len(self) + n > self._max_bytes:
            raise BufferTooLarge('buffer exceeded max_bytes')

        # Append only what was written.
        self.write(memoryview(b)[:n])

    def advance(self, n: int, /) -> None:
        """
        Consume (drop) the first `n` readable bytes.

        Raises:
          OutstandingReserve: If a reserve is outstanding.
          ValueError: If `n` is negative or larger than the readable length.
        """

        self._check_no_reserve()
        if n < 0 or n > len(self):
            raise ValueError(n)
        if n == 0:
            return

        self._rpos += n
        self._compact_after_consume()

    def split_to(self, n: int, /) -> SegmentedBytesView:
        """
        Consume the first `n` readable bytes and return them as a view over an owned copy.

        Raises:
          OutstandingReserve: If a reserve is outstanding.
          ValueError: If `n` is negative or larger than the readable length.
        """

        self._check_no_reserve()
        if n < 0 or n > len(self):
            raise ValueError(n)
        if n == 0:
            return SegmentedBytesView(())

        # Copy out the split prefix to keep the view stable even if the underlying buffer compacts.
        b = bytes(memoryview(self._ba)[self._rpos:self._rpos + n])
        self._rpos += n

        # Same housekeeping as `advance`, so a consumer that only ever splits cannot grow the consumed prefix of the
        # backing store without bound.
        self._compact_after_consume()

        return SegmentedBytesView((memoryview(b),))

    def find(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
        """Return the lowest readable-relative index of `sub` within [start, end), or -1 if absent."""

        start, end = _norm_slice(len(self), start, end)
        if len(sub) == 0:
            return start
        # Search in backing-store coordinates, then translate back to readable-relative.
        i = self._ba.find(sub, self._rpos + start, self._rpos + end)
        return -1 if i < 0 else (i - self._rpos)

    def rfind(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
        """Return the highest readable-relative index of `sub` within [start, end), or -1 if absent."""

        start, end = _norm_slice(len(self), start, end)
        if len(sub) == 0:
            return end
        i = self._ba.rfind(sub, self._rpos + start, self._rpos + end)
        return -1 if i < 0 else (i - self._rpos)

    def coalesce(self, n: int, /) -> memoryview:
        """
        Return a contiguous view of the first `n` readable bytes without consuming them.

        Raises:
          OutstandingReserve: If a reserve is outstanding.
          ValueError: If `n` is negative or larger than the readable length.
        """

        self._check_no_reserve()
        if n < 0:
            raise ValueError(n)
        if n > len(self):
            raise ValueError(n)
        if n == 0:
            return memoryview(b'')
        # Always contiguous for the readable prefix.
        return memoryview(self._ba)[self._rpos:self._rpos + n]
@@ -1,149 +0,0 @@
1
- # @omlish-lite
2
- import typing as ta
3
-
4
- from .errors import NeedMoreData
5
- from .types import BytesBuffer
6
-
7
-
8
- ##
9
-
10
-
11
- def _coalesce_exact(buf: BytesBuffer, n: int) -> memoryview:
12
- """
13
- Return a contiguous view of exactly `n` readable bytes, or raise NeedMoreData.
14
-
15
- Uses `buf.coalesce(n)` to avoid per-byte Python work and to keep copying close to the buffer backend.
16
- """
17
-
18
- if n < 0:
19
- raise ValueError(n)
20
- if len(buf) < n:
21
- raise NeedMoreData
22
- mv = buf.coalesce(n)
23
- if len(mv) < n:
24
- # Defensive: coalesce contract should provide >= n when len(buf) >= n.
25
- raise NeedMoreData
26
- return mv[:n]
27
-
28
-
29
- ##
30
-
31
-
32
def peek_u8(buf: BytesBuffer) -> int:
    """Return the first readable byte of `buf` as an unsigned 8-bit integer, leaving the buffer untouched."""

    return _coalesce_exact(buf, 1)[0]
37
-
38
-
39
def read_u8(buf: BytesBuffer) -> int:
    """Return the first readable byte of `buf` as an unsigned 8-bit integer and consume it."""

    # Peek first: if it raises, nothing has been advanced.
    value = peek_u8(buf)
    buf.advance(1)
    return value
45
-
46
-
47
def peek_u16_be(buf: BytesBuffer) -> int:
    """Decode the first 2 readable bytes of `buf` as an unsigned big-endian integer without consuming them."""

    return int.from_bytes(_coalesce_exact(buf, 2), byteorder='big', signed=False)
52
-
53
-
54
def read_u16_be(buf: BytesBuffer) -> int:
    """Decode the first 2 readable bytes of `buf` as an unsigned big-endian integer and consume them."""

    # Peek first: if it raises, nothing has been advanced.
    value = peek_u16_be(buf)
    buf.advance(2)
    return value
60
-
61
-
62
def peek_u16_le(buf: BytesBuffer) -> int:
    """Decode the first 2 readable bytes of `buf` as an unsigned little-endian integer without consuming them."""

    return int.from_bytes(_coalesce_exact(buf, 2), byteorder='little', signed=False)
67
-
68
-
69
def read_u16_le(buf: BytesBuffer) -> int:
    """Decode the first 2 readable bytes of `buf` as an unsigned little-endian integer and consume them."""

    # Peek first: if it raises, nothing has been advanced.
    value = peek_u16_le(buf)
    buf.advance(2)
    return value
75
-
76
-
77
def peek_u32_be(buf: BytesBuffer) -> int:
    """Decode the first 4 readable bytes of `buf` as an unsigned big-endian integer without consuming them."""

    return int.from_bytes(_coalesce_exact(buf, 4), byteorder='big', signed=False)
82
-
83
-
84
def read_u32_be(buf: BytesBuffer) -> int:
    """Decode the first 4 readable bytes of `buf` as an unsigned big-endian integer and consume them."""

    # Peek first: if it raises, nothing has been advanced.
    value = peek_u32_be(buf)
    buf.advance(4)
    return value
90
-
91
-
92
def peek_u32_le(buf: BytesBuffer) -> int:
    """Decode the first 4 readable bytes of `buf` as an unsigned little-endian integer without consuming them."""

    return int.from_bytes(_coalesce_exact(buf, 4), byteorder='little', signed=False)
97
-
98
-
99
def read_u32_le(buf: BytesBuffer) -> int:
    """Decode the first 4 readable bytes of `buf` as an unsigned little-endian integer and consume them."""

    # Peek first: if it raises, nothing has been advanced.
    value = peek_u32_le(buf)
    buf.advance(4)
    return value
105
-
106
-
107
- ##
108
-
109
-
110
def peek_exact(buf: BytesBuffer, n: int, /) -> memoryview:
    """
    Return a contiguous view of exactly `n` readable bytes without consuming.

    Public entry point for the same check-and-coalesce logic the fixed-width integer readers in this module use, so
    both paths share one implementation.

    Raises:
      ValueError: If `n` is negative.
      NeedMoreData: If fewer than `n` bytes are currently buffered.
    """

    return _coalesce_exact(buf, n)
125
-
126
-
127
def take(buf: BytesBuffer, n: int, /) -> ta.Any:
    """
    Split exactly `n` bytes off the front of `buf` and return whatever `buf.split_to(n)` yields.

    Raises:
      ValueError: If `n` is negative.
      NeedMoreData: If fewer than `n` bytes are currently buffered.
    """

    if n < 0:
        raise ValueError(n)

    available = len(buf)
    if n > available:
        raise NeedMoreData

    return buf.split_to(n)
139
-
140
-
141
def read_bytes(buf: BytesBuffer, n: int, /) -> bytes:
    """
    Consume exactly `n` bytes from `buf` and return them as one contiguous `bytes` object.

    The bytes are taken with `take` and then materialized via the returned object's `tobytes()`.

    Raises NeedMoreData if fewer than `n` bytes are currently buffered.
    """

    return ta.cast(bytes, take(buf, n).tobytes())
@@ -1,110 +0,0 @@
1
- # ruff: noqa: UP045
2
- # @omlish-lite
3
- import typing as ta
4
-
5
- from .types import BytesLike
6
- from .types import MutableBytesBuffer
7
-
8
-
9
- ##
10
-
11
-
12
class ScanningBytesBuffer(MutableBytesBuffer):
    """
    A MutableBytesBuffer wrapper that remembers how far unsuccessful `find` calls have already scanned, so repeated
    searches over a slowly growing buffer do not rescan the same bytes.

    It is intentionally conservative:
      - Progress is only remembered for the default find range (start==0, end is None).
      - Only *negative* results ("-1") are remembered; a successful find leaves the cache untouched, so repeating
        `find(sub)` on an unchanged buffer yields the same answer.

    The intended usage pattern is framing-style code that loops over:
      - buf.write(...small...)
      - buf.find(delim)
      - (not found) repeat

    Every other operation is forwarded to the wrapped buffer.
    """

    def __init__(self, buf) -> None:
        super().__init__()

        # The wrapped buffer that does all the real work.
        self._buf = buf
        # needle -> buffer length at the time of its last failed default-range search.
        self._scan_from_by_sub: dict[bytes, int] = {}

    #

    def __len__(self) -> int:
        return len(self._buf)

    def peek(self) -> memoryview:
        return self._buf.peek()

    def segments(self) -> ta.Sequence[memoryview]:
        return self._buf.segments()

    #

    def advance(self, n: int, /) -> None:
        # Consume on the wrapped buffer first; the cache is only shifted if that did not raise.
        self._buf.advance(n)
        self._adjust_for_consume(n)

    def split_to(self, n: int, /):
        head = self._buf.split_to(n)
        self._adjust_for_consume(n)
        return head

    def coalesce(self, n: int, /) -> memoryview:
        return self._buf.coalesce(n)

    def find(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
        """Find `sub`, resuming from remembered progress when searching the whole buffer for a non-empty needle."""

        # Anything other than a default-range search of a non-empty needle bypasses the cache entirely.
        if start != 0 or end is not None or len(sub) <= 0:
            return self._buf.find(sub, start, end)

        # Back up by len(sub) - 1 so a match straddling the previously scanned boundary is still discoverable.
        resume_at = max(self._scan_from_by_sub.get(sub, 0) - (len(sub) - 1), 0)

        idx = self._buf.find(sub, resume_at, None)
        if idx < 0:
            self._scan_from_by_sub[sub] = len(self._buf)

        return idx

    def rfind(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
        # rfind isn't the typical trickle hot-path; delegate.
        return self._buf.rfind(sub, start, end)

    #

    def write(self, data: BytesLike, /) -> None:
        self._buf.write(data)

    def reserve(self, n: int, /) -> memoryview:
        return self._buf.reserve(n)

    def commit(self, n: int, /) -> None:
        self._buf.commit(n)

    #

    def _adjust_for_consume(self, n: int) -> None:
        """Shift remembered scan positions left by `n` consumed bytes, dropping any that reach zero or below."""

        if n <= 0 or not self._scan_from_by_sub:
            return

        # Only front-consumption exists in this buffer model.
        self._scan_from_by_sub = {
            sub: scanned - n
            for sub, scanned in self._scan_from_by_sub.items()
            if scanned > n
        }