omextra 0.0.0.dev513__py3-none-any.whl → 0.0.0.dev515__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,637 +0,0 @@
1
- # ruff: noqa: UP006 UP007 UP045
2
- # @omlish-lite
3
- import typing as ta
4
-
5
- from .errors import BufferTooLarge
6
- from .errors import NoOutstandingReserve
7
- from .errors import OutstandingReserve
8
- from .types import BytesLike
9
- from .types import BytesView
10
- from .types import MutableBytesBuffer
11
- from .utils import _norm_slice
12
-
13
-
14
- ##
15
-
16
-
17
class SegmentedBytesView(BytesView):
    """
    A read-only, possibly non-contiguous view over a sequence of byte segments.

    This is intended to be produced by `SegmentedBytesBuffer.split_to()` without copying.
    """

    # Total byte length across all segments (class-level default; set per-instance in __init__).
    _len = 0

    def __init__(self, segs: ta.Sequence[memoryview]) -> None:
        super().__init__()

        self._segs = tuple(segs)
        self._len = sum(map(len, self._segs))

    def __len__(self) -> int:
        return self._len

    def peek(self) -> memoryview:
        """Return the first segment, or an empty view when there are none."""
        segs = self._segs
        return segs[0] if segs else memoryview(b'')

    def segments(self) -> ta.Sequence[memoryview]:
        """Return all segments, in order."""
        return self._segs

    def tobytes(self) -> bytes:
        """Materialize the full contents as a single `bytes` (copies)."""
        segs = self._segs
        if not segs:
            return b''
        if len(segs) == 1:
            return bytes(segs[0])
        return b''.join(bytes(mv) for mv in segs)
50
-
51
-
52
class SegmentedBytesBuffer(MutableBytesBuffer):
    """
    A segmented, consumption-oriented bytes buffer.

    Internally stores a list of `bytes`/`bytearray` segments plus a head offset. Exposes readable data as `memoryview`
    segments without copying.

    Optional "chunked writes":
      - If chunk_size > 0, small writes are accumulated into a lazily-allocated active bytearray "chunk" up to
        chunk_size.
      - Writes >= chunk_size are stored as their own segments (after flushing any active chunk).
      - On flush, the active chunk is kept as a bytearray segment iff it is at least `chunk_compact_threshold` full;
        otherwise it is materialized as bytes to avoid pinning a large capacity for tiny content.

    Reserve/commit:
      - If chunk_size > 0 and reserve(n) fits in the active chunk, the reservation is carved from the active chunk.
        Reserved bytes are not readable until commit().
      - If reserve(n) does not fit, the active chunk is flushed first.
      - If n <= chunk_size after flushing, the reservation is served from a new active chunk (so the remainder becomes
        the next active chunk).
      - If n > chunk_size, reserve allocates a dedicated buffer and on commit it is "closed" (it does not become the
        next active chunk).

    Important exported-view caveat:
      - reserve() returns a memoryview. As long as any exported memoryview exists, the underlying bytearray must not be
        resized, or Python will raise BufferError. Therefore the active chunk bytearray is *fixed capacity*
        (len==chunk_size) and we track "used" bytes separately, writing via slice assignment rather than extend().
    """

    def __init__(
            self,
            *,
            max_bytes: ta.Optional[int] = None,
            chunk_size: int = 0,
            chunk_compact_threshold: float = .25,
    ) -> None:
        """
        :param max_bytes: optional cap on total readable bytes; writes/commits beyond it raise BufferTooLarge.
        :param chunk_size: fixed capacity of the active chunk; 0 disables chunked writes entirely.
        :param chunk_compact_threshold: fill fraction (0..1) below which a flushed chunk is copied to `bytes`.
        """
        super().__init__()

        # Readable segments, oldest first. The active chunk (when present) is always the last element.
        self._segs: ta.List[ta.Union[bytes, bytearray]] = []

        self._max_bytes = None if max_bytes is None else int(max_bytes)

        if chunk_size < 0:
            raise ValueError(chunk_size)
        self._chunk_size = chunk_size

        if not (0.0 <= chunk_compact_threshold <= 1.0):
            raise ValueError(chunk_compact_threshold)
        self._chunk_compact_threshold = chunk_compact_threshold

        # Lazily-allocated fixed-capacity chunk; `_active_used` tracks how many of its bytes are meaningful.
        self._active: ta.Optional[bytearray] = None
        self._active_used = 0

    # Offset of the first readable byte within `_segs[0]`.
    _head_off = 0
    # Total readable bytes across all segments (excludes reserved-but-uncommitted bytes).
    _len = 0

    # Outstanding reservation buffer: the active chunk itself, or a dedicated bytearray.
    _reserved: ta.Optional[bytearray] = None
    _reserved_len = 0
    # True iff the outstanding reservation was carved from the active chunk.
    _reserved_in_active = False
111
-
112
- def __len__(self) -> int:
113
- return self._len
114
-
115
- def _active_reserved_tail(self) -> int:
116
- if self._reserved_in_active and self._reserved is not None:
117
- return self._reserved_len
118
- return 0
119
-
120
- def _active_readable_len(self) -> int:
121
- if self._active is None:
122
- return 0
123
- tail = self._active_reserved_tail()
124
- rl = self._active_used - tail
125
- return rl if rl > 0 else 0
126
-
127
- def peek(self) -> memoryview:
128
- if not self._segs:
129
- return memoryview(b'')
130
-
131
- s0 = self._segs[0]
132
- mv = memoryview(s0)
133
- if self._head_off:
134
- mv = mv[self._head_off:]
135
-
136
- if self._active is not None and s0 is self._active:
137
- # Active is only meaningful by _active_used, not len(bytearray).
138
- rl = self._active_readable_len()
139
- if self._head_off >= rl:
140
- return memoryview(b'')
141
- mv = memoryview(self._active)[self._head_off:rl]
142
- return mv
143
-
144
- return mv
145
-
146
- def segments(self) -> ta.Sequence[memoryview]:
147
- if not self._segs:
148
- return ()
149
-
150
- out: ta.List[memoryview] = []
151
-
152
- last_i = len(self._segs) - 1
153
- for i, s in enumerate(self._segs):
154
- if self._active is not None and i == last_i and s is self._active:
155
- # Active chunk: create fresh view with readable length.
156
- rl = self._active_readable_len()
157
- if i == 0:
158
- # Active is also first segment; apply head_off.
159
- if self._head_off >= rl:
160
- continue
161
- mv = memoryview(self._active)[self._head_off:rl]
162
- else:
163
- if rl <= 0:
164
- continue
165
- mv = memoryview(self._active)[:rl]
166
- else:
167
- # Non-active segment.
168
- mv = memoryview(s)
169
- if i == 0 and self._head_off:
170
- mv = mv[self._head_off:]
171
-
172
- if len(mv):
173
- out.append(mv)
174
-
175
- return tuple(out)
176
-
177
- def _ensure_active(self) -> bytearray:
178
- if self._chunk_size <= 0:
179
- raise RuntimeError('no active chunk without chunk_size')
180
-
181
- a = self._active
182
- if a is None:
183
- a = bytearray(self._chunk_size) # fixed capacity
184
- self._segs.append(a)
185
- self._active = a
186
- self._active_used = 0
187
-
188
- return a
189
-
190
    def _flush_active(self) -> None:
        """
        Retire the active chunk (if any) into a finalized segment.

        - An empty active chunk is dropped entirely.
        - A chunk filled below `chunk_compact_threshold` is copied to `bytes` so its full fixed
          capacity is not pinned by a small payload.
        - A fuller chunk is shrunk in place; if exported memoryviews forbid resizing
          (BufferError), fall back to a `bytes` copy.

        Raises OutstandingReserve if a reservation is outstanding in the active chunk.
        """
        if (a := self._active) is None:
            return

        if self._reserved_in_active:
            raise OutstandingReserve('outstanding reserve')

        if (used := self._active_used) <= 0:
            # Nothing written into it: remove the tail segment and forget the chunk.
            if self._segs and self._segs[-1] is a:
                self._segs.pop()
            self._active = None
            self._active_used = 0
            return

        # If under threshold, always bytes() to avoid pinning.
        if self._chunk_size and (float(used) / float(self._chunk_size)) < self._chunk_compact_threshold:
            if not self._segs or self._segs[-1] is not a:
                raise RuntimeError('active not at tail')
            self._segs[-1] = bytes(memoryview(a)[:used])

        else:
            # Try to shrink in-place to used bytes. If exported views exist, this can BufferError; fall back to bytes()
            # in that case.
            if not self._segs or self._segs[-1] is not a:
                raise RuntimeError('active not at tail')
            try:
                del a[used:]  # may raise BufferError if any exports exist
            except BufferError:
                self._segs[-1] = bytes(memoryview(a)[:used])

        self._active = None
        self._active_used = 0
222
-
223
    def write(self, data: BytesLike, /) -> None:
        """
        Append `data` to the readable contents.

        Small writes (< chunk_size, when chunking is enabled) are packed into the active chunk;
        larger writes become their own segments after the active chunk is flushed.

        Raises BufferTooLarge if the write would exceed `max_bytes`, and OutstandingReserve if a
        reservation is outstanding in the active chunk.
        """
        if not data:
            return
        if isinstance(data, memoryview):
            # Materialize so the caller's underlying buffer is neither pinned nor aliased.
            data = data.tobytes()
        # NOTE(review): a caller-supplied `bytes`/`bytearray` is stored without copying below —
        # later caller mutation of a bytearray would be visible in the buffer. Presumed a
        # deliberate zero-copy choice; confirm with callers.

        dl = len(data)

        if self._max_bytes is not None and self._len + dl > self._max_bytes:
            raise BufferTooLarge('buffer exceeded max_bytes')

        if self._chunk_size <= 0:
            # Chunking disabled: every write is its own segment.
            self._segs.append(data)
            self._len += dl
            return

        if self._reserved_in_active:
            raise OutstandingReserve('outstanding reserve')

        if dl >= self._chunk_size:
            # Large write: stored as its own segment, after retiring the active chunk.
            self._flush_active()
            self._segs.append(data)
            self._len += dl
            return

        a = self._ensure_active()
        if self._active_used + dl > self._chunk_size:
            # Does not fit in the remaining capacity: retire and start a fresh chunk.
            self._flush_active()
            a = self._ensure_active()

        # Copy into fixed-capacity buffer; do not resize.
        memoryview(a)[self._active_used:self._active_used + dl] = data
        self._active_used += dl
        self._len += dl
261
-
262
    def reserve(self, n: int, /) -> memoryview:
        """
        Reserve `n` writable bytes, returned as a memoryview to be filled before `commit()`.

        Reserved bytes are not readable until committed, and only one reservation may be
        outstanding at a time. Small reservations are carved from the active chunk; larger ones
        (or any reservation when chunking is disabled) get a dedicated buffer.

        Raises ValueError for negative `n`, OutstandingReserve if a reservation already exists.
        """
        if n < 0:
            raise ValueError(n)
        if self._reserved is not None:
            raise OutstandingReserve('outstanding reserve')

        if self._chunk_size <= 0:
            # Chunking disabled: always serve from a dedicated buffer.
            b = bytearray(n)
            self._reserved = b
            self._reserved_len = n
            self._reserved_in_active = False
            return memoryview(b)

        if n > self._chunk_size:
            # Too big for any chunk: dedicated buffer, "closed" on commit.
            self._flush_active()
            b = bytearray(n)
            self._reserved = b
            self._reserved_len = n
            self._reserved_in_active = False
            return memoryview(b)

        # Ensure reservation fits in active; otherwise flush then create a new one.
        if self._active is not None and (self._active_used + n > self._chunk_size):
            self._flush_active()

        a = self._ensure_active()

        start = self._active_used
        # Reservation does not change _active_used (not readable until commit).
        self._reserved = a
        self._reserved_len = n
        self._reserved_in_active = True
        return memoryview(a)[start:start + n]
295
-
296
    def commit(self, n: int, /) -> None:
        """
        Commit the first `n` bytes of the outstanding reservation, making them readable.

        For active-chunk reservations the chunk is kept for further writes; for dedicated-buffer
        reservations the buffer becomes a regular segment (copied when only partially committed).

        Raises NoOutstandingReserve without a reservation, ValueError if `n` is out of range, and
        BufferTooLarge if committing would exceed `max_bytes`.
        """
        if self._reserved is None:
            raise NoOutstandingReserve('no outstanding reserve')
        if n < 0 or n > self._reserved_len:
            raise ValueError(n)

        if self._reserved_in_active:
            a = self._reserved
            self._reserved = None
            self._reserved_len = 0
            self._reserved_in_active = False

            if self._max_bytes is not None and self._len + n > self._max_bytes:
                raise BufferTooLarge('buffer exceeded max_bytes')

            if n:
                # The reserved region starts at _active_used; committing simply extends "used".
                self._active_used += n
                self._len += n

            # Keep active for reuse.
            self._active = a
            return

        b = self._reserved
        self._reserved = None
        self._reserved_len = 0
        self._reserved_in_active = False

        if self._max_bytes is not None and self._len + n > self._max_bytes:
            raise BufferTooLarge('buffer exceeded max_bytes')

        if not n:
            return

        if n == len(b):
            self._segs.append(b)
            self._len += n
        else:
            # Partial commit: copy so the oversized reservation buffer is not pinned.
            bb = bytes(memoryview(b)[:n])
            self._segs.append(bb)
            self._len += n
337
-
338
    def advance(self, n: int, /) -> None:
        """
        Consume (discard) the first `n` readable bytes.

        Fully-consumed segments are dropped; a partially-consumed head segment just moves
        `_head_off`. Dropping the active chunk forgets it (it cannot be reused once popped).

        Raises ValueError if `n` is negative or exceeds the readable length.
        """
        if n < 0 or n > self._len:
            raise ValueError(n)
        if n == 0:
            return

        self._len -= n

        while n and self._segs:
            s0 = self._segs[0]

            # The active chunk's meaningful length is tracked separately from len(bytearray).
            if self._active is not None and s0 is self._active:
                avail0 = self._active_readable_len() - self._head_off
            else:
                avail0 = len(s0) - self._head_off

            if avail0 <= 0:
                # Already fully consumed: drop the segment.
                popped = self._segs.pop(0)
                if popped is self._active:
                    self._active = None
                    self._active_used = 0
                self._head_off = 0
                continue

            if n < avail0:
                # Partial consume: just move the head offset.
                self._head_off += n
                return

            n -= avail0
            popped = self._segs.pop(0)
            if popped is self._active:
                self._active = None
                self._active_used = 0
            self._head_off = 0

        if n:
            # _len promised enough readable bytes; reaching here indicates internal corruption.
            raise RuntimeError(n)
375
-
376
    def split_to(self, n: int, /) -> SegmentedBytesView:
        """
        Detach the first `n` readable bytes and return them as a zero-copy SegmentedBytesView.

        Raises ValueError if `n` is negative or exceeds the readable length.
        """
        if n < 0 or n > self._len:
            raise ValueError(n)
        if n == 0:
            return SegmentedBytesView(())

        out: ta.List[memoryview] = []
        rem = n

        while rem:
            if not self._segs:
                # _len promised enough readable bytes; internal corruption.
                raise RuntimeError(rem)

            s0 = self._segs[0]

            if self._active is not None and s0 is self._active:
                # Active chunk: only the readable prefix is meaningful.
                rl = self._active_readable_len()
                if self._head_off >= rl:
                    raise RuntimeError(rem)
                mv0 = memoryview(s0)[self._head_off:rl]
            else:
                mv0 = memoryview(s0)
                if self._head_off:
                    mv0 = mv0[self._head_off:]

            if rem < len(mv0):
                # Split inside this segment: export a prefix, keep the rest readable.
                out.append(mv0[:rem])
                self._head_off += rem
                self._len -= n
                return SegmentedBytesView(out)

            out.append(mv0)
            rem -= len(mv0)
            popped = self._segs.pop(0)
            if popped is self._active:
                # Exported views pin the bytearray; it can no longer serve as the active chunk.
                self._active = None
                self._active_used = 0
            self._head_off = 0

        self._len -= n
        return SegmentedBytesView(out)
417
-
418
- def coalesce(self, n: int, /) -> memoryview:
419
- if n < 0:
420
- raise ValueError(n)
421
- if n > self._len:
422
- raise ValueError(n)
423
- if n == 0:
424
- return memoryview(b'')
425
-
426
- if self._reserved is not None:
427
- raise OutstandingReserve('outstanding reserve')
428
-
429
- mv0 = self.peek()
430
- if len(mv0) >= n:
431
- return mv0[:n]
432
-
433
- out = bytearray(n)
434
- w = 0
435
-
436
- new_segs: ta.List[ta.Union[bytes, bytearray]] = []
437
-
438
- seg_i = 0
439
- while w < n and seg_i < len(self._segs):
440
- s = self._segs[seg_i]
441
- off = self._head_off if seg_i == 0 else 0
442
-
443
- seg_len = len(s) - off
444
- if self._active is not None and seg_i == (len(self._segs) - 1) and s is self._active:
445
- seg_len = self._active_readable_len() - off
446
-
447
- if seg_len <= 0:
448
- seg_i += 1
449
- continue
450
-
451
- take = n - w
452
- if take > seg_len:
453
- take = seg_len
454
-
455
- out[w:w + take] = memoryview(s)[off:off + take]
456
- w += take
457
-
458
- if take < seg_len:
459
- rem = s[off + take:off + seg_len]
460
- if rem:
461
- new_segs.append(rem)
462
- seg_i += 1
463
- break
464
-
465
- seg_i += 1
466
-
467
- if seg_i < len(self._segs):
468
- new_segs.extend(self._segs[seg_i:])
469
-
470
- self._segs = [bytes(out), *new_segs]
471
- self._head_off = 0
472
-
473
- self._active = None
474
- self._active_used = 0
475
-
476
- return memoryview(self._segs[0])[:n]
477
-
478
- def find(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
479
- start, end = _norm_slice(len(self), start, end)
480
-
481
- m = len(sub)
482
- if m == 0:
483
- return start
484
- if end - start < m:
485
- return -1
486
-
487
- limit = end - m
488
-
489
- tail = b''
490
- tail_gstart = 0
491
-
492
- gpos = 0
493
-
494
- last_i = len(self._segs) - 1
495
-
496
- for si, s in enumerate(self._segs):
497
- off = self._head_off if si == 0 else 0
498
-
499
- seg_len = len(s) - off
500
- if self._active is not None and si == last_i and s is self._active:
501
- seg_len = self._active_readable_len() - off
502
-
503
- if seg_len <= 0:
504
- continue
505
-
506
- seg_gs = gpos
507
- seg_ge = gpos + seg_len
508
-
509
- if limit >= seg_gs and start < seg_ge:
510
- ls = start - seg_gs if start > seg_gs else 0
511
- max_start_in_seg = limit - seg_gs
512
- end_search = max_start_in_seg + m
513
- if end_search > seg_len:
514
- end_search = seg_len
515
- if ls < end_search:
516
- idx = s.find(sub, off + ls, off + end_search)
517
- if idx != -1:
518
- return seg_gs + (idx - off)
519
-
520
- if m > 1 and tail:
521
- head_need = m - 1
522
- head = s[off:off + head_need]
523
- comb = tail + head
524
- j = comb.find(sub)
525
- if j != -1 and j < len(tail) < j + m:
526
- cand = tail_gstart + j
527
- if start <= cand <= limit:
528
- return cand
529
-
530
- if m > 1:
531
- take = m - 1
532
- if seg_len >= take:
533
- tail = s[off + seg_len - take:off + seg_len]
534
- tail_gstart = seg_ge - take
535
- else:
536
- tail = (tail + s[off:off + seg_len])[-(m - 1):]
537
- tail_gstart = seg_ge - len(tail)
538
-
539
- gpos = seg_ge
540
-
541
- return -1
542
-
543
- def rfind(self, sub: bytes, start: int = 0, end: ta.Optional[int] = None) -> int:
544
- start, end = _norm_slice(len(self), start, end)
545
-
546
- m = len(sub)
547
- if m == 0:
548
- return end
549
- if end - start < m:
550
- return -1
551
-
552
- limit = end - m
553
-
554
- if not self._segs:
555
- return -1
556
-
557
- best = -1
558
-
559
- seg_ge = self._len
560
- prev_s: ta.Optional[ta.Union[bytes, bytearray]] = None
561
- prev_off = 0
562
-
563
- last_i = len(self._segs) - 1
564
-
565
- for si in range(len(self._segs) - 1, -1, -1):
566
- s = self._segs[si]
567
- off = self._head_off if si == 0 else 0
568
-
569
- seg_len = len(s) - off
570
- if self._active is not None and si == last_i and s is self._active:
571
- seg_len = self._active_readable_len() - off
572
-
573
- if seg_len <= 0:
574
- continue
575
-
576
- seg_gs = seg_ge - seg_len
577
-
578
- if limit >= seg_gs and start < seg_ge:
579
- ls = start - seg_gs if start > seg_gs else 0
580
- max_start_in_seg = limit - seg_gs
581
- end_search = max_start_in_seg + m
582
- if end_search > seg_len:
583
- end_search = seg_len
584
- if ls < end_search:
585
- idx = s.rfind(sub, off + ls, off + end_search)
586
- if idx != -1:
587
- cand = seg_gs + (idx - off)
588
- if cand > best:
589
- best = cand
590
-
591
- if m > 1 and prev_s is not None:
592
- tail_need = m - 1
593
- if seg_len >= tail_need:
594
- tail = s[off + seg_len - tail_need:off + seg_len]
595
- tail_gstart = seg_ge - tail_need
596
-
597
- else:
598
- tail_parts = [s[off:off + seg_len]]
599
- tail_len = seg_len
600
- for sj in range(si - 1, -1, -1):
601
- if tail_len >= tail_need:
602
- break
603
-
604
- sj_s = self._segs[sj]
605
- sj_off = self._head_off if sj == 0 else 0
606
- sj_len = len(sj_s) - sj_off
607
- if self._active is not None and sj == last_i and sj_s is self._active:
608
- sj_len = self._active_readable_len() - sj_off
609
- if sj_len <= 0:
610
- continue
611
-
612
- take = min(tail_need - tail_len, sj_len)
613
- tail_parts.insert(0, sj_s[sj_off + sj_len - take:sj_off + sj_len])
614
- tail_len += take
615
-
616
- tail_combined = b''.join(tail_parts)
617
- tail = tail_combined[-(m - 1):] if len(tail_combined) >= m - 1 else tail_combined
618
- tail_gstart = seg_ge - len(tail)
619
-
620
- head_need = m - 1
621
- head = prev_s[prev_off:prev_off + head_need]
622
-
623
- comb = tail + head
624
- j = comb.rfind(sub)
625
- if j != -1 and j < len(tail) < j + m:
626
- cand = tail_gstart + j
627
- if start <= cand <= limit and cand > best:
628
- best = cand
629
-
630
- if best >= seg_gs:
631
- return best
632
-
633
- prev_s = s
634
- prev_off = off
635
- seg_ge = seg_gs
636
-
637
- return best