python-filewrap 0.2.2__py3-none-any.whl → 0.2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- filewrap/__init__.py +86 -16
- {python_filewrap-0.2.2.dist-info → python_filewrap-0.2.3.1.dist-info}/METADATA +1 -1
- python_filewrap-0.2.3.1.dist-info/RECORD +7 -0
- python_filewrap-0.2.2.dist-info/RECORD +0 -7
- {python_filewrap-0.2.2.dist-info → python_filewrap-0.2.3.1.dist-info}/LICENSE +0 -0
- {python_filewrap-0.2.2.dist-info → python_filewrap-0.2.3.1.dist-info}/WHEEL +0 -0
filewrap/__init__.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# encoding: utf-8
|
|
3
3
|
|
|
4
|
+
# TODO: 使用 codecs.iterdecode 来避免解码过程中的一些重复操作
|
|
5
|
+
# TODO: AsyncTextIOWrapper 的 read 和 readline 算法效率不高,因为会反复创建二进制对象,如果可以复用一段或者几段(内存块组)内存,则可以大大增加效率,还可以引入环形缓冲区(使用长度限定的 bytearray,之后所有操作在 memoryview 上进行,根据当前的可用区块来返回 memoryview),以减少内存分配的开销
|
|
6
|
+
# TODO: AsyncTextIOWrapper.readline 有大量的字符串拼接操作,效率极低,需用 str.join 方法优化
|
|
7
|
+
|
|
4
8
|
__author__ = "ChenyangGao <https://chenyanggao.github.io>"
|
|
5
|
-
__version__ = (0, 2,
|
|
9
|
+
__version__ = (0, 2, 3)
|
|
6
10
|
__all__ = [
|
|
7
11
|
"Buffer", "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
|
|
8
12
|
"AsyncBufferedReader", "AsyncTextIOWrapper",
|
|
@@ -84,6 +88,20 @@ class SupportsSeek(Protocol):
|
|
|
84
88
|
def seek(self, /, __offset: int, __whence: int = 0) -> int: ...
|
|
85
89
|
|
|
86
90
|
|
|
91
|
+
# TODO: 一些特定编码的 bom 用字典写死,编码名可以规范化,用 codecs.lookup(encoding).name
|
|
92
|
+
def get_bom(encoding: str) -> bytes:
    """Return the byte-order mark that *encoding* emits before encoded text.

    The BOM is discovered empirically: the single character ``"a"`` is
    encoded, and the shortest non-empty prefix of the result that decodes
    to the empty string is taken to be the BOM.

    Args:
        encoding: Name of a text encoding accepted by ``bytes(str, encoding)``.

    Returns:
        The BOM bytes (e.g. ``b"\\xff\\xfe"`` for little-endian ``"utf-16"``),
        or ``b""`` when the encoding does not prepend one. This includes
        BOM-less multi-byte encodings such as ``"utf-16-le"``.

    Raises:
        LookupError: If *encoding* is not a known codec (raised by ``bytes``).
    """
    encoded = bytes("a", encoding)
    # Only proper prefixes are candidates: the full sequence decodes to "a".
    for end in range(1, len(encoded)):
        prefix = encoded[:end]
        try:
            decoded = str(prefix, encoding)
        except UnicodeDecodeError:
            # Prefix stops in the middle of the BOM or of the character.
            continue
        if not decoded:
            # Consumed bytes but produced no text: this prefix is the BOM.
            return prefix
    # No prefix behaves like a BOM (single-byte result, or a BOM-less
    # multi-byte encoding whose prefixes are all truncated sequences).
    return b""
|
|
103
|
+
|
|
104
|
+
|
|
87
105
|
class AsyncBufferedReader(BufferedReader):
|
|
88
106
|
|
|
89
107
|
def __init__(
|
|
@@ -331,8 +349,9 @@ class AsyncBufferedReader(BufferedReader):
|
|
|
331
349
|
break
|
|
332
350
|
if length < BUFSIZE:
|
|
333
351
|
part1, part2 = buf_view[length:].tobytes(), buf_view[:length].tobytes()
|
|
334
|
-
|
|
335
|
-
buf_view[
|
|
352
|
+
index = len(part1)
|
|
353
|
+
buf_view[:index] = part1
|
|
354
|
+
buf_view[index:] = part2
|
|
336
355
|
running = False
|
|
337
356
|
buf_pos = self._buf_pos = BUFSIZE - length
|
|
338
357
|
else:
|
|
@@ -548,6 +567,7 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
548
567
|
write_through=write_through,
|
|
549
568
|
)
|
|
550
569
|
self.newline = newline
|
|
570
|
+
self._bom = get_bom(self.encoding)
|
|
551
571
|
|
|
552
572
|
def __del__(self, /):
|
|
553
573
|
try:
|
|
@@ -624,9 +644,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
624
644
|
|
|
625
645
|
ls_parts: list[str] = []
|
|
626
646
|
add_part = ls_parts.append
|
|
627
|
-
cache =
|
|
628
|
-
while data
|
|
629
|
-
cache += data
|
|
647
|
+
cache = data
|
|
648
|
+
while size and len(data) == size:
|
|
630
649
|
while cache:
|
|
631
650
|
try:
|
|
632
651
|
size -= process_part(cache)
|
|
@@ -635,15 +654,37 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
635
654
|
start, stop = e.start, e.end
|
|
636
655
|
if start:
|
|
637
656
|
size -= process_part(cache[:start])
|
|
638
|
-
if e.reason == "
|
|
657
|
+
if e.reason == "truncated data":
|
|
658
|
+
if stop == len(cache):
|
|
659
|
+
cache = cache[start:]
|
|
660
|
+
break
|
|
661
|
+
else:
|
|
662
|
+
while stop < len(cache):
|
|
663
|
+
stop += 1
|
|
664
|
+
try:
|
|
665
|
+
size -= process_part(cache[start:stop])
|
|
666
|
+
cache = cache[stop:]
|
|
667
|
+
break_this_loop = True
|
|
668
|
+
break
|
|
669
|
+
except UnicodeDecodeError as exc:
|
|
670
|
+
e = exc
|
|
671
|
+
if e.reason != "truncated data":
|
|
672
|
+
break
|
|
673
|
+
if stop == len(cache):
|
|
674
|
+
cache = cache[start:]
|
|
675
|
+
break_this_loop = True
|
|
676
|
+
break
|
|
677
|
+
if break_this_loop:
|
|
678
|
+
break
|
|
679
|
+
elif e.reason == "unexpected end of data" and stop == len(cache):
|
|
639
680
|
cache = cache[start:]
|
|
640
681
|
break
|
|
641
682
|
if errors == "strict":
|
|
642
|
-
raise
|
|
683
|
+
raise e
|
|
643
684
|
size -= process_part(cache[start:stop], errors)
|
|
644
685
|
cache = cache[stop:]
|
|
645
|
-
|
|
646
|
-
|
|
686
|
+
data = await read(size)
|
|
687
|
+
cache += data
|
|
647
688
|
if cache:
|
|
648
689
|
process_part(cache, errors)
|
|
649
690
|
return "".join(ls_parts)
|
|
@@ -665,9 +706,14 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
665
706
|
peek = None
|
|
666
707
|
if newline:
|
|
667
708
|
sepb = bytes(newline, encoding)
|
|
709
|
+
if bom := self._bom:
|
|
710
|
+
sepb = sepb.removeprefix(bom)
|
|
668
711
|
else:
|
|
669
712
|
crb = bytes("\r", encoding)
|
|
670
713
|
lfb = bytes("\n", encoding)
|
|
714
|
+
if bom := self._bom:
|
|
715
|
+
crb = crb.removeprefix(bom)
|
|
716
|
+
lfb = lfb.removeprefix(bom)
|
|
671
717
|
lfb_len = len(lfb)
|
|
672
718
|
buf = bytearray()
|
|
673
719
|
text = ""
|
|
@@ -702,17 +748,17 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
702
748
|
buf += peek_b
|
|
703
749
|
if newline:
|
|
704
750
|
if (idx := buf.find(sepb)) > -1:
|
|
705
|
-
idx +=
|
|
751
|
+
idx += len(sepb)
|
|
706
752
|
await read(idx - buf_stop)
|
|
707
753
|
del buf[idx:]
|
|
708
754
|
break
|
|
709
755
|
elif (idx := buf.find(lfb)) > -1:
|
|
710
|
-
idx +=
|
|
756
|
+
idx += len(lfb)
|
|
711
757
|
await read(idx - buf_stop)
|
|
712
758
|
del buf[idx:]
|
|
713
759
|
break
|
|
714
760
|
elif (idx := buf.find(crb)) > -1:
|
|
715
|
-
idx +=
|
|
761
|
+
idx += len(crb)
|
|
716
762
|
await read(idx - buf_stop)
|
|
717
763
|
if buf.startswith(lfb, idx):
|
|
718
764
|
await read(lfb_len)
|
|
@@ -720,6 +766,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
720
766
|
else:
|
|
721
767
|
del buf[idx:]
|
|
722
768
|
break
|
|
769
|
+
if peek_b:
|
|
770
|
+
await read(len(peek_b))
|
|
723
771
|
c = await read(1)
|
|
724
772
|
if not c:
|
|
725
773
|
reach_end = True
|
|
@@ -729,10 +777,32 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
729
777
|
try:
|
|
730
778
|
text += str(buf, encoding)
|
|
731
779
|
buf.clear()
|
|
732
|
-
except
|
|
780
|
+
except UnicodeDecodeError as e:
|
|
733
781
|
start, stop = e.start, e.end
|
|
734
782
|
if start:
|
|
735
783
|
text += str(buf[:start], encoding)
|
|
784
|
+
if e.reason == "truncated data":
|
|
785
|
+
if stop == len(buf):
|
|
786
|
+
buf = buf[start:]
|
|
787
|
+
break
|
|
788
|
+
else:
|
|
789
|
+
while stop < len(buf):
|
|
790
|
+
stop += 1
|
|
791
|
+
try:
|
|
792
|
+
text += str(buf[start:stop], encoding)
|
|
793
|
+
buf = buf[stop:]
|
|
794
|
+
break_this_loop = True
|
|
795
|
+
break
|
|
796
|
+
except UnicodeDecodeError as exc:
|
|
797
|
+
e = exc
|
|
798
|
+
if e.reason != "truncated data":
|
|
799
|
+
break
|
|
800
|
+
if stop == len(buf):
|
|
801
|
+
buf = buf[start:]
|
|
802
|
+
break_this_loop = True
|
|
803
|
+
break
|
|
804
|
+
if break_this_loop:
|
|
805
|
+
break
|
|
736
806
|
if e.reason == "unexpected end of data" and stop == len(buf):
|
|
737
807
|
buf = buf[start:]
|
|
738
808
|
break
|
|
@@ -820,11 +890,11 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
820
890
|
try:
|
|
821
891
|
text += str(buf, encoding)
|
|
822
892
|
buf.clear()
|
|
823
|
-
except
|
|
893
|
+
except UnicodeDecodeError as e:
|
|
824
894
|
start, stop = e.start, e.end
|
|
825
895
|
if start:
|
|
826
896
|
text += str(buf[:start], encoding)
|
|
827
|
-
if e.reason
|
|
897
|
+
if e.reason in ("unexpected end of data", "truncated data") and stop == len(buf):
|
|
828
898
|
buf = buf[start:]
|
|
829
899
|
break
|
|
830
900
|
if errors == "strict":
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
2
|
+
filewrap/__init__.py,sha256=uACyl0LdRv6joj8ayKhY2YFtmKrL5Fea_qH0T5j7KoI,66436
|
|
3
|
+
filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
python_filewrap-0.2.3.1.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
5
|
+
python_filewrap-0.2.3.1.dist-info/METADATA,sha256=8s4EU2QnJdSQlh6DqMY5ygfMtkYxWRCQppIPOw_3k-c,1415
|
|
6
|
+
python_filewrap-0.2.3.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
7
|
+
python_filewrap-0.2.3.1.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
2
|
-
filewrap/__init__.py,sha256=kQ3EL_pPx5jkImO5tPuhAx6PiQnocXeHwVmBz3_o60k,62700
|
|
3
|
-
filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
python_filewrap-0.2.2.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
5
|
-
python_filewrap-0.2.2.dist-info/METADATA,sha256=hOJOU4Q01T3SkJQ0VQ7qAVOZSUcvwlEe3q2uHeo_xZ4,1413
|
|
6
|
-
python_filewrap-0.2.2.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
7
|
-
python_filewrap-0.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|