python-filewrap 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- filewrap/__init__.py +81 -14
- {python_filewrap-0.2.2.dist-info → python_filewrap-0.2.3.dist-info}/METADATA +1 -1
- python_filewrap-0.2.3.dist-info/RECORD +7 -0
- python_filewrap-0.2.2.dist-info/RECORD +0 -7
- {python_filewrap-0.2.2.dist-info → python_filewrap-0.2.3.dist-info}/LICENSE +0 -0
- {python_filewrap-0.2.2.dist-info → python_filewrap-0.2.3.dist-info}/WHEEL +0 -0
filewrap/__init__.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# encoding: utf-8
|
|
3
3
|
|
|
4
|
+
# TODO: use codecs.iterdecode to avoid some repeated work during decoding
|
|
5
|
+
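# TODO: the read and readline algorithms of AsyncTextIOWrapper are inefficient because they repeatedly create new bytes objects; reusing one or several blocks of memory (a block group) would be much faster. A ring buffer could also be introduced (a fixed-length bytearray, with all later operations done through memoryview and a memoryview returned over the currently available region) to reduce memory-allocation overhead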
|
|
6
|
+
# TODO: AsyncTextIOWrapper.readline performs a lot of string concatenation, which is very inefficient; it should be optimized with str.join
|
|
7
|
+
|
|
4
8
|
__author__ = "ChenyangGao <https://chenyanggao.github.io>"
|
|
5
|
-
__version__ = (0, 2, 2)
|
|
9
|
+
__version__ = (0, 2, 3)
|
|
6
10
|
__all__ = [
|
|
7
11
|
"Buffer", "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
|
|
8
12
|
"AsyncBufferedReader", "AsyncTextIOWrapper",
|
|
@@ -84,6 +88,20 @@ class SupportsSeek(Protocol):
|
|
|
84
88
|
def seek(self, /, __offset: int, __whence: int = 0) -> int: ...
|
|
85
89
|
|
|
86
90
|
|
|
91
|
+
# TODO: hard-code the BOMs of some well-known encodings in a dict; encoding
# names can be normalized with codecs.lookup(encoding).name
def get_bom(encoding: str) -> bytes:
    """Return the byte-order mark that `encoding` prepends when encoding text.

    The BOM is detected by encoding the single character "a" and looking for
    the shortest proper prefix of the result that decodes to the empty string.

    :param encoding: name of a text encoding known to the codec registry.
    :return: the BOM bytes, or ``b""`` when the encoding emits no BOM
        (for example "utf-8", "latin-1", "utf-16-le" or "utf-32-be").
    :raises LookupError: if `encoding` is not a known text encoding.
    """
    code = memoryview(bytes("a", encoding))
    # Only proper prefixes are candidates: the full encoding of "a" always
    # contains the character itself, so it can never be a pure BOM.
    for i in range(1, len(code)):
        try:
            # A BOM carries no text, so it must decode to "". A prefix that
            # decodes to actual characters is not a BOM.
            if not str(code[:i], encoding):
                return code[:i].tobytes()
        except UnicodeDecodeError:
            # Prefix is an incomplete code unit; try a longer one.
            pass
    # No prefix decodes to "": the encoding is BOM-less. This includes
    # fixed-width multi-byte encodings such as "utf-16-le" / "utf-32-be",
    # which must not be reported as an error.
    return b""
|
|
103
|
+
|
|
104
|
+
|
|
87
105
|
class AsyncBufferedReader(BufferedReader):
|
|
88
106
|
|
|
89
107
|
def __init__(
|
|
@@ -548,6 +566,7 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
548
566
|
write_through=write_through,
|
|
549
567
|
)
|
|
550
568
|
self.newline = newline
|
|
569
|
+
self._bom = get_bom(self.encoding)
|
|
551
570
|
|
|
552
571
|
def __del__(self, /):
|
|
553
572
|
try:
|
|
@@ -624,9 +643,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
624
643
|
|
|
625
644
|
ls_parts: list[str] = []
|
|
626
645
|
add_part = ls_parts.append
|
|
627
|
-
cache =
|
|
628
|
-
while data
|
|
629
|
-
cache += data
|
|
646
|
+
cache = data
|
|
647
|
+
while size and len(data) == size:
|
|
630
648
|
while cache:
|
|
631
649
|
try:
|
|
632
650
|
size -= process_part(cache)
|
|
@@ -635,15 +653,37 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
635
653
|
start, stop = e.start, e.end
|
|
636
654
|
if start:
|
|
637
655
|
size -= process_part(cache[:start])
|
|
638
|
-
if e.reason == "
|
|
656
|
+
if e.reason == "truncated data":
|
|
657
|
+
if stop == len(cache):
|
|
658
|
+
cache = cache[start:]
|
|
659
|
+
break
|
|
660
|
+
else:
|
|
661
|
+
while stop < len(cache):
|
|
662
|
+
stop += 1
|
|
663
|
+
try:
|
|
664
|
+
size -= process_part(cache[start:stop])
|
|
665
|
+
cache = cache[stop:]
|
|
666
|
+
break_this_loop = True
|
|
667
|
+
break
|
|
668
|
+
except UnicodeDecodeError as exc:
|
|
669
|
+
e = exc
|
|
670
|
+
if e.reason != "truncated data":
|
|
671
|
+
break
|
|
672
|
+
if stop == len(cache):
|
|
673
|
+
cache = cache[start:]
|
|
674
|
+
break_this_loop = True
|
|
675
|
+
break
|
|
676
|
+
if break_this_loop:
|
|
677
|
+
break
|
|
678
|
+
elif e.reason == "unexpected end of data" and stop == len(cache):
|
|
639
679
|
cache = cache[start:]
|
|
640
680
|
break
|
|
641
681
|
if errors == "strict":
|
|
642
|
-
raise
|
|
682
|
+
raise e
|
|
643
683
|
size -= process_part(cache[start:stop], errors)
|
|
644
684
|
cache = cache[stop:]
|
|
645
|
-
|
|
646
|
-
|
|
685
|
+
data = await read(size)
|
|
686
|
+
cache += data
|
|
647
687
|
if cache:
|
|
648
688
|
process_part(cache, errors)
|
|
649
689
|
return "".join(ls_parts)
|
|
@@ -665,9 +705,14 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
665
705
|
peek = None
|
|
666
706
|
if newline:
|
|
667
707
|
sepb = bytes(newline, encoding)
|
|
708
|
+
if bom := self._bom:
|
|
709
|
+
sepb = sepb.removeprefix(bom)
|
|
668
710
|
else:
|
|
669
711
|
crb = bytes("\r", encoding)
|
|
670
712
|
lfb = bytes("\n", encoding)
|
|
713
|
+
if bom := self._bom:
|
|
714
|
+
crb = crb.removeprefix(bom)
|
|
715
|
+
lfb = lfb.removeprefix(bom)
|
|
671
716
|
lfb_len = len(lfb)
|
|
672
717
|
buf = bytearray()
|
|
673
718
|
text = ""
|
|
@@ -702,17 +747,17 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
702
747
|
buf += peek_b
|
|
703
748
|
if newline:
|
|
704
749
|
if (idx := buf.find(sepb)) > -1:
|
|
705
|
-
idx +=
|
|
750
|
+
idx += len(sepb)
|
|
706
751
|
await read(idx - buf_stop)
|
|
707
752
|
del buf[idx:]
|
|
708
753
|
break
|
|
709
754
|
elif (idx := buf.find(lfb)) > -1:
|
|
710
|
-
idx +=
|
|
755
|
+
idx += len(lfb)
|
|
711
756
|
await read(idx - buf_stop)
|
|
712
757
|
del buf[idx:]
|
|
713
758
|
break
|
|
714
759
|
elif (idx := buf.find(crb)) > -1:
|
|
715
|
-
idx +=
|
|
760
|
+
idx += len(crb)
|
|
716
761
|
await read(idx - buf_stop)
|
|
717
762
|
if buf.startswith(lfb, idx):
|
|
718
763
|
await read(lfb_len)
|
|
@@ -729,10 +774,32 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
729
774
|
try:
|
|
730
775
|
text += str(buf, encoding)
|
|
731
776
|
buf.clear()
|
|
732
|
-
except
|
|
777
|
+
except UnicodeDecodeError as e:
|
|
733
778
|
start, stop = e.start, e.end
|
|
734
779
|
if start:
|
|
735
780
|
text += str(buf[:start], encoding)
|
|
781
|
+
if e.reason == "truncated data":
|
|
782
|
+
if stop == len(buf):
|
|
783
|
+
buf = buf[start:]
|
|
784
|
+
break
|
|
785
|
+
else:
|
|
786
|
+
while stop < len(buf):
|
|
787
|
+
stop += 1
|
|
788
|
+
try:
|
|
789
|
+
text += str(buf[start:stop], encoding)
|
|
790
|
+
buf = buf[stop:]
|
|
791
|
+
break_this_loop = True
|
|
792
|
+
break
|
|
793
|
+
except UnicodeDecodeError as exc:
|
|
794
|
+
e = exc
|
|
795
|
+
if e.reason != "truncated data":
|
|
796
|
+
break
|
|
797
|
+
if stop == len(buf):
|
|
798
|
+
buf = buf[start:]
|
|
799
|
+
break_this_loop = True
|
|
800
|
+
break
|
|
801
|
+
if break_this_loop:
|
|
802
|
+
break
|
|
736
803
|
if e.reason == "unexpected end of data" and stop == len(buf):
|
|
737
804
|
buf = buf[start:]
|
|
738
805
|
break
|
|
@@ -820,11 +887,11 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
820
887
|
try:
|
|
821
888
|
text += str(buf, encoding)
|
|
822
889
|
buf.clear()
|
|
823
|
-
except
|
|
890
|
+
except UnicodeDecodeError as e:
|
|
824
891
|
start, stop = e.start, e.end
|
|
825
892
|
if start:
|
|
826
893
|
text += str(buf[:start], encoding)
|
|
827
|
-
if e.reason
|
|
894
|
+
if e.reason in ("unexpected end of data", "truncated data") and stop == len(buf):
|
|
828
895
|
buf = buf[start:]
|
|
829
896
|
break
|
|
830
897
|
if errors == "strict":
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
2
|
+
filewrap/__init__.py,sha256=4df3xlJWICB_KuZMk-fKFruoajkYwt98rrv7f6OA4jg,66308
|
|
3
|
+
filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
python_filewrap-0.2.3.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
5
|
+
python_filewrap-0.2.3.dist-info/METADATA,sha256=sKgfIVxU80aZyA8QpqkNLf5PkmMj7R-b7PuK2Dq3w38,1413
|
|
6
|
+
python_filewrap-0.2.3.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
7
|
+
python_filewrap-0.2.3.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
2
|
-
filewrap/__init__.py,sha256=kQ3EL_pPx5jkImO5tPuhAx6PiQnocXeHwVmBz3_o60k,62700
|
|
3
|
-
filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
python_filewrap-0.2.2.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
5
|
-
python_filewrap-0.2.2.dist-info/METADATA,sha256=hOJOU4Q01T3SkJQ0VQ7qAVOZSUcvwlEe3q2uHeo_xZ4,1413
|
|
6
|
-
python_filewrap-0.2.2.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
7
|
-
python_filewrap-0.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|