python-filewrap 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- filewrap/__init__.py +136 -14
- {python_filewrap-0.2.1.dist-info → python_filewrap-0.2.3.dist-info}/METADATA +1 -1
- python_filewrap-0.2.3.dist-info/RECORD +7 -0
- python_filewrap-0.2.1.dist-info/RECORD +0 -7
- {python_filewrap-0.2.1.dist-info → python_filewrap-0.2.3.dist-info}/LICENSE +0 -0
- {python_filewrap-0.2.1.dist-info → python_filewrap-0.2.3.dist-info}/WHEEL +0 -0
filewrap/__init__.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# encoding: utf-8
|
|
3
3
|
|
|
4
|
+
# TODO: use codecs.iterdecode to avoid some of the repeated work done while decoding
|
|
5
|
+
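# TODO: the read and readline algorithms of AsyncTextIOWrapper are inefficient because they repeatedly create binary objects; reusing one or several memory blocks (a group of blocks) would greatly improve efficiency, and a ring buffer could also be introduced (a length-limited bytearray, with all subsequent operations done on a memoryview, returning a memoryview over the currently available region) to reduce memory-allocation overhead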
|
|
6
|
+
# TODO: AsyncTextIOWrapper.readline performs a large number of string concatenations, which is very inefficient; it should be optimized with str.join
|
|
7
|
+
|
|
4
8
|
__author__ = "ChenyangGao <https://chenyanggao.github.io>"
|
|
5
|
-
__version__ = (0, 2,
|
|
9
|
+
__version__ = (0, 2, 3)
|
|
6
10
|
__all__ = [
|
|
7
11
|
"Buffer", "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
|
|
8
12
|
"AsyncBufferedReader", "AsyncTextIOWrapper",
|
|
@@ -14,6 +18,7 @@ __all__ = [
|
|
|
14
18
|
"bytes_to_chunk_iter", "bytes_to_chunk_async_iter",
|
|
15
19
|
"bytes_ensure_part_iter", "bytes_ensure_part_async_iter",
|
|
16
20
|
"progress_bytes_iter", "progress_bytes_async_iter",
|
|
21
|
+
"copyfileobj", "copyfileobj_async",
|
|
17
22
|
]
|
|
18
23
|
|
|
19
24
|
from asyncio import to_thread, Lock as AsyncLock
|
|
@@ -83,6 +88,20 @@ class SupportsSeek(Protocol):
|
|
|
83
88
|
def seek(self, /, __offset: int, __whence: int = 0) -> int: ...
|
|
84
89
|
|
|
85
90
|
|
|
91
|
+
# TODO: hard-code the BOMs of certain specific encodings in a dict; the encoding name can be normalized with codecs.lookup(encoding).name
|
|
92
|
+
def get_bom(encoding: str) -> bytes:
    """Return the byte-order mark that ``encoding`` prepends when encoding text.

    The probe character ``"a"`` is encoded with ``encoding``; the shortest
    non-empty leading slice of the result that decodes to an empty string is
    taken to be the BOM.

    :param encoding: name of a text codec, as accepted by ``bytes(str, encoding)``.

    :return: the BOM bytes, or ``b""`` when the codec emits no BOM.
    """
    code = memoryview(bytes("a", encoding))
    # Start at 1 (an empty slice trivially decodes to "") and stop before the
    # full length (the whole encoding of "a" is text, not a BOM).
    for i in range(1, len(code)):
        prefix = code[:i]
        try:
            decoded = str(prefix, encoding)
        except UnicodeDecodeError:
            # Incomplete BOM or incomplete code unit: try a longer prefix.
            continue
        # A BOM carries no text, so only a prefix that decodes to "" counts.
        if not decoded:
            return prefix.tobytes()
    # No prefix qualified: the codec writes no BOM. This covers single-byte
    # encodings of "a" as well as BOM-less multi-byte codecs such as
    # utf-16-le / utf-32-be, which previously fell through to an exception.
    return b""
|
|
103
|
+
|
|
104
|
+
|
|
86
105
|
class AsyncBufferedReader(BufferedReader):
|
|
87
106
|
|
|
88
107
|
def __init__(
|
|
@@ -547,6 +566,7 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
547
566
|
write_through=write_through,
|
|
548
567
|
)
|
|
549
568
|
self.newline = newline
|
|
569
|
+
self._bom = get_bom(self.encoding)
|
|
550
570
|
|
|
551
571
|
def __del__(self, /):
|
|
552
572
|
try:
|
|
@@ -623,9 +643,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
623
643
|
|
|
624
644
|
ls_parts: list[str] = []
|
|
625
645
|
add_part = ls_parts.append
|
|
626
|
-
cache =
|
|
627
|
-
while data
|
|
628
|
-
cache += data
|
|
646
|
+
cache = data
|
|
647
|
+
while size and len(data) == size:
|
|
629
648
|
while cache:
|
|
630
649
|
try:
|
|
631
650
|
size -= process_part(cache)
|
|
@@ -634,15 +653,37 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
634
653
|
start, stop = e.start, e.end
|
|
635
654
|
if start:
|
|
636
655
|
size -= process_part(cache[:start])
|
|
637
|
-
if e.reason == "
|
|
656
|
+
if e.reason == "truncated data":
|
|
657
|
+
if stop == len(cache):
|
|
658
|
+
cache = cache[start:]
|
|
659
|
+
break
|
|
660
|
+
else:
|
|
661
|
+
while stop < len(cache):
|
|
662
|
+
stop += 1
|
|
663
|
+
try:
|
|
664
|
+
size -= process_part(cache[start:stop])
|
|
665
|
+
cache = cache[stop:]
|
|
666
|
+
break_this_loop = True
|
|
667
|
+
break
|
|
668
|
+
except UnicodeDecodeError as exc:
|
|
669
|
+
e = exc
|
|
670
|
+
if e.reason != "truncated data":
|
|
671
|
+
break
|
|
672
|
+
if stop == len(cache):
|
|
673
|
+
cache = cache[start:]
|
|
674
|
+
break_this_loop = True
|
|
675
|
+
break
|
|
676
|
+
if break_this_loop:
|
|
677
|
+
break
|
|
678
|
+
elif e.reason == "unexpected end of data" and stop == len(cache):
|
|
638
679
|
cache = cache[start:]
|
|
639
680
|
break
|
|
640
681
|
if errors == "strict":
|
|
641
|
-
raise
|
|
682
|
+
raise e
|
|
642
683
|
size -= process_part(cache[start:stop], errors)
|
|
643
684
|
cache = cache[stop:]
|
|
644
|
-
|
|
645
|
-
|
|
685
|
+
data = await read(size)
|
|
686
|
+
cache += data
|
|
646
687
|
if cache:
|
|
647
688
|
process_part(cache, errors)
|
|
648
689
|
return "".join(ls_parts)
|
|
@@ -664,9 +705,14 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
664
705
|
peek = None
|
|
665
706
|
if newline:
|
|
666
707
|
sepb = bytes(newline, encoding)
|
|
708
|
+
if bom := self._bom:
|
|
709
|
+
sepb = sepb.removeprefix(bom)
|
|
667
710
|
else:
|
|
668
711
|
crb = bytes("\r", encoding)
|
|
669
712
|
lfb = bytes("\n", encoding)
|
|
713
|
+
if bom := self._bom:
|
|
714
|
+
crb = crb.removeprefix(bom)
|
|
715
|
+
lfb = lfb.removeprefix(bom)
|
|
670
716
|
lfb_len = len(lfb)
|
|
671
717
|
buf = bytearray()
|
|
672
718
|
text = ""
|
|
@@ -701,17 +747,17 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
701
747
|
buf += peek_b
|
|
702
748
|
if newline:
|
|
703
749
|
if (idx := buf.find(sepb)) > -1:
|
|
704
|
-
idx +=
|
|
750
|
+
idx += len(sepb)
|
|
705
751
|
await read(idx - buf_stop)
|
|
706
752
|
del buf[idx:]
|
|
707
753
|
break
|
|
708
754
|
elif (idx := buf.find(lfb)) > -1:
|
|
709
|
-
idx +=
|
|
755
|
+
idx += len(lfb)
|
|
710
756
|
await read(idx - buf_stop)
|
|
711
757
|
del buf[idx:]
|
|
712
758
|
break
|
|
713
759
|
elif (idx := buf.find(crb)) > -1:
|
|
714
|
-
idx +=
|
|
760
|
+
idx += len(crb)
|
|
715
761
|
await read(idx - buf_stop)
|
|
716
762
|
if buf.startswith(lfb, idx):
|
|
717
763
|
await read(lfb_len)
|
|
@@ -728,10 +774,32 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
728
774
|
try:
|
|
729
775
|
text += str(buf, encoding)
|
|
730
776
|
buf.clear()
|
|
731
|
-
except
|
|
777
|
+
except UnicodeDecodeError as e:
|
|
732
778
|
start, stop = e.start, e.end
|
|
733
779
|
if start:
|
|
734
780
|
text += str(buf[:start], encoding)
|
|
781
|
+
if e.reason == "truncated data":
|
|
782
|
+
if stop == len(buf):
|
|
783
|
+
buf = buf[start:]
|
|
784
|
+
break
|
|
785
|
+
else:
|
|
786
|
+
while stop < len(buf):
|
|
787
|
+
stop += 1
|
|
788
|
+
try:
|
|
789
|
+
text += str(buf[start:stop], encoding)
|
|
790
|
+
buf = buf[stop:]
|
|
791
|
+
break_this_loop = True
|
|
792
|
+
break
|
|
793
|
+
except UnicodeDecodeError as exc:
|
|
794
|
+
e = exc
|
|
795
|
+
if e.reason != "truncated data":
|
|
796
|
+
break
|
|
797
|
+
if stop == len(buf):
|
|
798
|
+
buf = buf[start:]
|
|
799
|
+
break_this_loop = True
|
|
800
|
+
break
|
|
801
|
+
if break_this_loop:
|
|
802
|
+
break
|
|
735
803
|
if e.reason == "unexpected end of data" and stop == len(buf):
|
|
736
804
|
buf = buf[start:]
|
|
737
805
|
break
|
|
@@ -819,11 +887,11 @@ class AsyncTextIOWrapper(TextIOWrapper):
|
|
|
819
887
|
try:
|
|
820
888
|
text += str(buf, encoding)
|
|
821
889
|
buf.clear()
|
|
822
|
-
except
|
|
890
|
+
except UnicodeDecodeError as e:
|
|
823
891
|
start, stop = e.start, e.end
|
|
824
892
|
if start:
|
|
825
893
|
text += str(buf[:start], encoding)
|
|
826
|
-
if e.reason
|
|
894
|
+
if e.reason in ("unexpected end of data", "truncated data") and stop == len(buf):
|
|
827
895
|
buf = buf[start:]
|
|
828
896
|
break
|
|
829
897
|
if errors == "strict":
|
|
@@ -1793,3 +1861,57 @@ async def progress_bytes_async_iter(
|
|
|
1793
1861
|
if callable(close_progress):
|
|
1794
1862
|
await ensure_async(close_progress)()
|
|
1795
1863
|
|
|
1864
|
+
|
|
1865
|
+
def copyfileobj(
    fsrc,
    fdst: SupportsWrite[Buffer],
    /,
    chunksize: int = COPY_BUFSIZE,
):
    """Copy everything readable from ``fsrc`` into ``fdst``.

    The source is consumed through the first of these that is available:
    a callable ``readinto`` attribute, a callable ``read`` attribute, or
    plain iteration over ``fsrc``.

    :param fsrc: the source; an object with ``readinto``/``read``, or an
        iterable of chunks.
    :param fdst: the destination; only its ``write`` method is used.
    :param chunksize: number of bytes requested per read; a non-positive
        value is replaced by ``COPY_BUFSIZE``.
    """
    if chunksize <= 0:
        chunksize = COPY_BUFSIZE
    write = fdst.write
    read = getattr(fsrc, "read", None)
    readinto = getattr(fsrc, "readinto", None)

    if callable(readinto):
        # One reusable buffer; the memoryview lets each write pass only the
        # filled part without copying it.
        scratch = bytearray(chunksize)
        window = memoryview(scratch)
        while True:
            nbytes = readinto(scratch)
            if not nbytes:
                return
            write(window[:nbytes])

    if callable(read):
        while True:
            block = read(chunksize)
            if not block:
                return
            write(block)

    # Neither reader method exists: treat the source as an iterable of
    # chunks and skip the empty ones.
    for block in fsrc:
        if block:
            write(block)
|
|
1888
|
+
|
|
1889
|
+
|
|
1890
|
+
async def copyfileobj_async(
    fsrc,
    fdst: SupportsWrite[Buffer],
    /,
    chunksize: int = COPY_BUFSIZE,
    threaded: bool = True,
):
    """Asynchronously copy everything readable from ``fsrc`` into ``fdst``.

    The source is consumed through the first of these that is available:
    a callable ``readinto`` attribute, a callable ``read`` attribute, or
    iteration over ``fsrc``. Every read and write call is passed through
    ``ensure_async`` and awaited.

    :param fsrc: the source; an object with ``readinto``/``read``, or an
        iterable of chunks.
    :param fdst: the destination; only its ``write`` method is used.
    :param chunksize: number of bytes requested per read; a non-positive
        value is replaced by ``COPY_BUFSIZE``.
    :param threaded: forwarded unchanged to ``ensure_async`` / ``ensure_aiter``
        (presumably selects running blocking callables in a thread; confirm
        against those helpers).
    """
    if chunksize <= 0:
        chunksize = COPY_BUFSIZE
    write = ensure_async(fdst.write, threaded=threaded)
    read = getattr(fsrc, "read", None)
    readinto = getattr(fsrc, "readinto", None)

    if callable(readinto):
        readinto = ensure_async(readinto, threaded=threaded)
        # One reusable buffer; the memoryview lets each write pass only the
        # filled part without copying it.
        scratch = bytearray(chunksize)
        window = memoryview(scratch)
        while True:
            nbytes = await readinto(scratch)
            if not nbytes:
                return
            await write(window[:nbytes])

    if callable(read):
        read = ensure_async(read, threaded=threaded)
        while True:
            block = await read(chunksize)
            if not block:
                return
            await write(block)

    # Neither reader method exists: iterate the source asynchronously and
    # skip the empty chunks.
    async for block in ensure_aiter(fsrc, threaded=threaded):
        if block:
            await write(block)
|
|
1917
|
+
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
2
|
+
filewrap/__init__.py,sha256=4df3xlJWICB_KuZMk-fKFruoajkYwt98rrv7f6OA4jg,66308
|
|
3
|
+
filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
python_filewrap-0.2.3.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
5
|
+
python_filewrap-0.2.3.dist-info/METADATA,sha256=sKgfIVxU80aZyA8QpqkNLf5PkmMj7R-b7PuK2Dq3w38,1413
|
|
6
|
+
python_filewrap-0.2.3.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
7
|
+
python_filewrap-0.2.3.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
2
|
-
filewrap/__init__.py,sha256=hHxlP4oHxigZM59pS1q2JcHWlir_6Tz5nFmEZKkmaJo,61023
|
|
3
|
-
filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
python_filewrap-0.2.1.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
|
|
5
|
-
python_filewrap-0.2.1.dist-info/METADATA,sha256=3uqJCHXBeLRckkyTTdzFCBLst6hv31dq68RFPU8Ei_k,1413
|
|
6
|
-
python_filewrap-0.2.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
7
|
-
python_filewrap-0.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|