python-filewrap 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
filewrap/__init__.py CHANGED
@@ -1,8 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
  # encoding: utf-8
3
3
 
4
+ # TODO: 使用 codecs.iterdecode 来避免解码过程中的一些重复操作
5
+ # TODO: AsyncTextIOWrapper 的 read 和 readline 算法效率不高,因为会反复创建二进制对象,如果可以复用一段或者几段(内存块组)内存,则可以大大增加效率,还可以引入环形缓冲区(使用长度限定的 bytearray,之后所有操作在 memoryview 上进行,根据当前的可用区块来返回 memoryview),以减少内存分配的开销
6
+ # TODO: AsyncTextIOWrapper.readline 有大量的字符串拼接操作,效率极低,需用 str.join 方法优化
7
+
4
8
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 2, 1)
9
+ __version__ = (0, 2, 3)
6
10
  __all__ = [
7
11
  "Buffer", "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
8
12
  "AsyncBufferedReader", "AsyncTextIOWrapper",
@@ -14,6 +18,7 @@ __all__ = [
14
18
  "bytes_to_chunk_iter", "bytes_to_chunk_async_iter",
15
19
  "bytes_ensure_part_iter", "bytes_ensure_part_async_iter",
16
20
  "progress_bytes_iter", "progress_bytes_async_iter",
21
+ "copyfileobj", "copyfileobj_async",
17
22
  ]
18
23
 
19
24
  from asyncio import to_thread, Lock as AsyncLock
@@ -83,6 +88,20 @@ class SupportsSeek(Protocol):
83
88
  def seek(self, /, __offset: int, __whence: int = 0) -> int: ...
84
89
 
85
90
 
91
+ # TODO: 一些特定编码的 bom 用字典写死,编码名可以规范化,用 codecs.lookup(encoding).name
92
def get_bom(encoding: str) -> bytes:
    """Return the byte-order mark that `encoding` prepends when encoding text.

    The BOM is discovered by probing: the single character "a" is encoded,
    and the shortest proper prefix of the result that decodes on its own is
    taken to be the BOM.

    :param encoding: name of a text encoding accepted by `bytes(str, encoding)`.
    :return: the BOM bytes, or `b""` when the encoding emits no BOM.
    :raises LookupError: if `encoding` is not a known codec (raised by `bytes`).
    """
    encoded = bytes("a", encoding)
    # A one-byte result leaves no room for a BOM in front of the character.
    if len(encoded) == 1:
        return b""
    # Slice through a memoryview so each probe avoids copying the prefix.
    view = memoryview(encoded)
    for end in range(1, len(encoded)):
        prefix = view[:end]
        try:
            str(prefix, encoding)
        except UnicodeDecodeError:
            continue
        return prefix.tobytes()
    # No proper prefix decodes by itself: the character is simply wider than
    # one byte (e.g. "utf-16-le", "utf-32-be"), so the encoding has no BOM.
    # Previously this case raised UnicodeError, which rejected such encodings.
    return b""
103
+
104
+
86
105
  class AsyncBufferedReader(BufferedReader):
87
106
 
88
107
  def __init__(
@@ -547,6 +566,7 @@ class AsyncTextIOWrapper(TextIOWrapper):
547
566
  write_through=write_through,
548
567
  )
549
568
  self.newline = newline
569
+ self._bom = get_bom(self.encoding)
550
570
 
551
571
  def __del__(self, /):
552
572
  try:
@@ -623,9 +643,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
623
643
 
624
644
  ls_parts: list[str] = []
625
645
  add_part = ls_parts.append
626
- cache = b""
627
- while data := await read(size):
628
- cache += data
646
+ cache = data
647
+ while size and len(data) == size:
629
648
  while cache:
630
649
  try:
631
650
  size -= process_part(cache)
@@ -634,15 +653,37 @@ class AsyncTextIOWrapper(TextIOWrapper):
634
653
  start, stop = e.start, e.end
635
654
  if start:
636
655
  size -= process_part(cache[:start])
637
- if e.reason == "unexpected end of data" and stop == len(cache):
656
+ if e.reason == "truncated data":
657
+ if stop == len(cache):
658
+ cache = cache[start:]
659
+ break
660
+ else:
661
+ while stop < len(cache):
662
+ stop += 1
663
+ try:
664
+ size -= process_part(cache[start:stop])
665
+ cache = cache[stop:]
666
+ break_this_loop = True
667
+ break
668
+ except UnicodeDecodeError as exc:
669
+ e = exc
670
+ if e.reason != "truncated data":
671
+ break
672
+ if stop == len(cache):
673
+ cache = cache[start:]
674
+ break_this_loop = True
675
+ break
676
+ if break_this_loop:
677
+ break
678
+ elif e.reason == "unexpected end of data" and stop == len(cache):
638
679
  cache = cache[start:]
639
680
  break
640
681
  if errors == "strict":
641
- raise
682
+ raise e
642
683
  size -= process_part(cache[start:stop], errors)
643
684
  cache = cache[stop:]
644
- if len(data) < size:
645
- break
685
+ data = await read(size)
686
+ cache += data
646
687
  if cache:
647
688
  process_part(cache, errors)
648
689
  return "".join(ls_parts)
@@ -664,9 +705,14 @@ class AsyncTextIOWrapper(TextIOWrapper):
664
705
  peek = None
665
706
  if newline:
666
707
  sepb = bytes(newline, encoding)
708
+ if bom := self._bom:
709
+ sepb = sepb.removeprefix(bom)
667
710
  else:
668
711
  crb = bytes("\r", encoding)
669
712
  lfb = bytes("\n", encoding)
713
+ if bom := self._bom:
714
+ crb = crb.removeprefix(bom)
715
+ lfb = lfb.removeprefix(bom)
670
716
  lfb_len = len(lfb)
671
717
  buf = bytearray()
672
718
  text = ""
@@ -701,17 +747,17 @@ class AsyncTextIOWrapper(TextIOWrapper):
701
747
  buf += peek_b
702
748
  if newline:
703
749
  if (idx := buf.find(sepb)) > -1:
704
- idx += 1
750
+ idx += len(sepb)
705
751
  await read(idx - buf_stop)
706
752
  del buf[idx:]
707
753
  break
708
754
  elif (idx := buf.find(lfb)) > -1:
709
- idx += 1
755
+ idx += len(lfb)
710
756
  await read(idx - buf_stop)
711
757
  del buf[idx:]
712
758
  break
713
759
  elif (idx := buf.find(crb)) > -1:
714
- idx += 1
760
+ idx += len(crb)
715
761
  await read(idx - buf_stop)
716
762
  if buf.startswith(lfb, idx):
717
763
  await read(lfb_len)
@@ -728,10 +774,32 @@ class AsyncTextIOWrapper(TextIOWrapper):
728
774
  try:
729
775
  text += str(buf, encoding)
730
776
  buf.clear()
731
- except UnicodeEncodeError as e:
777
+ except UnicodeDecodeError as e:
732
778
  start, stop = e.start, e.end
733
779
  if start:
734
780
  text += str(buf[:start], encoding)
781
+ if e.reason == "truncated data":
782
+ if stop == len(buf):
783
+ buf = buf[start:]
784
+ break
785
+ else:
786
+ while stop < len(buf):
787
+ stop += 1
788
+ try:
789
+ text += str(buf[start:stop], encoding)
790
+ buf = buf[stop:]
791
+ break_this_loop = True
792
+ break
793
+ except UnicodeDecodeError as exc:
794
+ e = exc
795
+ if e.reason != "truncated data":
796
+ break
797
+ if stop == len(buf):
798
+ buf = buf[start:]
799
+ break_this_loop = True
800
+ break
801
+ if break_this_loop:
802
+ break
735
803
  if e.reason == "unexpected end of data" and stop == len(buf):
736
804
  buf = buf[start:]
737
805
  break
@@ -819,11 +887,11 @@ class AsyncTextIOWrapper(TextIOWrapper):
819
887
  try:
820
888
  text += str(buf, encoding)
821
889
  buf.clear()
822
- except UnicodeEncodeError as e:
890
+ except UnicodeDecodeError as e:
823
891
  start, stop = e.start, e.end
824
892
  if start:
825
893
  text += str(buf[:start], encoding)
826
- if e.reason == "unexpected end of data" and stop == len(buf):
894
+ if e.reason in ("unexpected end of data", "truncated data") and stop == len(buf):
827
895
  buf = buf[start:]
828
896
  break
829
897
  if errors == "strict":
@@ -1793,3 +1861,57 @@ async def progress_bytes_async_iter(
1793
1861
  if callable(close_progress):
1794
1862
  await ensure_async(close_progress)()
1795
1863
 
1864
+
1865
def copyfileobj(
    fsrc,
    fdst: SupportsWrite[Buffer],
    /,
    chunksize: int = COPY_BUFSIZE,
):
    """Copy all data from `fsrc` to `fdst`, chunk by chunk.

    The source is consumed through the first available of these interfaces:
    a callable `readinto` (one reusable buffer), a callable `read`, or
    plain iteration over `fsrc`.

    :param fsrc: source object offering `readinto`, `read`, or iteration.
    :param fdst: destination object; only its `write` method is used.
    :param chunksize: bytes requested per read; a non-positive value falls
        back to `COPY_BUFSIZE`.
    """
    if chunksize <= 0:
        chunksize = COPY_BUFSIZE
    write = fdst.write
    read = getattr(fsrc, "read", None)
    readinto = getattr(fsrc, "readinto", None)

    if callable(readinto):
        # One buffer is filled repeatedly; only the filled part is written.
        scratch = bytearray(chunksize)
        window = memoryview(scratch)
        while True:
            filled = readinto(scratch)
            if not filled:
                return
            write(window[:filled])

    if callable(read):
        while True:
            piece = read(chunksize)
            if not piece:
                return
            write(piece)

    # No read-style method: treat the source as an iterable of chunks and
    # skip empty ones.
    for piece in fsrc:
        if piece:
            write(piece)
1888
+
1889
+
1890
async def copyfileobj_async(
    fsrc,
    fdst: SupportsWrite[Buffer],
    /,
    chunksize: int = COPY_BUFSIZE,
    threaded: bool = True,
):
    """Asynchronously copy all data from `fsrc` to `fdst`, chunk by chunk.

    The source is consumed through the first available of these interfaces:
    a callable `readinto` (one reusable buffer), a callable `read`, or
    asynchronous iteration over `ensure_aiter(fsrc, ...)`.

    :param fsrc: source object offering `readinto`, `read`, or iteration.
    :param fdst: destination object; only its `write` method is used.
    :param chunksize: bytes requested per read; a non-positive value falls
        back to `COPY_BUFSIZE`.
    :param threaded: forwarded to `ensure_async` / `ensure_aiter`
        (presumably selects running sync callables in a worker thread;
        confirm against those helpers).
    """
    if chunksize <= 0:
        chunksize = COPY_BUFSIZE
    write = ensure_async(fdst.write, threaded=threaded)
    read = getattr(fsrc, "read", None)
    readinto = getattr(fsrc, "readinto", None)

    if callable(readinto):
        readinto = ensure_async(readinto, threaded=threaded)
        # One buffer is filled repeatedly; only the filled part is written.
        scratch = bytearray(chunksize)
        window = memoryview(scratch)
        while True:
            filled = await readinto(scratch)
            if not filled:
                return
            await write(window[:filled])

    if callable(read):
        read = ensure_async(read, threaded=threaded)
        while True:
            piece = await read(chunksize)
            if not piece:
                return
            await write(piece)

    # No read-style method: iterate the source asynchronously and skip
    # empty chunks.
    async for piece in ensure_aiter(fsrc, threaded=threaded):
        if piece:
            await write(piece)
1917
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-filewrap
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Python file wrappers.
5
5
  Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-filewrap
6
6
  License: MIT
@@ -0,0 +1,7 @@
1
+ LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
2
+ filewrap/__init__.py,sha256=4df3xlJWICB_KuZMk-fKFruoajkYwt98rrv7f6OA4jg,66308
3
+ filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ python_filewrap-0.2.3.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
5
+ python_filewrap-0.2.3.dist-info/METADATA,sha256=sKgfIVxU80aZyA8QpqkNLf5PkmMj7R-b7PuK2Dq3w38,1413
6
+ python_filewrap-0.2.3.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
7
+ python_filewrap-0.2.3.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
2
- filewrap/__init__.py,sha256=hHxlP4oHxigZM59pS1q2JcHWlir_6Tz5nFmEZKkmaJo,61023
3
- filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- python_filewrap-0.2.1.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
5
- python_filewrap-0.2.1.dist-info/METADATA,sha256=3uqJCHXBeLRckkyTTdzFCBLst6hv31dq68RFPU8Ei_k,1413
6
- python_filewrap-0.2.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
7
- python_filewrap-0.2.1.dist-info/RECORD,,