python-filewrap 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
filewrap/__init__.py CHANGED
@@ -1,8 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
  # encoding: utf-8
3
3
 
4
+ # TODO: 使用 codecs.iterdecode 来避免解码过程中的一些重复操作
5
+ # TODO: AsyncTextIOWrapper 的 read 和 readline 算法效率不高,因为会反复创建二进制对象,如果可以复用一段或者几段(内存块组)内存,则可以大大增加效率,还可以引入环形缓冲区(使用长度限定的 bytearray,之后所有操作在 memoryview 上进行,根据当前的可用区块来返回 memoryview),以减少内存分配的开销
6
+ # TODO: AsyncTextIOWrapper.readline 有大量的字符串拼接操作,效率极低,需用 str.join 方法优化
7
+
4
8
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 2, 2)
9
+ __version__ = (0, 2, 3)
6
10
  __all__ = [
7
11
  "Buffer", "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
8
12
  "AsyncBufferedReader", "AsyncTextIOWrapper",
@@ -84,6 +88,20 @@ class SupportsSeek(Protocol):
84
88
  def seek(self, /, __offset: int, __whence: int = 0) -> int: ...
85
89
 
86
90
 
91
+ # TODO: 一些特定编码的 bom 用字典写死,编码名可以规范化,用 codecs.lookup(encoding).name
92
def get_bom(encoding: str) -> bytes:
    """Return the byte-order mark that ``encoding`` prepends when encoding text.

    Encoding the empty string yields exactly the bytes a codec emits before
    any character data: the BOM for codecs such as ``utf-16``, ``utf-32`` and
    ``utf-8-sig``, and ``b""`` for codecs that write no BOM.

    The previous implementation probed prefixes of the encoded ``"a"`` and
    raised ``UnicodeError`` when no proper prefix decoded cleanly.  That is
    the case for multi-byte encodings without a BOM (``utf-16-le``,
    ``utf-16-be``, ``utf-32-le``, ``utf-32-be``), which must report an empty
    BOM instead of failing.

    Args:
        encoding: Name of a text encoding known to the codec registry.

    Returns:
        The BOM bytes, or ``b""`` when the encoding does not emit one.

    Raises:
        LookupError: If ``encoding`` is not a known text encoding.
    """
    return bytes("", encoding)
103
+
104
+
87
105
  class AsyncBufferedReader(BufferedReader):
88
106
 
89
107
  def __init__(
@@ -548,6 +566,7 @@ class AsyncTextIOWrapper(TextIOWrapper):
548
566
  write_through=write_through,
549
567
  )
550
568
  self.newline = newline
569
+ self._bom = get_bom(self.encoding)
551
570
 
552
571
  def __del__(self, /):
553
572
  try:
@@ -624,9 +643,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
624
643
 
625
644
  ls_parts: list[str] = []
626
645
  add_part = ls_parts.append
627
- cache = b""
628
- while data := await read(size):
629
- cache += data
646
+ cache = data
647
+ while size and len(data) == size:
630
648
  while cache:
631
649
  try:
632
650
  size -= process_part(cache)
@@ -635,15 +653,37 @@ class AsyncTextIOWrapper(TextIOWrapper):
635
653
  start, stop = e.start, e.end
636
654
  if start:
637
655
  size -= process_part(cache[:start])
638
- if e.reason == "unexpected end of data" and stop == len(cache):
656
+ if e.reason == "truncated data":
657
+ if stop == len(cache):
658
+ cache = cache[start:]
659
+ break
660
+ else:
661
+ while stop < len(cache):
662
+ stop += 1
663
+ try:
664
+ size -= process_part(cache[start:stop])
665
+ cache = cache[stop:]
666
+ break_this_loop = True
667
+ break
668
+ except UnicodeDecodeError as exc:
669
+ e = exc
670
+ if e.reason != "truncated data":
671
+ break
672
+ if stop == len(cache):
673
+ cache = cache[start:]
674
+ break_this_loop = True
675
+ break
676
+ if break_this_loop:
677
+ break
678
+ elif e.reason == "unexpected end of data" and stop == len(cache):
639
679
  cache = cache[start:]
640
680
  break
641
681
  if errors == "strict":
642
- raise
682
+ raise e
643
683
  size -= process_part(cache[start:stop], errors)
644
684
  cache = cache[stop:]
645
- if len(data) < size:
646
- break
685
+ data = await read(size)
686
+ cache += data
647
687
  if cache:
648
688
  process_part(cache, errors)
649
689
  return "".join(ls_parts)
@@ -665,9 +705,14 @@ class AsyncTextIOWrapper(TextIOWrapper):
665
705
  peek = None
666
706
  if newline:
667
707
  sepb = bytes(newline, encoding)
708
+ if bom := self._bom:
709
+ sepb = sepb.removeprefix(bom)
668
710
  else:
669
711
  crb = bytes("\r", encoding)
670
712
  lfb = bytes("\n", encoding)
713
+ if bom := self._bom:
714
+ crb = crb.removeprefix(bom)
715
+ lfb = lfb.removeprefix(bom)
671
716
  lfb_len = len(lfb)
672
717
  buf = bytearray()
673
718
  text = ""
@@ -702,17 +747,17 @@ class AsyncTextIOWrapper(TextIOWrapper):
702
747
  buf += peek_b
703
748
  if newline:
704
749
  if (idx := buf.find(sepb)) > -1:
705
- idx += 1
750
+ idx += len(sepb)
706
751
  await read(idx - buf_stop)
707
752
  del buf[idx:]
708
753
  break
709
754
  elif (idx := buf.find(lfb)) > -1:
710
- idx += 1
755
+ idx += len(lfb)
711
756
  await read(idx - buf_stop)
712
757
  del buf[idx:]
713
758
  break
714
759
  elif (idx := buf.find(crb)) > -1:
715
- idx += 1
760
+ idx += len(crb)
716
761
  await read(idx - buf_stop)
717
762
  if buf.startswith(lfb, idx):
718
763
  await read(lfb_len)
@@ -729,10 +774,32 @@ class AsyncTextIOWrapper(TextIOWrapper):
729
774
  try:
730
775
  text += str(buf, encoding)
731
776
  buf.clear()
732
- except UnicodeEncodeError as e:
777
+ except UnicodeDecodeError as e:
733
778
  start, stop = e.start, e.end
734
779
  if start:
735
780
  text += str(buf[:start], encoding)
781
+ if e.reason == "truncated data":
782
+ if stop == len(buf):
783
+ buf = buf[start:]
784
+ break
785
+ else:
786
+ while stop < len(buf):
787
+ stop += 1
788
+ try:
789
+ text += str(buf[start:stop], encoding)
790
+ buf = buf[stop:]
791
+ break_this_loop = True
792
+ break
793
+ except UnicodeDecodeError as exc:
794
+ e = exc
795
+ if e.reason != "truncated data":
796
+ break
797
+ if stop == len(buf):
798
+ buf = buf[start:]
799
+ break_this_loop = True
800
+ break
801
+ if break_this_loop:
802
+ break
736
803
  if e.reason == "unexpected end of data" and stop == len(buf):
737
804
  buf = buf[start:]
738
805
  break
@@ -820,11 +887,11 @@ class AsyncTextIOWrapper(TextIOWrapper):
820
887
  try:
821
888
  text += str(buf, encoding)
822
889
  buf.clear()
823
- except UnicodeEncodeError as e:
890
+ except UnicodeDecodeError as e:
824
891
  start, stop = e.start, e.end
825
892
  if start:
826
893
  text += str(buf[:start], encoding)
827
- if e.reason == "unexpected end of data" and stop == len(buf):
894
+ if e.reason in ("unexpected end of data", "truncated data") and stop == len(buf):
828
895
  buf = buf[start:]
829
896
  break
830
897
  if errors == "strict":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-filewrap
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Python file wrappers.
5
5
  Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-filewrap
6
6
  License: MIT
@@ -0,0 +1,7 @@
1
+ LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
2
+ filewrap/__init__.py,sha256=4df3xlJWICB_KuZMk-fKFruoajkYwt98rrv7f6OA4jg,66308
3
+ filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ python_filewrap-0.2.3.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
5
+ python_filewrap-0.2.3.dist-info/METADATA,sha256=sKgfIVxU80aZyA8QpqkNLf5PkmMj7R-b7PuK2Dq3w38,1413
6
+ python_filewrap-0.2.3.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
7
+ python_filewrap-0.2.3.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
2
- filewrap/__init__.py,sha256=kQ3EL_pPx5jkImO5tPuhAx6PiQnocXeHwVmBz3_o60k,62700
3
- filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- python_filewrap-0.2.2.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
5
- python_filewrap-0.2.2.dist-info/METADATA,sha256=hOJOU4Q01T3SkJQ0VQ7qAVOZSUcvwlEe3q2uHeo_xZ4,1413
6
- python_filewrap-0.2.2.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
7
- python_filewrap-0.2.2.dist-info/RECORD,,