python-filewrap 0.2.2__py3-none-any.whl → 0.2.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
filewrap/__init__.py CHANGED
@@ -1,8 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
  # encoding: utf-8
3
3
 
4
+ # TODO: 使用 codecs.iterdecode 来避免解码过程中的一些重复操作
5
+ # TODO: AsyncTextIOWrapper 的 read 和 readline 算法效率不高,因为会反复创建二进制对象,如果可以复用一段或者几段(内存块组)内存,则可以大大增加效率,还可以引入环形缓冲区(使用长度限定的 bytearray,之后所有操作在 memoryview 上进行,根据当前的可用区块来返回 memoryview),以减少内存分配的开销
6
+ # TODO: AsyncTextIOWrapper.readline 有大量的字符串拼接操作,效率极低,需用 str.join 方法优化
7
+
4
8
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
5
- __version__ = (0, 2, 2)
9
+ __version__ = (0, 2, 3)
6
10
  __all__ = [
7
11
  "Buffer", "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
8
12
  "AsyncBufferedReader", "AsyncTextIOWrapper",
@@ -84,6 +88,20 @@ class SupportsSeek(Protocol):
84
88
  def seek(self, /, __offset: int, __whence: int = 0) -> int: ...
85
89
 
86
90
 
91
+ # TODO: 一些特定编码的 bom 用字典写死,编码名可以规范化,用 codecs.lookup(encoding).name
92
def get_bom(encoding: str) -> bytes:
    """Return the byte-order mark that *encoding* prepends when encoding text.

    Codecs that write a signature (for example ``utf-16``, ``utf-32`` and
    ``utf-8-sig``) emit it even for empty input, so encoding the empty string
    yields exactly the BOM. Codecs without a signature yield ``b""``.

    The previous implementation encoded ``"a"`` and searched for the shortest
    prefix that decoded without error. That raised ``UnicodeError`` for
    BOM-less encodings whose code units are wider than one byte (such as
    ``utf-16-le`` or ``utf-32-be``), and could return a non-BOM prefix for
    codecs where a partial prefix happens to decode to some text.

    Args:
        encoding: Name of a text encoding known to the codec registry.

    Returns:
        The BOM bytes written by the encoder, or ``b""`` if it writes none.

    Raises:
        LookupError: If *encoding* is not a known text encoding.
    """
    # No characters are encoded, so whatever comes out is the signature only.
    return "".encode(encoding)
103
+
104
+
87
105
  class AsyncBufferedReader(BufferedReader):
88
106
 
89
107
  def __init__(
@@ -331,8 +349,9 @@ class AsyncBufferedReader(BufferedReader):
331
349
  break
332
350
  if length < BUFSIZE:
333
351
  part1, part2 = buf_view[length:].tobytes(), buf_view[:length].tobytes()
334
- buf_view[:length] = part1
335
- buf_view[length:] = part2
352
+ index = len(part1)
353
+ buf_view[:index] = part1
354
+ buf_view[index:] = part2
336
355
  running = False
337
356
  buf_pos = self._buf_pos = BUFSIZE - length
338
357
  else:
@@ -548,6 +567,7 @@ class AsyncTextIOWrapper(TextIOWrapper):
548
567
  write_through=write_through,
549
568
  )
550
569
  self.newline = newline
570
+ self._bom = get_bom(self.encoding)
551
571
 
552
572
  def __del__(self, /):
553
573
  try:
@@ -624,9 +644,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
624
644
 
625
645
  ls_parts: list[str] = []
626
646
  add_part = ls_parts.append
627
- cache = b""
628
- while data := await read(size):
629
- cache += data
647
+ cache = data
648
+ while size and len(data) == size:
630
649
  while cache:
631
650
  try:
632
651
  size -= process_part(cache)
@@ -635,15 +654,37 @@ class AsyncTextIOWrapper(TextIOWrapper):
635
654
  start, stop = e.start, e.end
636
655
  if start:
637
656
  size -= process_part(cache[:start])
638
- if e.reason == "unexpected end of data" and stop == len(cache):
657
+ if e.reason == "truncated data":
658
+ if stop == len(cache):
659
+ cache = cache[start:]
660
+ break
661
+ else:
662
+ while stop < len(cache):
663
+ stop += 1
664
+ try:
665
+ size -= process_part(cache[start:stop])
666
+ cache = cache[stop:]
667
+ break_this_loop = True
668
+ break
669
+ except UnicodeDecodeError as exc:
670
+ e = exc
671
+ if e.reason != "truncated data":
672
+ break
673
+ if stop == len(cache):
674
+ cache = cache[start:]
675
+ break_this_loop = True
676
+ break
677
+ if break_this_loop:
678
+ break
679
+ elif e.reason == "unexpected end of data" and stop == len(cache):
639
680
  cache = cache[start:]
640
681
  break
641
682
  if errors == "strict":
642
- raise
683
+ raise e
643
684
  size -= process_part(cache[start:stop], errors)
644
685
  cache = cache[stop:]
645
- if len(data) < size:
646
- break
686
+ data = await read(size)
687
+ cache += data
647
688
  if cache:
648
689
  process_part(cache, errors)
649
690
  return "".join(ls_parts)
@@ -665,9 +706,14 @@ class AsyncTextIOWrapper(TextIOWrapper):
665
706
  peek = None
666
707
  if newline:
667
708
  sepb = bytes(newline, encoding)
709
+ if bom := self._bom:
710
+ sepb = sepb.removeprefix(bom)
668
711
  else:
669
712
  crb = bytes("\r", encoding)
670
713
  lfb = bytes("\n", encoding)
714
+ if bom := self._bom:
715
+ crb = crb.removeprefix(bom)
716
+ lfb = lfb.removeprefix(bom)
671
717
  lfb_len = len(lfb)
672
718
  buf = bytearray()
673
719
  text = ""
@@ -702,17 +748,17 @@ class AsyncTextIOWrapper(TextIOWrapper):
702
748
  buf += peek_b
703
749
  if newline:
704
750
  if (idx := buf.find(sepb)) > -1:
705
- idx += 1
751
+ idx += len(sepb)
706
752
  await read(idx - buf_stop)
707
753
  del buf[idx:]
708
754
  break
709
755
  elif (idx := buf.find(lfb)) > -1:
710
- idx += 1
756
+ idx += len(lfb)
711
757
  await read(idx - buf_stop)
712
758
  del buf[idx:]
713
759
  break
714
760
  elif (idx := buf.find(crb)) > -1:
715
- idx += 1
761
+ idx += len(crb)
716
762
  await read(idx - buf_stop)
717
763
  if buf.startswith(lfb, idx):
718
764
  await read(lfb_len)
@@ -720,6 +766,8 @@ class AsyncTextIOWrapper(TextIOWrapper):
720
766
  else:
721
767
  del buf[idx:]
722
768
  break
769
+ if peek_b:
770
+ await read(len(peek_b))
723
771
  c = await read(1)
724
772
  if not c:
725
773
  reach_end = True
@@ -729,10 +777,32 @@ class AsyncTextIOWrapper(TextIOWrapper):
729
777
  try:
730
778
  text += str(buf, encoding)
731
779
  buf.clear()
732
- except UnicodeEncodeError as e:
780
+ except UnicodeDecodeError as e:
733
781
  start, stop = e.start, e.end
734
782
  if start:
735
783
  text += str(buf[:start], encoding)
784
+ if e.reason == "truncated data":
785
+ if stop == len(buf):
786
+ buf = buf[start:]
787
+ break
788
+ else:
789
+ while stop < len(buf):
790
+ stop += 1
791
+ try:
792
+ text += str(buf[start:stop], encoding)
793
+ buf = buf[stop:]
794
+ break_this_loop = True
795
+ break
796
+ except UnicodeDecodeError as exc:
797
+ e = exc
798
+ if e.reason != "truncated data":
799
+ break
800
+ if stop == len(buf):
801
+ buf = buf[start:]
802
+ break_this_loop = True
803
+ break
804
+ if break_this_loop:
805
+ break
736
806
  if e.reason == "unexpected end of data" and stop == len(buf):
737
807
  buf = buf[start:]
738
808
  break
@@ -820,11 +890,11 @@ class AsyncTextIOWrapper(TextIOWrapper):
820
890
  try:
821
891
  text += str(buf, encoding)
822
892
  buf.clear()
823
- except UnicodeEncodeError as e:
893
+ except UnicodeDecodeError as e:
824
894
  start, stop = e.start, e.end
825
895
  if start:
826
896
  text += str(buf[:start], encoding)
827
- if e.reason == "unexpected end of data" and stop == len(buf):
897
+ if e.reason in ("unexpected end of data", "truncated data") and stop == len(buf):
828
898
  buf = buf[start:]
829
899
  break
830
900
  if errors == "strict":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-filewrap
3
- Version: 0.2.2
3
+ Version: 0.2.3.1
4
4
  Summary: Python file wrappers.
5
5
  Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-filewrap
6
6
  License: MIT
@@ -0,0 +1,7 @@
1
+ LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
2
+ filewrap/__init__.py,sha256=uACyl0LdRv6joj8ayKhY2YFtmKrL5Fea_qH0T5j7KoI,66436
3
+ filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ python_filewrap-0.2.3.1.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
5
+ python_filewrap-0.2.3.1.dist-info/METADATA,sha256=8s4EU2QnJdSQlh6DqMY5ygfMtkYxWRCQppIPOw_3k-c,1415
6
+ python_filewrap-0.2.3.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
7
+ python_filewrap-0.2.3.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
2
- filewrap/__init__.py,sha256=kQ3EL_pPx5jkImO5tPuhAx6PiQnocXeHwVmBz3_o60k,62700
3
- filewrap/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- python_filewrap-0.2.2.dist-info/LICENSE,sha256=o5242_N2TgDsWwFhPn7yr8YJNF7XsJM5NxUMtcT97bc,1100
5
- python_filewrap-0.2.2.dist-info/METADATA,sha256=hOJOU4Q01T3SkJQ0VQ7qAVOZSUcvwlEe3q2uHeo_xZ4,1413
6
- python_filewrap-0.2.2.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
7
- python_filewrap-0.2.2.dist-info/RECORD,,