python-filewrap 0.2.6.1__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,26 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-filewrap
3
- Version: 0.2.6.1
3
+ Version: 0.2.7
4
4
  Summary: Python file wrappers.
5
5
  Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-filewrap
6
6
  License: MIT
7
7
  Keywords: file,wrapper
8
8
  Author: ChenyangGao
9
9
  Author-email: wosiwujm@gmail.com
10
- Requires-Python: >=3.10,<4.0
10
+ Requires-Python: >=3.12,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: OS Independent
15
15
  Classifier: Programming Language :: Python
16
16
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.10
18
- Classifier: Programming Language :: Python :: 3.11
19
17
  Classifier: Programming Language :: Python :: 3.12
20
18
  Classifier: Programming Language :: Python :: 3.13
21
19
  Classifier: Programming Language :: Python :: 3 :: Only
22
20
  Classifier: Topic :: Software Development
23
21
  Classifier: Topic :: Software Development :: Libraries
24
22
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
- Requires-Dist: python-asynctools (>=0.0.5)
23
+ Requires-Dist: python-asynctools (>=0.1)
26
24
  Requires-Dist: python-property (>=0.0.3)
27
25
  Project-URL: Repository, https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-filewrap
28
26
  Description-Content-Type: text/markdown
@@ -1,64 +1,40 @@
1
1
  #!/usr/bin/env python3
2
2
  # encoding: utf-8
3
3
 
4
- # TODO: 实现一种 memoryview,可以作为环形缓冲区使用
5
- # TODO: 使用 codecs.iterdecode 来避免解码过程中的一些重复操作
6
- # TODO: AsyncTextIOWrapper 的 read 和 readline 算法效率不高,因为会反复创建二进制对象,如果可以复用一段或者几段(内存块组)内存,则可以大大增加效率,还可以引入环形缓冲区(使用长度限定的 bytearray,之后所有操作在 memoryview 上进行,根据当前的可用区块开返回 memoryview),以减少内存分配的开销
7
- # TODO: AsyncTextIOWrapper.readline 有大量的字符串拼接操作,效率极低,需用 str.joins 方法优化
8
-
9
4
  __author__ = "ChenyangGao <https://chenyanggao.github.io>"
10
- __version__ = (0, 2, 6)
5
+ __version__ = (0, 2, 7)
11
6
  __all__ = [
12
- "Buffer", "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
13
- "AsyncBufferedReader", "AsyncTextIOWrapper",
14
- "bio_chunk_iter", "bio_chunk_async_iter",
15
- "bio_skip_iter", "bio_skip_async_iter",
16
- "bytes_iter", "bytes_async_iter",
17
- "bytes_iter_skip", "bytes_async_iter_skip",
18
- "bytes_iter_to_reader", "bytes_iter_to_async_reader",
19
- "bytes_to_chunk_iter", "bytes_to_chunk_async_iter",
20
- "bytes_ensure_part_iter", "bytes_ensure_part_async_iter",
21
- "progress_bytes_iter", "progress_bytes_async_iter",
22
- "copyfileobj", "copyfileobj_async",
7
+ "SupportsRead", "SupportsReadinto", "SupportsWrite", "SupportsSeek",
8
+ "AsyncBufferedReader", "AsyncTextIOWrapper", "buffer_length",
9
+ "bio_chunk_iter", "bio_chunk_async_iter", "bio_skip_iter", "bio_skip_async_iter",
10
+ "bytes_iter", "bytes_async_iter", "bytes_iter_skip", "bytes_async_iter_skip",
11
+ "bytes_iter_to_reader", "bytes_iter_to_async_reader", "bytes_to_chunk_iter",
12
+ "bytes_to_chunk_async_iter", "bytes_ensure_part_iter", "bytes_ensure_part_async_iter",
13
+ "progress_bytes_iter", "progress_bytes_async_iter", "copyfileobj", "copyfileobj_async",
14
+ "bound_bufferd_reader", "bound_bufferd_async_reader",
23
15
  ]
24
16
 
25
- from array import array
26
- from asyncio import to_thread, Lock as AsyncLock
27
- from collections.abc import Awaitable, AsyncIterable, AsyncIterator, Callable, Iterable, Iterator, Sized
28
- from functools import update_wrapper
29
- from io import BufferedIOBase, BufferedReader, BytesIO, RawIOBase, TextIOWrapper
30
- from inspect import isawaitable, iscoroutinefunction, isasyncgen, isgenerator
17
+ from asyncio import Lock as AsyncLock
18
+ from codecs import getdecoder, getencoder
19
+ from collections.abc import (
20
+ Awaitable, AsyncIterable, AsyncIterator, Buffer, Callable, Iterable, Iterator,
21
+ )
22
+ from functools import cached_property
23
+ from io import BufferedIOBase, BufferedReader, RawIOBase, TextIOWrapper
24
+ from inspect import isawaitable, isasyncgen, isgenerator
31
25
  from itertools import chain
32
- from os import linesep
33
26
  from re import compile as re_compile
34
- from shutil import COPY_BUFSIZE # type: ignore
35
27
  from threading import Lock
36
- from typing import cast, runtime_checkable, Any, BinaryIO, ParamSpec, Protocol, Self, TypeVar
37
-
38
- try:
39
- from collections.abc import Buffer # type: ignore
40
- except ImportError:
41
- from _ctypes import _SimpleCData
42
- from array import array
43
-
44
- @runtime_checkable
45
- class Buffer(Protocol): # type: ignore
46
- def __buffer__(self, flags: int, /) -> memoryview:
47
- pass
48
-
49
- Buffer.register(bytes)
50
- Buffer.register(bytearray)
51
- Buffer.register(memoryview)
52
- Buffer.register(_SimpleCData)
53
- Buffer.register(array)
28
+ from typing import cast, runtime_checkable, Any, BinaryIO, Final, Protocol, Self
54
29
 
55
30
  from asynctools import async_chain, ensure_async, ensure_aiter, run_async
56
31
  from property import funcproperty
57
32
 
58
33
 
59
- Args = ParamSpec("Args")
60
- _T_co = TypeVar("_T_co", covariant=True)
61
- _T_contra = TypeVar("_T_contra", contravariant=True)
34
+ READ_BUFSIZE = 1 << 13
35
+ WRITE_BUFSIZE = 1 << 16
36
+ CRE_NOT_UNIX_NEWLINES: Final = re_compile("\r\n|\r")
37
+
62
38
 
63
39
  @BufferedIOBase.register
64
40
  class VirtualBufferedReader:
@@ -67,54 +43,25 @@ class VirtualBufferedReader:
67
43
  raise TypeError("not allowed to create instances")
68
44
  return super().__new__(cls, *a, **k)
69
45
 
70
- CRE_NOT_UNIX_NEWLINES_sub = re_compile("\r\n|\r").sub
71
-
72
-
73
- def buffer_length(b: Buffer, /) -> int:
74
- if isinstance(b, Sized):
75
- return len(b)
76
- else:
77
- return len(memoryview(b))
78
-
79
-
80
- def ensure_bytes(b: Buffer, /) -> array | bytes | bytearray | memoryview:
81
- if isinstance(b, (array, bytes, bytearray, memoryview)):
82
- return b
83
- return memoryview(b)
84
-
85
46
 
86
47
  @runtime_checkable
87
- class SupportsRead(Protocol[_T_co]):
88
- def read(self, /, __length: int = ...) -> _T_co: ...
48
+ class SupportsRead[T](Protocol):
49
+ def read(self, /, size: int) -> T: ...
89
50
 
90
51
 
91
52
  @runtime_checkable
92
- class SupportsReadinto(Protocol):
93
- def readinto(self, /, buf: Buffer = ...) -> int: ...
53
+ class SupportsReadinto[T](Protocol):
54
+ def readinto(self, /, buf: T): ...
94
55
 
95
56
 
96
57
  @runtime_checkable
97
- class SupportsWrite(Protocol[_T_contra]):
98
- def write(self, /, __s: _T_contra) -> object: ...
58
+ class SupportsWrite[T](Protocol):
59
+ def write(self, /, data: T): ...
99
60
 
100
61
 
101
62
  @runtime_checkable
102
63
  class SupportsSeek(Protocol):
103
- def seek(self, /, __offset: int, __whence: int = 0) -> int: ...
104
-
105
-
106
- # TODO: 一些特定编码的 bom 用字典写死,编码名可以规范化,用 codecs.lookup(encoding).name
107
- def get_bom(encoding: str) -> bytes:
108
- code = memoryview(bytes("a", encoding))
109
- if buffer_length(code) == 1:
110
- return b""
111
- for i in range(1, buffer_length(code)):
112
- try:
113
- str(code[:i], encoding)
114
- return code[:i].tobytes()
115
- except UnicodeDecodeError:
116
- pass
117
- raise UnicodeError
64
+ def seek(self, /, offset: int, whence: int): ...
118
65
 
119
66
 
120
67
  class AsyncBufferedReader(BufferedReader):
@@ -123,9 +70,11 @@ class AsyncBufferedReader(BufferedReader):
123
70
  self,
124
71
  /,
125
72
  raw: RawIOBase,
126
- buffer_size: int = 8192,
73
+ buffer_size: int = 0,
127
74
  ):
128
- super().__init__(raw, min(buffer_size, 1))
75
+ super().__init__(raw, 1)
76
+ if buffer_size <= 0:
77
+ buffer_size = READ_BUFSIZE
129
78
  self._buf = bytearray(buffer_size)
130
79
  self._buf_view = memoryview(self._buf)
131
80
  self._buf_pos = 0
@@ -163,6 +112,33 @@ class AsyncBufferedReader(BufferedReader):
163
112
  def length(self, /) -> int:
164
113
  return getattr(self.raw, "length")
165
114
 
115
+ @cached_property
116
+ def _close(self, /):
117
+ try:
118
+ return getattr(self.raw, "aclose")
119
+ except AttributeError:
120
+ return getattr(self.raw, "close")
121
+
122
+ @cached_property
123
+ def _flush(self, /):
124
+ return ensure_async(self.raw.flush, threaded=True)
125
+
126
+ @cached_property
127
+ def _read(self, /):
128
+ return ensure_async(self.raw.read, threaded=True)
129
+
130
+ @cached_property
131
+ def _readinto(self, /):
132
+ return ensure_async(self.raw.readinto, threaded=True)
133
+
134
+ @cached_property
135
+ def _readline(self, /):
136
+ return ensure_async(self.raw.readline, threaded=True)
137
+
138
+ @cached_property
139
+ def _seek(self, /):
140
+ return ensure_async(self.raw.seek, threaded=True)
141
+
166
142
  def calibrate(self, /, target: int = -1) -> bool:
167
143
  pos = self._pos
168
144
  if target < 0:
@@ -183,25 +159,15 @@ class AsyncBufferedReader(BufferedReader):
183
159
  return reusable
184
160
 
185
161
  async def aclose(self, /):
186
- raw = self.raw
187
- try:
188
- ret = getattr(raw, "aclose")()
189
- except (AttributeError, TypeError):
190
- ret = getattr(raw, "close")()
162
+ ret = self.close()
191
163
  if isawaitable(ret):
192
164
  await ret
193
165
 
194
166
  def close(self, /):
195
- raw = self.raw
196
- try:
197
- ret = getattr(raw, "aclose")()
198
- except (AttributeError, TypeError):
199
- ret = getattr(raw, "close")()
200
- if isawaitable(ret):
201
- run_async(ret)
167
+ return run_async(self._close())
202
168
 
203
169
  async def flush(self, /):
204
- return await ensure_async(self.raw.flush, threaded=True)()
170
+ return await self._flush()
205
171
 
206
172
  def peek(self, size: int = 0, /) -> bytes:
207
173
  start, stop = self._buf_pos, self._buf_stop
@@ -242,7 +208,7 @@ class AsyncBufferedReader(BufferedReader):
242
208
  buf_size += await self.readinto(buffer_view[buf_size:])
243
209
  return buffer_view[:buf_size].tobytes()
244
210
  BUFSIZE = buffer_length(buf_view)
245
- read = ensure_async(self.raw.read, threaded=True)
211
+ read = self._read
246
212
  buffer = bytearray(buf_view[buf_pos:buf_stop])
247
213
  try:
248
214
  while data := await read(BUFSIZE):
@@ -287,7 +253,7 @@ class AsyncBufferedReader(BufferedReader):
287
253
  return buf_view[buf_pos:buf_pos_stop].tobytes()
288
254
  size -= buf_size
289
255
  try:
290
- data = await ensure_async(self.raw.read, threaded=True)(size)
256
+ data = await self._read(size)
291
257
  except:
292
258
  self.calibrate()
293
259
  raise
@@ -330,9 +296,9 @@ class AsyncBufferedReader(BufferedReader):
330
296
  self._pos += size
331
297
  return size
332
298
  try:
333
- readinto = ensure_async(self.raw.readinto, threaded=True)
299
+ readinto = self._readinto
334
300
  except AttributeError:
335
- read = ensure_async(self.raw.read, threaded=True)
301
+ read = self._read
336
302
  async def readinto(buffer, /) -> int:
337
303
  data = await read(buffer_length(buffer))
338
304
  if data:
@@ -402,9 +368,9 @@ class AsyncBufferedReader(BufferedReader):
402
368
  self._pos += size
403
369
  return size
404
370
  try:
405
- readinto = ensure_async(self.raw.readinto, threaded=True)
371
+ readinto = self._readinfo
406
372
  except AttributeError:
407
- read = ensure_async(self.raw.read, threaded=True)
373
+ read = self._read
408
374
  async def readinto(buffer, /) -> int:
409
375
  data = await read(buffer_length(buffer))
410
376
  if data:
@@ -471,14 +437,14 @@ class AsyncBufferedReader(BufferedReader):
471
437
  self._pos += buf_pos_stop - buf_pos
472
438
  return self._buf_view[buf_pos:buf_pos_stop].tobytes()
473
439
  try:
474
- readline = ensure_async(self.raw.readline, threaded=True)
440
+ readline = self._readline
475
441
  except AttributeError:
476
442
  async def readline(size: None | int = -1, /) -> bytes:
477
443
  if size == 0:
478
444
  return b""
479
445
  if size is None:
480
446
  size = -1
481
- read = ensure_async(self.raw.read, threaded=True)
447
+ read = self._read
482
448
  cache = bytearray()
483
449
  if size > 0:
484
450
  while size and (c := await read(1)):
@@ -541,7 +507,7 @@ class AsyncBufferedReader(BufferedReader):
541
507
  elif whence == 2:
542
508
  if target > 0:
543
509
  raise ValueError("target out of range: overflow")
544
- target = self._pos = await ensure_async(self.raw.seek, threaded=True)(target, 2)
510
+ target = self._pos = await self._seek(target, 2)
545
511
  if target != pos:
546
512
  self.calibrate(target)
547
513
  return target
@@ -553,7 +519,7 @@ class AsyncBufferedReader(BufferedReader):
553
519
  self._buf_pos = buf_pos
554
520
  pos = self._pos = target
555
521
  else:
556
- pos = self._pos = await ensure_async(self.raw.seek, threaded=True)(target, 0)
522
+ pos = self._pos = await self._seek(target, 0)
557
523
  self._buf_pos = self._buf_stop = 0
558
524
  return pos
559
525
 
@@ -582,7 +548,7 @@ class AsyncTextIOWrapper(TextIOWrapper):
582
548
  write_through=write_through,
583
549
  )
584
550
  self.newline = newline
585
- self._bom = get_bom(self.encoding)
551
+ self._text = ""
586
552
 
587
553
  def __del__(self, /):
588
554
  try:
@@ -608,107 +574,116 @@ class AsyncTextIOWrapper(TextIOWrapper):
608
574
  def __getattr__(self, attr, /):
609
575
  return getattr(self.buffer, attr)
610
576
 
611
- async def aclose(self, /):
612
- buffer = self.buffer
577
+ @cached_property
578
+ def _close(self, /):
613
579
  try:
614
- ret = getattr(buffer, "aclose")()
615
- except (AttributeError, TypeError):
616
- ret = getattr(buffer, "close")()
580
+ return getattr(self.buffer, "aclose")
581
+ except AttributeError:
582
+ return getattr(self.buffer, "close")
583
+
584
+ @cached_property
585
+ def _flush(self, /):
586
+ return ensure_async(self.buffer.flush, threaded=True)
587
+
588
+ @cached_property
589
+ def _read(self, /):
590
+ return ensure_async(self.buffer.read, threaded=True)
591
+
592
+ @cached_property
593
+ def _seek(self, /):
594
+ return ensure_async(getattr(self.buffer, "seek"), threaded=True)
595
+
596
+ @cached_property
597
+ def _tell(self, /):
598
+ return ensure_async(getattr(self.buffer, "tell"), threaded=True)
599
+
600
+ @cached_property
601
+ def _truncate(self, /):
602
+ return ensure_async(self.buffer.truncate, threaded=True)
603
+
604
+ @cached_property
605
+ def _write(self, /):
606
+ return ensure_async(self.buffer.write, threaded=True)
607
+
608
+ async def aclose(self, /):
609
+ ret = self.close()
617
610
  if isawaitable(ret):
618
611
  await ret
619
612
 
620
613
  def close(self, /):
621
- buffer = self.buffer
622
- try:
623
- ret = getattr(buffer, "aclose")()
624
- except (AttributeError, TypeError):
625
- ret = getattr(buffer, "close")()
626
- if isawaitable(ret):
627
- run_async(ret)
614
+ return run_async(self._close())
628
615
 
629
616
  async def flush(self, /):
630
- return await ensure_async(self.buffer.flush, threaded=True)()
617
+ return await self._flush()
631
618
 
632
619
  async def read(self, size: None | int = -1, /) -> str: # type: ignore
633
620
  if self.closed:
634
621
  raise ValueError("I/O operation on closed file.")
635
622
  if size == 0:
636
623
  return ""
637
- if size is None:
624
+ if size is None or size < 0:
638
625
  size = -1
639
- read = ensure_async(self.buffer.read, threaded=True)
640
- encoding = self.encoding
626
+ total = size
627
+ if text := self._text:
628
+ if 0 < size <= len(text):
629
+ self._text = text[size:]
630
+ return text[:size]
631
+ if size > 0:
632
+ size -= len(text)
633
+ decode = getdecoder(self.encoding)
634
+ read = self._read
641
635
  errors = self.errors or "strict"
642
636
  newline = self.newline
643
- if size < 0:
644
- data = await read(-1)
645
- else:
646
- data = await read(size)
647
- if not isinstance(data, Sized):
648
- data = memoryview(data)
637
+ data = await read(size)
649
638
  if size < 0 or buffer_length(data) < size:
650
- text = str(data, encoding, errors)
639
+ text_new, _ = decode(data, errors)
651
640
  if newline is None:
652
- text = CRE_NOT_UNIX_NEWLINES_sub("\n", text)
653
- return text
641
+ text_new = CRE_NOT_UNIX_NEWLINES.sub("\n", text_new)
642
+ self._text = ""
643
+ return text_new
644
+
645
+ ls_parts = [text]
646
+ add_part = ls_parts.append
654
647
 
655
648
  def process_part(data, errors="strict", /) -> int:
656
- text = str(data, encoding, errors)
657
- if newline is None:
658
- text = CRE_NOT_UNIX_NEWLINES_sub("\n", text)
649
+ nonlocal size
650
+ text, n = decode(data, errors)
659
651
  add_part(text)
660
- return len(text)
652
+ if newline is None:
653
+ if text != "\r":
654
+ newlines = CRE_NOT_UNIX_NEWLINES.findall(text)
655
+ size -= len(text) - (sum(map(len, newlines)) - len(newlines) + text.endswith("\r"))
656
+ else:
657
+ size -= len(text)
658
+ return n
661
659
 
662
- ls_parts: list[str] = []
663
- add_part = ls_parts.append
664
- if not isinstance(data, Sized):
665
- data = memoryview(data)
666
- cache = bytes(data)
660
+ cache: bytes | memoryview = memoryview(data)
667
661
  while size and buffer_length(data) == size:
668
662
  while cache:
669
663
  try:
670
- size -= process_part(cache)
671
- cache = b""
664
+ cache = cache[process_part(cache):]
665
+ break
672
666
  except UnicodeDecodeError as e:
673
667
  start, stop = e.start, e.end
674
668
  if start:
675
- size -= process_part(cache[:start])
676
- if e.reason == "truncated data":
677
- if stop == buffer_length(cache):
669
+ process_part(cache[:start])
670
+ if e.reason in ("truncated data", "unexpected end of data"):
671
+ if stop == len(cache):
678
672
  cache = cache[start:]
679
673
  break
680
- else:
681
- while stop < buffer_length(cache):
682
- stop += 1
683
- try:
684
- size -= process_part(cache[start:stop])
685
- cache = cache[stop:]
686
- break_this_loop = True
687
- break
688
- except UnicodeDecodeError as exc:
689
- e = exc
690
- if e.reason != "truncated data":
691
- break
692
- if stop == buffer_length(cache):
693
- cache = cache[start:]
694
- break_this_loop = True
695
- break
696
- if break_this_loop:
697
- break
698
- elif e.reason == "unexpected end of data" and stop == buffer_length(cache):
699
- cache = cache[start:]
700
- break
701
- if errors == "strict":
674
+ elif errors == "strict":
702
675
  raise e
703
- size -= process_part(cache[start:stop], errors)
704
676
  cache = cache[stop:]
705
677
  data = await read(size)
706
- if not isinstance(data, Sized):
707
- data = memoryview(data)
708
- cache += data
709
- if cache:
678
+ cache = memoryview(bytes(cache) + data)
679
+ if size and cache:
710
680
  process_part(cache, errors)
711
- return "".join(ls_parts)
681
+ text_new = "".join(ls_parts)
682
+ if newline is None:
683
+ text_new = CRE_NOT_UNIX_NEWLINES.sub("\n", text_new)
684
+ text += text_new
685
+ self._text = text[total:]
686
+ return text[:total]
712
687
 
713
688
  async def readline(self, size=-1, /) -> str: # type: ignore
714
689
  if self.closed:
@@ -717,232 +692,53 @@ class AsyncTextIOWrapper(TextIOWrapper):
717
692
  return ""
718
693
  if size is None:
719
694
  size = -1
720
- read = ensure_async(self.buffer.read, threaded=True)
721
- seek = self.seek
722
- encoding = self.encoding
723
- errors = self.errors or "strict"
724
695
  newline = self.newline
725
- peek = getattr(self.buffer, "peek", None)
726
- if not callable(peek):
727
- peek = None
728
- if newline:
729
- sepb = bytes(newline, encoding)
730
- if bom := self._bom:
731
- sepb = sepb.removeprefix(bom)
732
- else:
733
- crb = bytes("\r", encoding)
734
- lfb = bytes("\n", encoding)
735
- if bom := self._bom:
736
- crb = crb.removeprefix(bom)
737
- lfb = lfb.removeprefix(bom)
738
- lfb_len = buffer_length(lfb)
739
- buf = bytearray()
740
- text = ""
741
- reach_end = False
742
- if size < 0:
743
- while True:
744
- if peek is None:
745
- while c := await read(1):
746
- buf += c
747
- if newline:
748
- if buf.endswith(sepb):
749
- break
750
- elif buf.endswith(lfb):
751
- break
752
- elif buf.endswith(crb):
753
- peek_maybe_lfb = await read(lfb_len)
754
- if not isinstance(peek_maybe_lfb, Sized):
755
- peek_maybe_lfb = memoryview(peek_maybe_lfb)
756
- if peek_maybe_lfb == lfb:
757
- buf += lfb
758
- elif peek_maybe_lfb:
759
- # TODO: 这是一个提前量,未必需要立即往回 seek,因为转换为 str 后可能尾部不是 \r(因为可以和前面的符号结合),所以这个可能可以被复用,如果需要优化,可以在程序结束时的 finally 部分最终执行 seek(可能最终字符被消耗所以不需要 seek)
760
- o = buffer_length(peek_maybe_lfb)
761
- await seek(-o, 1)
762
- if o < lfb_len:
763
- reach_end = True
764
- break
765
- else:
766
- reach_end = True
696
+ if newline is None:
697
+ newline = "\n"
698
+ def find_stop(text: str, /) -> int:
699
+ if newline:
700
+ idx = text.find(newline)
701
+ if idx > -1:
702
+ idx += len(newline)
767
703
  else:
768
- while True:
769
- buf_stop = buffer_length(buf)
770
- peek_b = peek()
771
- if peek_b:
772
- buf += peek_b
773
- if newline:
774
- if (idx := buf.find(sepb)) > -1:
775
- idx += buffer_length(sepb)
776
- await read(idx - buf_stop)
777
- del buf[idx:]
778
- break
779
- elif (idx := buf.find(lfb)) > -1:
780
- idx += buffer_length(lfb)
781
- await read(idx - buf_stop)
782
- del buf[idx:]
783
- break
784
- elif (idx := buf.find(crb)) > -1:
785
- idx += buffer_length(crb)
786
- await read(idx - buf_stop)
787
- if buf.startswith(lfb, idx):
788
- await read(lfb_len)
789
- del buf[idx+lfb_len:]
790
- else:
791
- del buf[idx:]
792
- break
793
- if peek_b:
794
- await read(buffer_length(peek_b))
795
- c = await read(1)
796
- if not c:
797
- reach_end = True
798
- break
799
- buf += c
800
- while buf:
801
- try:
802
- text += str(buf, encoding)
803
- buf.clear()
804
- except UnicodeDecodeError as e:
805
- start, stop = e.start, e.end
806
- if start:
807
- text += str(buf[:start], encoding)
808
- if e.reason == "truncated data":
809
- if stop == buffer_length(buf):
810
- buf = buf[start:]
811
- break
812
- else:
813
- while stop < buffer_length(buf):
814
- stop += 1
815
- try:
816
- text += str(buf[start:stop], encoding)
817
- buf = buf[stop:]
818
- break_this_loop = True
819
- break
820
- except UnicodeDecodeError as exc:
821
- e = exc
822
- if e.reason != "truncated data":
823
- break
824
- if stop == buffer_length(buf):
825
- buf = buf[start:]
826
- break_this_loop = True
827
- break
828
- if break_this_loop:
829
- break
830
- if e.reason == "unexpected end of data" and stop == buffer_length(buf):
831
- buf = buf[start:]
832
- break
833
- if errors == "strict":
834
- raise
835
- text += str(buf[start:stop], encoding, errors)
836
- buf = buf[stop:]
704
+ idx = 0
705
+ else:
706
+ idx1 = text.find("\r")
707
+ idx2 = text.find("\n")
708
+ if idx1 > -1:
709
+ if idx2 == -1:
710
+ idx = idx1
711
+ elif idx2 - idx1 == 1:
712
+ idx = idx2
713
+ elif idx1 < idx2:
714
+ idx = idx1
715
+ else:
716
+ idx = idx2
837
717
  else:
838
- if newline:
839
- if text.endswith(newline):
840
- return text[:-len(newline)] + "\n"
841
- elif newline is None:
842
- if text.endswith("\r\n"):
843
- return text[:-2] + "\n"
844
- elif text.endswith("\r"):
845
- return text[:-1] + "\n"
846
- elif text.endswith("\n"):
847
- return text
848
- elif text.endswith(("\r\n", "\r", "\n")):
849
- return text
850
- if reach_end:
851
- return text
718
+ idx = idx2
719
+ idx += 1
720
+ return idx
721
+ if size > 0:
722
+ text = await self.read(size)
723
+ stop = find_stop(text)
724
+ if not stop or stop == len(text):
725
+ return text
726
+ self._text = text[stop:] + self._text
727
+ return text[:stop]
852
728
  else:
853
- while True:
854
- rem = size - len(text)
855
- if peek is None:
856
- while rem and (c := await read(1)):
857
- buf += c
858
- rem -= 1
859
- if newline:
860
- if buf.endswith(sepb):
861
- break
862
- elif buf.endswith(lfb):
863
- break
864
- elif buf.endswith(crb):
865
- peek_maybe_lfb = await read(lfb_len)
866
- if not isinstance(peek_maybe_lfb, Sized):
867
- peek_maybe_lfb = memoryview(peek_maybe_lfb)
868
- if peek_maybe_lfb == lfb:
869
- buf += lfb
870
- elif peek_maybe_lfb:
871
- o = buffer_length(peek_maybe_lfb)
872
- await seek(-o, 1)
873
- if o < lfb_len:
874
- reach_end = True
875
- break
729
+ ls_part: list[str] = []
730
+ add_part = ls_part.append
731
+ while text := await self.read(READ_BUFSIZE):
732
+ if stop := find_stop(text):
733
+ if stop == len(text):
734
+ add_part(text)
876
735
  else:
877
- reach_end = True
878
- else:
879
- while rem:
880
- buf_stop = buffer_length(buf)
881
- peek_b = peek()
882
- if peek_b:
883
- if buffer_length(peek_b) >= rem:
884
- buf += peek_b[:rem]
885
- rem = 0
886
- else:
887
- buf += peek_b
888
- rem -= buffer_length(peek_b)
889
- if newline:
890
- if (idx := buf.find(sepb)) > -1:
891
- idx += 1
892
- await read(idx - buf_stop)
893
- del buf[idx:]
894
- break
895
- elif (idx := buf.find(lfb)) > -1:
896
- idx += 1
897
- await read(idx - buf_stop)
898
- del buf[idx:]
899
- break
900
- elif (idx := buf.find(crb)) > -1:
901
- idx += 1
902
- await read(idx - buf_stop)
903
- if buf.startswith(lfb, idx):
904
- await read(lfb_len)
905
- del buf[idx+lfb_len:]
906
- else:
907
- del buf[idx:]
908
- break
909
- if rem:
910
- c = await read(1)
911
- if not c:
912
- reach_end = True
913
- break
914
- rem -= 1
915
- buf += c
916
- while buf:
917
- try:
918
- text += str(buf, encoding)
919
- buf.clear()
920
- except UnicodeDecodeError as e:
921
- start, stop = e.start, e.end
922
- if start:
923
- text += str(buf[:start], encoding)
924
- if e.reason in ("unexpected end of data", "truncated data") and stop == buffer_length(buf):
925
- buf = buf[start:]
926
- break
927
- if errors == "strict":
928
- raise
929
- text += str(buf[start:stop], encoding, errors)
930
- buf = buf[stop:]
736
+ add_part(text[:stop])
737
+ self._text = text[stop:] + self._text
738
+ break
931
739
  else:
932
- if newline:
933
- if text.endswith(newline):
934
- return text[:-len(newline)] + "\n"
935
- elif newline is None:
936
- if text.endswith("\r\n"):
937
- return text[:-2] + "\n"
938
- elif text.endswith("\r"):
939
- return text[:-1] + "\n"
940
- elif text.endswith("\n"):
941
- return text
942
- elif text.endswith(("\r\n", "\r", "\n")):
943
- return text
944
- if reach_end or len(text) == size:
945
- return text
740
+ add_part(text)
741
+ return "".join(ls_part)
946
742
 
947
743
  async def readlines(self, hint=-1, /) -> list[str]: # type: ignore
948
744
  if self.closed:
@@ -978,25 +774,26 @@ class AsyncTextIOWrapper(TextIOWrapper):
978
774
  self.newline = newline
979
775
 
980
776
  async def seek(self, target: int, whence: int = 0, /) -> int: # type: ignore
981
- return await ensure_async(getattr(self.buffer, "seek"), threaded=True)(target, whence)
777
+ pos = self.tell()
778
+ cur = await self._seek(target, whence)
779
+ if cur != pos:
780
+ self._text = ""
781
+ return cur
982
782
 
983
783
  def tell(self, /) -> int:
984
- return getattr(self.buffer, "tell")()
784
+ return self._tell()
985
785
 
986
786
  async def truncate(self, pos: None | int = None, /) -> int: # type: ignore
987
- return await ensure_async(getattr(self.buffer, "truncate"), threaded=True)(pos)
787
+ return await self._truncate(pos)
988
788
 
989
789
  async def write(self, text: str, /) -> int: # type: ignore
990
- match self.newline:
991
- case "" | "\n":
992
- pass
993
- case None:
994
- if linesep != "\n":
995
- text = text.replace("\n", linesep)
996
- case _:
997
- text = text.replace("\n", linesep)
790
+ if newline := self.newline:
791
+ text.replace("\n", newline)
998
792
  data = bytes(text, self.encoding, self.errors or "strict")
999
- await ensure_async(self.buffer.write, threaded=True)(data)
793
+ if self.tell():
794
+ if bom := get_bom(self.encoding):
795
+ data.removeprefix(bom)
796
+ await self._write(data)
1000
797
  if self.write_through or self.line_buffering and ("\n" in text or "\r" in text):
1001
798
  await self.flush()
1002
799
  return len(text)
@@ -1007,11 +804,25 @@ class AsyncTextIOWrapper(TextIOWrapper):
1007
804
  await write(line)
1008
805
 
1009
806
 
807
+ def get_bom(encoding: str, /) -> bytes:
808
+ """get BOM (byte order mark) of the encoding
809
+ """
810
+ bom, _ = getencoder(encoding)("")
811
+ return bom
812
+
813
+
814
+ def buffer_length(b: Buffer, /) -> int:
815
+ try:
816
+ return len(b) # type: ignore
817
+ except:
818
+ return len(memoryview(b))
819
+
820
+
1010
821
  def bio_chunk_iter(
1011
822
  bio: SupportsRead[Buffer] | SupportsReadinto | Callable[[int], Buffer],
1012
823
  /,
1013
824
  size: int = -1,
1014
- chunksize: int = COPY_BUFSIZE,
825
+ chunksize: int = WRITE_BUFSIZE,
1015
826
  can_buffer: bool = False,
1016
827
  callback: None | Callable[[int], Any] = None,
1017
828
  ) -> Iterator[Buffer]:
@@ -1077,16 +888,16 @@ def bio_chunk_iter(
1077
888
 
1078
889
 
1079
890
  async def bio_chunk_async_iter(
1080
- bio: SupportsRead[Buffer] | SupportsRead[Awaitable[Buffer]] | SupportsReadinto | Callable[[int], Buffer | Awaitable[Buffer]],
891
+ bio: SupportsRead[Buffer] | SupportsRead[Awaitable[Buffer]] | SupportsReadinto | Callable[[int], Buffer] | Callable[[int], Awaitable[Buffer]],
1081
892
  /,
1082
893
  size: int = -1,
1083
- chunksize: int = COPY_BUFSIZE,
894
+ chunksize: int = WRITE_BUFSIZE,
1084
895
  can_buffer: bool = False,
1085
896
  callback: None | Callable[[int], Any] = None,
1086
897
  ) -> AsyncIterator[Buffer]:
1087
898
  use_readinto = False
1088
899
  if callable(bio):
1089
- read = ensure_async(bio, threaded=True)
900
+ read: Callable[[int], Awaitable[Buffer]] = ensure_async(bio, threaded=True)
1090
901
  elif can_buffer and isinstance(bio, SupportsReadinto):
1091
902
  readinto = ensure_async(bio.readinto, threaded=True)
1092
903
  use_readinto = True
@@ -1148,7 +959,7 @@ def bio_skip_iter(
1148
959
  bio: SupportsRead[Buffer] | SupportsReadinto | Callable[[int], Buffer],
1149
960
  /,
1150
961
  size: int = -1,
1151
- chunksize: int = COPY_BUFSIZE,
962
+ chunksize: int = WRITE_BUFSIZE,
1152
963
  callback: None | Callable[[int], Any] = None,
1153
964
  ) -> Iterator[int]:
1154
965
  if size == 0:
@@ -1164,7 +975,7 @@ def bio_skip_iter(
1164
975
  length = seek(0, 2) - curpos
1165
976
  except Exception:
1166
977
  if chunksize <= 0:
1167
- chunksize = COPY_BUFSIZE
978
+ chunksize = WRITE_BUFSIZE
1168
979
  if callable(bio):
1169
980
  read = bio
1170
981
  elif hasattr(bio, "readinto"):
@@ -1216,10 +1027,10 @@ def bio_skip_iter(
1216
1027
 
1217
1028
 
1218
1029
  async def bio_skip_async_iter(
1219
- bio: SupportsRead[Buffer] | SupportsRead[Awaitable[Buffer]] | SupportsReadinto | Callable[[int], Buffer | Awaitable[Buffer]],
1030
+ bio: SupportsRead[Buffer] | SupportsRead[Awaitable[Buffer]] | SupportsReadinto | Callable[[int], Buffer] | Callable[[int], Awaitable[Buffer]],
1220
1031
  /,
1221
1032
  size: int = -1,
1222
- chunksize: int = COPY_BUFSIZE,
1033
+ chunksize: int = WRITE_BUFSIZE,
1223
1034
  callback: None | Callable[[int], Any] = None,
1224
1035
  ) -> AsyncIterator[int]:
1225
1036
  if size == 0:
@@ -1235,9 +1046,9 @@ async def bio_skip_async_iter(
1235
1046
  length = (await seek(0, 2)) - curpos
1236
1047
  except Exception:
1237
1048
  if chunksize <= 0:
1238
- chunksize = COPY_BUFSIZE
1049
+ chunksize = WRITE_BUFSIZE
1239
1050
  if callable(bio):
1240
- read = ensure_async(bio, threaded=True)
1051
+ read: Callable[[int], Awaitable[Buffer]] = ensure_async(bio, threaded=True)
1241
1052
  elif hasattr(bio, "readinto"):
1242
1053
  readinto = ensure_async(bio.readinto, threaded=True)
1243
1054
  buf = bytearray(chunksize)
@@ -1458,7 +1269,7 @@ def bytes_iter_to_reader(
1458
1269
  del unconsumed[:]
1459
1270
  try:
1460
1271
  while True:
1461
- b = ensure_bytes(getnext())
1272
+ b = memoryview(getnext())
1462
1273
  if not b:
1463
1274
  continue
1464
1275
  m = n + buffer_length(b)
@@ -1635,7 +1446,7 @@ def bytes_iter_to_async_reader(
1635
1446
  del unconsumed[:]
1636
1447
  try:
1637
1448
  while True:
1638
- b = ensure_bytes(await getnext())
1449
+ b = memoryview(await getnext())
1639
1450
  if not b:
1640
1451
  continue
1641
1452
  m = n + buffer_length(b)
@@ -1740,7 +1551,7 @@ def bytes_iter_to_async_reader(
1740
1551
  def bytes_to_chunk_iter(
1741
1552
  b: Buffer,
1742
1553
  /,
1743
- chunksize: int = COPY_BUFSIZE,
1554
+ chunksize: int = WRITE_BUFSIZE,
1744
1555
  ) -> Iterator[memoryview]:
1745
1556
  m = memoryview(b)
1746
1557
  for i in range(0, buffer_length(m), chunksize):
@@ -1750,7 +1561,7 @@ def bytes_to_chunk_iter(
1750
1561
  async def bytes_to_chunk_async_iter(
1751
1562
  b: Buffer,
1752
1563
  /,
1753
- chunksize: int = COPY_BUFSIZE,
1564
+ chunksize: int = WRITE_BUFSIZE,
1754
1565
  ) -> AsyncIterator[memoryview]:
1755
1566
  m = memoryview(b)
1756
1567
  for i in range(0, buffer_length(m), chunksize):
@@ -1760,7 +1571,7 @@ async def bytes_to_chunk_async_iter(
1760
1571
  def bytes_ensure_part_iter(
1761
1572
  it: Iterable[Buffer],
1762
1573
  /,
1763
- partsize: int = COPY_BUFSIZE,
1574
+ partsize: int = WRITE_BUFSIZE,
1764
1575
  ) -> Iterator[Buffer]:
1765
1576
  n = partsize
1766
1577
  for b in it:
@@ -1788,7 +1599,7 @@ def bytes_ensure_part_iter(
1788
1599
  async def bytes_ensure_part_async_iter(
1789
1600
  it: Iterable[Buffer] | AsyncIterable[Buffer],
1790
1601
  /,
1791
- partsize: int = COPY_BUFSIZE,
1602
+ partsize: int = WRITE_BUFSIZE,
1792
1603
  ) -> AsyncIterator[Buffer]:
1793
1604
  n = partsize
1794
1605
  async for b in ensure_aiter(it):
@@ -1813,7 +1624,7 @@ async def bytes_ensure_part_async_iter(
1813
1624
  n = partsize
1814
1625
 
1815
1626
 
1816
- def progress_bytes_iter(
1627
+ def progress_bytes_iter[**Args](
1817
1628
  it: Iterable[Buffer] | Callable[[], Buffer],
1818
1629
  make_progress: None | Callable[Args, Any] = None,
1819
1630
  /,
@@ -1846,7 +1657,7 @@ def progress_bytes_iter(
1846
1657
  close_progress()
1847
1658
 
1848
1659
 
1849
- async def progress_bytes_async_iter(
1660
+ async def progress_bytes_async_iter[**Args](
1850
1661
  it: Iterable[Buffer] | AsyncIterable[Buffer] | Callable[[], Buffer] | Callable[[], Awaitable[Buffer]],
1851
1662
  make_progress: None | Callable[Args, Any] = None,
1852
1663
  /,
@@ -1897,10 +1708,10 @@ def copyfileobj(
1897
1708
  fsrc,
1898
1709
  fdst: SupportsWrite[Buffer],
1899
1710
  /,
1900
- chunksize: int = COPY_BUFSIZE,
1711
+ chunksize: int = WRITE_BUFSIZE,
1901
1712
  ):
1902
1713
  if chunksize <= 0:
1903
- chunksize = COPY_BUFSIZE
1714
+ chunksize = WRITE_BUFSIZE
1904
1715
  fdst_write = fdst.write
1905
1716
  fsrc_read = getattr(fsrc, "read", None)
1906
1717
  fsrc_readinto = getattr(fsrc, "readinto", None)
@@ -1922,11 +1733,11 @@ async def copyfileobj_async(
1922
1733
  fsrc,
1923
1734
  fdst: SupportsWrite[Buffer],
1924
1735
  /,
1925
- chunksize: int = COPY_BUFSIZE,
1736
+ chunksize: int = WRITE_BUFSIZE,
1926
1737
  threaded: bool = True,
1927
1738
  ):
1928
1739
  if chunksize <= 0:
1929
- chunksize = COPY_BUFSIZE
1740
+ chunksize = WRITE_BUFSIZE
1930
1741
  fdst_write = ensure_async(fdst.write, threaded=threaded)
1931
1742
  fsrc_read = getattr(fsrc, "read", None)
1932
1743
  fsrc_readinto = getattr(fsrc, "readinto", None)
@@ -1946,3 +1757,72 @@ async def copyfileobj_async(
1946
1757
  if chunk:
1947
1758
  await fdst_write(chunk)
1948
1759
 
1760
+
1761
def bound_bufferd_reader(
    file: SupportsRead[Buffer],
    size: int = -1,
) -> SupportsRead[Buffer]:
    """Wrap ``file`` so that at most ``size`` bytes can be read through it.

    :param file: object providing ``read`` (and optionally ``readinto``).
    :param size: maximum number of bytes the returned reader will deliver.
        A negative value means "no bound": ``file`` itself is returned.

    :return: ``file`` unchanged when ``size < 0``; otherwise an object with
        ``read`` and ``readinto`` methods that stop once ``size`` bytes have
        been consumed.
    """
    if size < 0:
        return file
    f_read = file.read
    f_readinto = getattr(file, "readinto", None)

    class Reader:
        @staticmethod
        def read(n: None | int = -1, /) -> Buffer:
            """Read up to ``n`` bytes (all remaining when ``n`` is None or negative)."""
            nonlocal size
            if n == 0 or size <= 0:
                return b""
            if n is None or n < 0:
                n = size
            data = f_read(min(size, n))
            # Account for what was actually returned: the underlying read may
            # be short, and the rest of the window must stay reachable.
            size -= buffer_length(data)
            return data

        @staticmethod
        def readinto(buffer, /) -> int:
            """Fill ``buffer`` with at most the remaining bytes; return the count.

            :raise NotImplementedError: if ``file`` has no ``readinto``.
            """
            nonlocal size
            if f_readinto is None:
                raise NotImplementedError("readinto")
            if size <= 0:
                return 0
            # NOTE(review): the slice counts items of ``buffer``; this assumes
            # a byte-oriented buffer (itemsize 1) -- confirm with callers.
            n = f_readinto(memoryview(buffer)[:size])
            size -= n
            return n

    return Reader()
1793
+
1794
+
1795
def bound_bufferd_async_reader(
    file: SupportsRead[Buffer] | SupportsRead[Awaitable[Buffer]],
    size: int = -1,
) -> SupportsRead[Awaitable[Buffer]]:
    """Wrap ``file`` in an async reader delivering at most ``size`` bytes.

    ``file.read`` (and ``file.readinto`` when present) are passed through
    ``ensure_async(..., threaded=True)`` before use; presumably this makes
    both sync and async implementations awaitable -- see that helper.

    :param file: object providing ``read`` (and optionally ``readinto``).
    :param size: maximum number of bytes the returned reader will deliver.
        A negative value means "no bound", matching ``bound_bufferd_reader``:
        reads are forwarded to ``file`` without a limit.

    :return: an object with awaitable ``read`` and ``readinto`` methods.
    """
    f_read: Callable[[int], Awaitable[Buffer]] = ensure_async(file.read, threaded=True)
    f_readinto = getattr(file, "readinto", None)
    if f_readinto is not None:
        f_readinto = ensure_async(f_readinto, threaded=True)
    # Decided once: a negative size must not be mistaken for "exhausted".
    unbounded = size < 0

    class Reader:
        @staticmethod
        async def read(n: None | int = -1, /) -> Buffer:
            """Read up to ``n`` bytes (all remaining when ``n`` is None or negative)."""
            nonlocal size
            if n == 0:
                return b""
            if unbounded:
                return await f_read(-1 if n is None else n)
            if size <= 0:
                return b""
            if n is None or n < 0:
                n = size
            data = await f_read(min(size, n))
            # Account for what was actually returned: the underlying read may
            # be short, and the rest of the window must stay reachable.
            size -= buffer_length(data)
            return data

        @staticmethod
        async def readinto(buffer, /) -> int:
            """Fill ``buffer`` with at most the remaining bytes; return the count.

            :raise NotImplementedError: if ``file`` has no ``readinto``.
            """
            nonlocal size
            if f_readinto is None:
                raise NotImplementedError("readinto")
            if unbounded:
                return await f_readinto(buffer)
            if size <= 0:
                return 0
            # NOTE(review): the slice counts items of ``buffer``; this assumes
            # a byte-oriented buffer (itemsize 1) -- confirm with callers.
            n = await f_readinto(memoryview(buffer)[:size])
            size -= n
            return n

    return Reader()
1828
+
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "python-filewrap"
3
- version = "0.2.6.1"
3
+ version = "0.2.7"
4
4
  description = "Python file wrappers."
5
5
  authors = ["ChenyangGao <wosiwujm@gmail.com>"]
6
6
  license = "MIT"
@@ -13,7 +13,7 @@ classifiers = [
13
13
  "Development Status :: 5 - Production/Stable",
14
14
  "Programming Language :: Python",
15
15
  "Programming Language :: Python :: 3",
16
- "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.12",
17
17
  "Programming Language :: Python :: 3 :: Only",
18
18
  "Operating System :: OS Independent",
19
19
  "Intended Audience :: Developers",
@@ -26,8 +26,8 @@ include = [
26
26
  ]
27
27
 
28
28
  [tool.poetry.dependencies]
29
- python = "^3.10"
30
- python-asynctools = ">=0.0.5"
29
+ python = "^3.12"
30
+ python-asynctools = ">=0.1"
31
31
  python-property = ">=0.0.3"
32
32
 
33
33
  [build-system]