omlish 0.0.0.dev102__py3-none-any.whl → 0.0.0.dev104__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- omlish/__about__.py +2 -2
- omlish/concurrent/threadlets.py +3 -0
- omlish/diag/pycharm/pycharm.py +33 -0
- omlish/fnpairs.py +22 -12
- omlish/formats/json/cli/cli.py +35 -1
- omlish/formats/json/cli/formats.py +7 -0
- omlish/formats/json/cli/io.py +74 -0
- omlish/http/consts.py +2 -0
- omlish/http/jwt.py +179 -0
- omlish/io/__init__.py +3 -0
- omlish/io/pyio.py +2757 -0
- omlish/io/trampoline.py +293 -0
- omlish/lite/cached.py +9 -1
- omlish/lite/contextmanagers.py +34 -0
- omlish/lite/marshal.py +72 -29
- omlish/lite/pidfile.py +1 -1
- omlish/lite/subprocesses.py +18 -0
- omlish/specs/__init__.py +2 -0
- omlish/sync.py +55 -0
- {omlish-0.0.0.dev102.dist-info → omlish-0.0.0.dev104.dist-info}/METADATA +1 -1
- {omlish-0.0.0.dev102.dist-info → omlish-0.0.0.dev104.dist-info}/RECORD +25 -20
- {omlish-0.0.0.dev102.dist-info → omlish-0.0.0.dev104.dist-info}/LICENSE +0 -0
- {omlish-0.0.0.dev102.dist-info → omlish-0.0.0.dev104.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev102.dist-info → omlish-0.0.0.dev104.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev102.dist-info → omlish-0.0.0.dev104.dist-info}/top_level.txt +0 -0
omlish/io/pyio.py
ADDED
@@ -0,0 +1,2757 @@
|
|
1
|
+
# type: ignore
|
2
|
+
# ruff: noqa
|
3
|
+
# flake8: ignore
|
4
|
+
"""
|
5
|
+
Python implementation of the io module.
|
6
|
+
|
7
|
+
https://github.com/python/cpython/blob/8fa4dc4ba8646c59f945f2451c53e2919f066065/Lib/_pyio.py
|
8
|
+
"""
|
9
|
+
# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
|
10
|
+
# --------------------------------------------
|
11
|
+
#
|
12
|
+
# 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization
|
13
|
+
# ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated
|
14
|
+
# documentation.
|
15
|
+
#
|
16
|
+
# 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive,
|
17
|
+
# royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative
|
18
|
+
# works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License
|
19
|
+
# Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001-2024 Python Software Foundation; All Rights
|
20
|
+
# Reserved" are retained in Python alone or in any derivative version prepared by Licensee.
|
21
|
+
#
|
22
|
+
# 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and
|
23
|
+
# wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in
|
24
|
+
# any such work a brief summary of the changes made to Python.
|
25
|
+
#
|
26
|
+
# 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES,
|
27
|
+
# EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY
|
28
|
+
# OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY
|
29
|
+
# RIGHTS.
|
30
|
+
#
|
31
|
+
# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL
|
32
|
+
# DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF
|
33
|
+
# ADVISED OF THE POSSIBILITY THEREOF.
|
34
|
+
#
|
35
|
+
# 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
|
36
|
+
#
|
37
|
+
# 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint
|
38
|
+
# venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade
|
39
|
+
# name in a trademark sense to endorse or promote products or services of Licensee, or any third party.
|
40
|
+
#
|
41
|
+
# 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this
|
42
|
+
# License Agreement.
|
43
|
+
import abc
|
44
|
+
import codecs
|
45
|
+
import errno
|
46
|
+
import io
|
47
|
+
import locale
|
48
|
+
import os
|
49
|
+
import stat
|
50
|
+
import sys
|
51
|
+
import threading
|
52
|
+
import warnings
|
53
|
+
|
54
|
+
|
55
|
+
valid_seek_flags = {0, 1, 2} # Hardwired values
|
56
|
+
if hasattr(os, 'SEEK_HOLE'):
|
57
|
+
valid_seek_flags.add(os.SEEK_HOLE)
|
58
|
+
valid_seek_flags.add(os.SEEK_DATA)
|
59
|
+
|
60
|
+
# open() uses st_blksize whenever we can
|
61
|
+
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
|
62
|
+
|
63
|
+
# NOTE: Base classes defined here are registered with the "official" ABCs defined in io.py. We don't use real
|
64
|
+
# inheritance though, because we don't want to inherit the C implementations.
|
65
|
+
|
66
|
+
# Does open() check its 'errors' argument?
|
67
|
+
_CHECK_ERRORS = hasattr(sys, 'gettotalrefcount') or sys.flags.dev_mode
|
68
|
+
|
69
|
+
|
70
|
+
def text_encoding(encoding, stacklevel=2):
|
71
|
+
"""
|
72
|
+
A helper function to choose the text encoding.
|
73
|
+
|
74
|
+
When encoding is not None, this function returns it. Otherwise, this function returns the default text encoding
|
75
|
+
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
|
76
|
+
|
77
|
+
This function emits an EncodingWarning if *encoding* is None and sys.flags.warn_default_encoding is true.
|
78
|
+
|
79
|
+
This can be used in APIs with an encoding=None parameter that pass it to TextIOWrapper or open. However, please
|
80
|
+
consider using encoding="utf-8" for new APIs.
|
81
|
+
"""
|
82
|
+
|
83
|
+
if encoding is None:
|
84
|
+
if sys.flags.utf8_mode:
|
85
|
+
encoding = 'utf-8'
|
86
|
+
else:
|
87
|
+
encoding = 'locale'
|
88
|
+
|
89
|
+
if sys.flags.warn_default_encoding:
|
90
|
+
warnings.warn("'encoding' argument not specified.", EncodingWarning, stacklevel + 1)
|
91
|
+
|
92
|
+
return encoding
|
93
|
+
|
94
|
+
|
95
|
+
# Wrapper for builtins.open
|
96
|
+
#
|
97
|
+
# Trick so that open() won't become a bound method when stored as a class variable (as dbm.dumb does).
|
98
|
+
#
|
99
|
+
# See init_set_builtins_open() in Python/pylifecycle.c.
|
100
|
+
def open(
|
101
|
+
file,
|
102
|
+
mode='r',
|
103
|
+
buffering=-1,
|
104
|
+
encoding=None,
|
105
|
+
errors=None,
|
106
|
+
newline=None,
|
107
|
+
closefd=True,
|
108
|
+
opener=None,
|
109
|
+
):
|
110
|
+
r"""
|
111
|
+
Open file and return a stream. Raise OSError upon failure.
|
112
|
+
|
113
|
+
file is either a text or byte string giving the name (and the path if the file isn't in the current working
|
114
|
+
directory) of the file to be opened or an integer file descriptor of the file to be wrapped. (If a file descriptor
|
115
|
+
is given, it is closed when the returned I/O object is closed, unless closefd is set to False.)
|
116
|
+
|
117
|
+
mode is an optional string that specifies the mode in which the file is opened. It defaults to 'r' which means open
|
118
|
+
for reading in text mode. Other common values are 'w' for writing (truncating the file if it already exists), 'x'
|
119
|
+
for exclusive creation of a new file, and 'a' for appending (which on some Unix systems, means that all writes
|
120
|
+
append to the end of the file regardless of the current seek position). In text mode, if encoding is not specified
|
121
|
+
the encoding used is platform dependent. (For reading and writing raw bytes use binary mode and leave encoding
|
122
|
+
unspecified.) The available modes are:
|
123
|
+
|
124
|
+
========= ===============================================================
|
125
|
+
Character Meaning
|
126
|
+
--------- ---------------------------------------------------------------
|
127
|
+
'r' open for reading (default)
|
128
|
+
'w' open for writing, truncating the file first
|
129
|
+
'x' create a new file and open it for writing
|
130
|
+
'a' open for writing, appending to the end of the file if it exists
|
131
|
+
'b' binary mode
|
132
|
+
't' text mode (default)
|
133
|
+
'+' open a disk file for updating (reading and writing)
|
134
|
+
========= ===============================================================
|
135
|
+
|
136
|
+
The default mode is 'rt' (open for reading text). For binary random access, the mode 'w+b' opens and truncates the
|
137
|
+
file to 0 bytes, while 'r+b' opens the file without truncation. The 'x' mode implies 'w' and raises an
|
138
|
+
`FileExistsError` if the file already exists.
|
139
|
+
|
140
|
+
Python distinguishes between files opened in binary and text modes, even when the underlying operating system
|
141
|
+
doesn't. Files opened in binary mode (appending 'b' to the mode argument) return contents as bytes objects without
|
142
|
+
any decoding. In text mode (the default, or when 't' is appended to the mode argument), the contents of the file are
|
143
|
+
returned as strings, the bytes having been first decoded using a platform-dependent encoding or using the specified
|
144
|
+
encoding if given.
|
145
|
+
|
146
|
+
buffering is an optional integer used to set the buffering policy. Pass 0 to switch buffering off (only allowed in
|
147
|
+
binary mode), 1 to select line buffering (only usable in text mode), and an integer > 1 to indicate the size of a
|
148
|
+
fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows:
|
149
|
+
|
150
|
+
* Binary files are buffered in fixed-size chunks; the size of the buffer is chosen using a heuristic trying to
|
151
|
+
determine the underlying device's "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. On many systems, the
|
152
|
+
buffer will typically be 4096 or 8192 bytes long.
|
153
|
+
|
154
|
+
* "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the
|
155
|
+
policy described above for binary files.
|
156
|
+
|
157
|
+
encoding is the str name of the encoding used to decode or encode the file. This should only be used in text mode.
|
158
|
+
The default encoding is platform dependent, but any encoding supported by Python can be passed. See the codecs
|
159
|
+
module for the list of supported encodings.
|
160
|
+
|
161
|
+
errors is an optional string that specifies how encoding errors are to be handled---this argument should not be used
|
162
|
+
in binary mode. Pass 'strict' to raise a ValueError exception if there is an encoding error (the default of None has
|
163
|
+
the same effect), or pass 'ignore' to ignore errors. (Note that ignoring encoding errors can lead to data loss.) See
|
164
|
+
the documentation for codecs.register for a list of the permitted encoding error strings.
|
165
|
+
|
166
|
+
newline is a string controlling how universal newlines works (it only applies to text mode). It can be None, '',
|
167
|
+
'\n', '\r', and '\r\n'. It works as follows:
|
168
|
+
|
169
|
+
* On input, if newline is None, universal newlines mode is enabled. Lines in the input can end in '\n', '\r', or
|
170
|
+
'\r\n', and these are translated into '\n' before being returned to the caller. If it is '', universal newline
|
171
|
+
mode is enabled, but line endings are returned to the caller untranslated. If it has any of the other legal
|
172
|
+
values, input lines are only terminated by the given string, and the line ending is returned to the caller
|
173
|
+
untranslated.
|
174
|
+
|
175
|
+
* On output, if newline is None, any '\n' characters written are translated to the system default line separator,
|
176
|
+
os.linesep. If newline is '', no translation takes place. If newline is any of the other legal values, any '\n'
|
177
|
+
characters written are translated to the given string.
|
178
|
+
|
179
|
+
closedfd is a bool. If closefd is False, the underlying file descriptor will be kept open when the file is closed.
|
180
|
+
This does not work when a file name is given and must be True in that case.
|
181
|
+
|
182
|
+
The newly created file is non-inheritable.
|
183
|
+
|
184
|
+
A custom opener can be used by passing a callable as *opener*. The underlying file descriptor for the file object is
|
185
|
+
then obtained by calling *opener* with (*file*, *flags*). *opener* must return an open file descriptor (passing
|
186
|
+
os.open as *opener* results in functionality similar to passing None).
|
187
|
+
|
188
|
+
open() returns a file object whose type depends on the mode, and through which the standard file operations such as
|
189
|
+
reading and writing are performed. When open() is used to open a file in a text mode ('w', 'r', 'wt', 'rt', etc.),
|
190
|
+
it returns a TextIOWrapper. When used to open a file in a binary mode, the returned class varies: in read binary
|
191
|
+
mode, it returns a BufferedReader; in write binary and append binary modes, it returns a BufferedWriter, and in
|
192
|
+
read/write mode, it returns a BufferedRandom.
|
193
|
+
|
194
|
+
It is also possible to use a string or bytearray as a file for both reading and writing. For strings StringIO can be
|
195
|
+
used like a file opened in a text mode, and for bytes a BytesIO can be used like a file opened in a binary mode.
|
196
|
+
"""
|
197
|
+
|
198
|
+
if not isinstance(file, int):
|
199
|
+
file = os.fspath(file)
|
200
|
+
if not isinstance(file, (str, bytes, int)):
|
201
|
+
raise TypeError('invalid file: %r' % file)
|
202
|
+
if not isinstance(mode, str):
|
203
|
+
raise TypeError('invalid mode: %r' % mode)
|
204
|
+
if not isinstance(buffering, int):
|
205
|
+
raise TypeError('invalid buffering: %r' % buffering)
|
206
|
+
if encoding is not None and not isinstance(encoding, str):
|
207
|
+
raise TypeError('invalid encoding: %r' % encoding)
|
208
|
+
if errors is not None and not isinstance(errors, str):
|
209
|
+
raise TypeError('invalid errors: %r' % errors)
|
210
|
+
|
211
|
+
modes = set(mode)
|
212
|
+
if modes - set('axrwb+t') or len(mode) > len(modes):
|
213
|
+
raise ValueError('invalid mode: %r' % mode)
|
214
|
+
creating = 'x' in modes
|
215
|
+
reading = 'r' in modes
|
216
|
+
writing = 'w' in modes
|
217
|
+
appending = 'a' in modes
|
218
|
+
updating = '+' in modes
|
219
|
+
text = 't' in modes
|
220
|
+
binary = 'b' in modes
|
221
|
+
if text and binary:
|
222
|
+
raise ValueError("can't have text and binary mode at once")
|
223
|
+
if creating + reading + writing + appending > 1:
|
224
|
+
raise ValueError("can't have read/write/append mode at once")
|
225
|
+
if not (creating or reading or writing or appending):
|
226
|
+
raise ValueError('must have exactly one of read/write/append mode')
|
227
|
+
if binary and encoding is not None:
|
228
|
+
raise ValueError("binary mode doesn't take an encoding argument")
|
229
|
+
if binary and errors is not None:
|
230
|
+
raise ValueError("binary mode doesn't take an errors argument")
|
231
|
+
if binary and newline is not None:
|
232
|
+
raise ValueError("binary mode doesn't take a newline argument")
|
233
|
+
if binary and buffering == 1:
|
234
|
+
warnings.warn(
|
235
|
+
"line buffering (buffering=1) isn't supported in binary "
|
236
|
+
"mode, the default buffer size will be used",
|
237
|
+
RuntimeWarning,
|
238
|
+
2,
|
239
|
+
)
|
240
|
+
|
241
|
+
raw = FileIO(
|
242
|
+
file,
|
243
|
+
(creating and 'x' or '')
|
244
|
+
+ (reading and 'r' or '')
|
245
|
+
+ (writing and 'w' or '')
|
246
|
+
+ (appending and 'a' or '')
|
247
|
+
+ (updating and '+' or ''),
|
248
|
+
closefd,
|
249
|
+
opener=opener,
|
250
|
+
)
|
251
|
+
|
252
|
+
result = raw
|
253
|
+
try:
|
254
|
+
line_buffering = False
|
255
|
+
if buffering == 1 or buffering < 0 and raw._isatty_open_only():
|
256
|
+
buffering = -1
|
257
|
+
line_buffering = True
|
258
|
+
if buffering < 0:
|
259
|
+
buffering = raw._blksize
|
260
|
+
if buffering < 0:
|
261
|
+
raise ValueError('invalid buffering size')
|
262
|
+
if buffering == 0:
|
263
|
+
if binary:
|
264
|
+
return result
|
265
|
+
raise ValueError("can't have unbuffered text I/O")
|
266
|
+
|
267
|
+
if updating:
|
268
|
+
buffer = BufferedRandom(raw, buffering)
|
269
|
+
elif creating or writing or appending:
|
270
|
+
buffer = BufferedWriter(raw, buffering)
|
271
|
+
elif reading:
|
272
|
+
buffer = BufferedReader(raw, buffering)
|
273
|
+
else:
|
274
|
+
raise ValueError('unknown mode: %r' % mode)
|
275
|
+
|
276
|
+
result = buffer
|
277
|
+
if binary:
|
278
|
+
return result
|
279
|
+
|
280
|
+
encoding = text_encoding(encoding)
|
281
|
+
text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
|
282
|
+
result = text
|
283
|
+
text.mode = mode
|
284
|
+
return result
|
285
|
+
|
286
|
+
except: # noqa
|
287
|
+
result.close()
|
288
|
+
raise
|
289
|
+
|
290
|
+
|
291
|
+
# Define a default pure-Python implementation for open_code() that does not allow hooks. Warn on first use. Defined for
|
292
|
+
# tests.
|
293
|
+
def _open_code_with_warning(path):
|
294
|
+
"""
|
295
|
+
Opens the provided file with mode ``'rb'``. This function should be used when the intent is to treat the contents as
|
296
|
+
executable code.
|
297
|
+
|
298
|
+
``path`` should be an absolute path.
|
299
|
+
|
300
|
+
When supported by the runtime, this function can be hooked in order to allow embedders more control over code files.
|
301
|
+
This functionality is not supported on the current runtime.
|
302
|
+
"""
|
303
|
+
|
304
|
+
warnings.warn('_pyio.open_code() may not be using hooks', RuntimeWarning, 2)
|
305
|
+
return open(path, 'rb')
|
306
|
+
|
307
|
+
|
308
|
+
try:
|
309
|
+
open_code = io.open_code
|
310
|
+
except AttributeError:
|
311
|
+
open_code = _open_code_with_warning
|
312
|
+
|
313
|
+
|
314
|
+
# In normal operation, both `UnsupportedOperation`s should be bound to the same object.
|
315
|
+
UnsupportedOperation = io.UnsupportedOperation
|
316
|
+
|
317
|
+
|
318
|
+
@io.IOBase.register
|
319
|
+
class IOBase(metaclass=abc.ABCMeta):
|
320
|
+
"""
|
321
|
+
The abstract base class for all I/O classes.
|
322
|
+
|
323
|
+
This class provides dummy implementations for many methods that derived classes can override selectively; the
|
324
|
+
default implementations represent a file that cannot be read, written or seeked.
|
325
|
+
|
326
|
+
Even though IOBase does not declare read or write because their signatures will vary, implementations and clients
|
327
|
+
should consider those methods part of the interface. Also, implementations may raise UnsupportedOperation when
|
328
|
+
operations they do not support are called.
|
329
|
+
|
330
|
+
The basic type used for binary data read from or written to a file is bytes. Other bytes-like objects are accepted
|
331
|
+
as method arguments too. Text I/O classes work with str data.
|
332
|
+
|
333
|
+
Note that calling any method (even inquiries) on a closed stream is undefined. Implementations may raise OSError in
|
334
|
+
this case.
|
335
|
+
|
336
|
+
IOBase (and its subclasses) support the iterator protocol, meaning that an IOBase object can be iterated over
|
337
|
+
yielding the lines in a stream.
|
338
|
+
|
339
|
+
IOBase also supports the :keyword:`with` statement. In this example, fp is closed after the suite of the with
|
340
|
+
statement is complete:
|
341
|
+
|
342
|
+
with open('spam.txt', 'r') as fp:
|
343
|
+
fp.write('Spam and eggs!')
|
344
|
+
"""
|
345
|
+
|
346
|
+
# Internal
|
347
|
+
|
348
|
+
def _unsupported(self, name):
|
349
|
+
"""Internal: raise an OSError exception for unsupported operations."""
|
350
|
+
|
351
|
+
raise UnsupportedOperation('%s.%s() not supported' % (self.__class__.__name__, name))
|
352
|
+
|
353
|
+
# Positioning
|
354
|
+
|
355
|
+
def seek(self, pos, whence=0):
|
356
|
+
"""
|
357
|
+
Change stream position.
|
358
|
+
|
359
|
+
Change the stream position to byte offset pos. Argument pos is interpreted relative to the position indicated by
|
360
|
+
whence. Values for whence are ints:
|
361
|
+
|
362
|
+
* 0 -- start of stream (the default); offset should be zero or positive
|
363
|
+
* 1 -- current stream position; offset may be negative
|
364
|
+
* 2 -- end of stream; offset is usually negative
|
365
|
+
Some operating systems / file systems could provide additional values.
|
366
|
+
|
367
|
+
Return an int indicating the new absolute position.
|
368
|
+
"""
|
369
|
+
|
370
|
+
self._unsupported('seek')
|
371
|
+
|
372
|
+
def tell(self):
|
373
|
+
"""Return an int indicating the current stream position."""
|
374
|
+
|
375
|
+
return self.seek(0, 1)
|
376
|
+
|
377
|
+
def truncate(self, pos=None):
|
378
|
+
"""
|
379
|
+
Truncate file to size bytes.
|
380
|
+
|
381
|
+
Size defaults to the current IO position as reported by tell(). Return the new size.
|
382
|
+
"""
|
383
|
+
|
384
|
+
self._unsupported('truncate')
|
385
|
+
|
386
|
+
# Flush and close
|
387
|
+
|
388
|
+
def flush(self):
|
389
|
+
"""
|
390
|
+
Flush write buffers, if applicable.
|
391
|
+
|
392
|
+
This is not implemented for read-only and non-blocking streams.
|
393
|
+
"""
|
394
|
+
|
395
|
+
self._checkClosed()
|
396
|
+
# XXX Should this return the number of bytes written???
|
397
|
+
|
398
|
+
__closed = False
|
399
|
+
|
400
|
+
def close(self):
|
401
|
+
"""
|
402
|
+
Flush and close the IO object.
|
403
|
+
|
404
|
+
This method has no effect if the file is already closed.
|
405
|
+
"""
|
406
|
+
|
407
|
+
if not self.__closed:
|
408
|
+
try:
|
409
|
+
self.flush()
|
410
|
+
finally:
|
411
|
+
self.__closed = True
|
412
|
+
|
413
|
+
def __del__(self):
|
414
|
+
"""Destructor. Calls close()."""
|
415
|
+
|
416
|
+
try:
|
417
|
+
closed = self.closed
|
418
|
+
except AttributeError:
|
419
|
+
# If getting closed fails, then the object is probably in an unusable state, so ignore.
|
420
|
+
return
|
421
|
+
|
422
|
+
if closed:
|
423
|
+
return
|
424
|
+
|
425
|
+
# If close() fails, the caller logs the exception with sys.unraisablehook. close() must be called at the end at
|
426
|
+
# __del__().
|
427
|
+
self.close()
|
428
|
+
|
429
|
+
# Inquiries
|
430
|
+
|
431
|
+
def seekable(self):
|
432
|
+
"""
|
433
|
+
Return a bool indicating whether object supports random access.
|
434
|
+
|
435
|
+
If False, seek(), tell() and truncate() will raise OSError.
|
436
|
+
This method may need to do a test seek().
|
437
|
+
"""
|
438
|
+
|
439
|
+
return False
|
440
|
+
|
441
|
+
def _checkSeekable(self, msg=None):
|
442
|
+
"""Internal: raise UnsupportedOperation if file is not seekable"""
|
443
|
+
|
444
|
+
if not self.seekable():
|
445
|
+
raise UnsupportedOperation('File or stream is not seekable.' if msg is None else msg)
|
446
|
+
|
447
|
+
def readable(self):
|
448
|
+
"""
|
449
|
+
Return a bool indicating whether object was opened for reading.
|
450
|
+
|
451
|
+
If False, read() will raise OSError.
|
452
|
+
"""
|
453
|
+
|
454
|
+
return False
|
455
|
+
|
456
|
+
def _checkReadable(self, msg=None):
|
457
|
+
"""Internal: raise UnsupportedOperation if file is not readable"""
|
458
|
+
|
459
|
+
if not self.readable():
|
460
|
+
raise UnsupportedOperation('File or stream is not readable.' if msg is None else msg)
|
461
|
+
|
462
|
+
def writable(self):
|
463
|
+
"""
|
464
|
+
Return a bool indicating whether object was opened for writing.
|
465
|
+
|
466
|
+
If False, write() and truncate() will raise OSError.
|
467
|
+
"""
|
468
|
+
|
469
|
+
return False
|
470
|
+
|
471
|
+
def _checkWritable(self, msg=None):
|
472
|
+
"""Internal: raise UnsupportedOperation if file is not writable"""
|
473
|
+
|
474
|
+
if not self.writable():
|
475
|
+
raise UnsupportedOperation('File or stream is not writable.' if msg is None else msg)
|
476
|
+
|
477
|
+
@property
|
478
|
+
def closed(self):
|
479
|
+
"""
|
480
|
+
closed: bool. True iff the file has been closed.
|
481
|
+
|
482
|
+
For backwards compatibility, this is a property, not a predicate.
|
483
|
+
"""
|
484
|
+
|
485
|
+
return self.__closed
|
486
|
+
|
487
|
+
def _checkClosed(self, msg=None):
|
488
|
+
"""Internal: raise a ValueError if file is closed"""
|
489
|
+
|
490
|
+
if self.closed:
|
491
|
+
raise ValueError('I/O operation on closed file.' if msg is None else msg)
|
492
|
+
|
493
|
+
# Context manager
|
494
|
+
|
495
|
+
def __enter__(self): # That's a forward reference
|
496
|
+
"""Context management protocol. Returns self (an instance of IOBase)."""
|
497
|
+
|
498
|
+
self._checkClosed()
|
499
|
+
return self
|
500
|
+
|
501
|
+
def __exit__(self, *args):
|
502
|
+
"""Context management protocol. Calls close()"""
|
503
|
+
|
504
|
+
self.close()
|
505
|
+
|
506
|
+
# Lower-level APIs
|
507
|
+
|
508
|
+
# XXX Should these be present even if unimplemented?
|
509
|
+
|
510
|
+
def fileno(self):
|
511
|
+
"""
|
512
|
+
Returns underlying file descriptor (an int) if one exists.
|
513
|
+
|
514
|
+
An OSError is raised if the IO object does not use a file descriptor.
|
515
|
+
"""
|
516
|
+
|
517
|
+
self._unsupported('fileno')
|
518
|
+
|
519
|
+
def isatty(self):
|
520
|
+
"""
|
521
|
+
Return a bool indicating whether this is an 'interactive' stream.
|
522
|
+
|
523
|
+
Return False if it can't be determined.
|
524
|
+
"""
|
525
|
+
|
526
|
+
self._checkClosed()
|
527
|
+
return False
|
528
|
+
|
529
|
+
# Readline[s] and writelines
|
530
|
+
|
531
|
+
def readline(self, size=-1):
|
532
|
+
r"""
|
533
|
+
Read and return a line of bytes from the stream.
|
534
|
+
|
535
|
+
If size is specified, at most size bytes will be read. Size should be an int.
|
536
|
+
|
537
|
+
The line terminator is always b'\n' for binary files; for text files, the newlines argument to open can be used
|
538
|
+
to select the line terminator(s) recognized.
|
539
|
+
"""
|
540
|
+
|
541
|
+
# For backwards compatibility, a (slowish) readline().
|
542
|
+
if hasattr(self, 'peek'):
|
543
|
+
def nreadahead():
|
544
|
+
readahead = self.peek(1)
|
545
|
+
if not readahead:
|
546
|
+
return 1
|
547
|
+
n = (readahead.find(b'\n') + 1) or len(readahead)
|
548
|
+
if size >= 0:
|
549
|
+
n = min(n, size)
|
550
|
+
return n
|
551
|
+
|
552
|
+
else:
|
553
|
+
def nreadahead():
|
554
|
+
return 1
|
555
|
+
|
556
|
+
if size is None:
|
557
|
+
size = -1
|
558
|
+
else:
|
559
|
+
try:
|
560
|
+
size_index = size.__index__
|
561
|
+
except AttributeError:
|
562
|
+
raise TypeError(f'{size!r} is not an integer')
|
563
|
+
else:
|
564
|
+
size = size_index()
|
565
|
+
|
566
|
+
res = bytearray()
|
567
|
+
while size < 0 or len(res) < size:
|
568
|
+
b = self.read(nreadahead())
|
569
|
+
if not b:
|
570
|
+
break
|
571
|
+
res += b
|
572
|
+
if res.endswith(b'\n'):
|
573
|
+
break
|
574
|
+
|
575
|
+
return bytes(res)
|
576
|
+
|
577
|
+
def __iter__(self):
|
578
|
+
self._checkClosed()
|
579
|
+
return self
|
580
|
+
|
581
|
+
def __next__(self):
|
582
|
+
line = self.readline()
|
583
|
+
if not line:
|
584
|
+
raise StopIteration
|
585
|
+
return line
|
586
|
+
|
587
|
+
def readlines(self, hint=None):
|
588
|
+
"""
|
589
|
+
Return a list of lines from the stream.
|
590
|
+
|
591
|
+
hint can be specified to control the number of lines read: no more lines will be read if the total size (in
|
592
|
+
bytes/characters) of all lines so far exceeds hint.
|
593
|
+
"""
|
594
|
+
|
595
|
+
if hint is None or hint <= 0:
|
596
|
+
return list(self)
|
597
|
+
n = 0
|
598
|
+
lines = []
|
599
|
+
for line in self:
|
600
|
+
lines.append(line)
|
601
|
+
n += len(line)
|
602
|
+
if n >= hint:
|
603
|
+
break
|
604
|
+
return lines
|
605
|
+
|
606
|
+
def writelines(self, lines):
|
607
|
+
"""
|
608
|
+
Write a list of lines to the stream.
|
609
|
+
|
610
|
+
Line separators are not added, so it is usual for each of the lines provided to have a line separator at the
|
611
|
+
end.
|
612
|
+
"""
|
613
|
+
|
614
|
+
self._checkClosed()
|
615
|
+
for line in lines:
|
616
|
+
self.write(line)
|
617
|
+
|
618
|
+
|
619
|
+
@io.RawIOBase.register
|
620
|
+
class RawIOBase(IOBase):
|
621
|
+
"""Base class for raw binary I/O."""
|
622
|
+
|
623
|
+
# The read() method is implemented by calling readinto(); derived classes that want to support read() only need to
|
624
|
+
# implement readinto() as a primitive operation. In general, readinto() can be more efficient than read().
|
625
|
+
|
626
|
+
# (It would be tempting to also provide an implementation of readinto() in terms of read(), in case the latter is a
|
627
|
+
# more suitable primitive operation, but that would lead to nasty recursion in case a subclass doesn't implement
|
628
|
+
# either.)
|
629
|
+
|
630
|
+
def read(self, size=-1):
|
631
|
+
"""
|
632
|
+
Read and return up to size bytes, where size is an int.
|
633
|
+
|
634
|
+
Returns an empty bytes object on EOF, or None if the object is set not to block and has no data to read.
|
635
|
+
"""
|
636
|
+
|
637
|
+
if size is None:
|
638
|
+
size = -1
|
639
|
+
if size < 0:
|
640
|
+
return self.readall()
|
641
|
+
b = bytearray(size.__index__())
|
642
|
+
n = self.readinto(b)
|
643
|
+
if n is None:
|
644
|
+
return None
|
645
|
+
del b[n:]
|
646
|
+
return bytes(b)
|
647
|
+
|
648
|
+
def readall(self):
|
649
|
+
"""Read until EOF, using multiple read() call."""
|
650
|
+
|
651
|
+
res = bytearray()
|
652
|
+
while data := self.read(DEFAULT_BUFFER_SIZE):
|
653
|
+
res += data
|
654
|
+
if res:
|
655
|
+
return bytes(res)
|
656
|
+
else:
|
657
|
+
# b'' or None
|
658
|
+
return data
|
659
|
+
|
660
|
+
def readinto(self, b):
|
661
|
+
"""
|
662
|
+
Read bytes into a pre-allocated bytes-like object b.
|
663
|
+
|
664
|
+
Returns an int representing the number of bytes read (0 for EOF), or None if the object is set not to block and
|
665
|
+
has no data to read.
|
666
|
+
"""
|
667
|
+
|
668
|
+
self._unsupported('readinto')
|
669
|
+
|
670
|
+
def write(self, b):
|
671
|
+
"""
|
672
|
+
Write the given buffer to the IO stream.
|
673
|
+
|
674
|
+
Returns the number of bytes written, which may be less than the length of b in bytes.
|
675
|
+
"""
|
676
|
+
|
677
|
+
self._unsupported('write')
|
678
|
+
|
679
|
+
|
680
|
+
@io.BufferedIOBase.register
|
681
|
+
class BufferedIOBase(IOBase):
|
682
|
+
"""
|
683
|
+
Base class for buffered IO objects.
|
684
|
+
|
685
|
+
The main difference with RawIOBase is that the read() method supports omitting the size argument, and does not have
|
686
|
+
a default implementation that defers to readinto().
|
687
|
+
|
688
|
+
In addition, read(), readinto() and write() may raise BlockingIOError if the underlying raw stream is in
|
689
|
+
non-blocking mode and not ready; unlike their raw counterparts, they will never return None.
|
690
|
+
|
691
|
+
A typical implementation should not inherit from a RawIOBase implementation, but wrap one.
|
692
|
+
"""
|
693
|
+
|
694
|
+
def read(self, size=-1):
|
695
|
+
"""
|
696
|
+
Read and return up to size bytes, where size is an int.
|
697
|
+
|
698
|
+
If the argument is omitted, None, or negative, reads and returns all data until EOF.
|
699
|
+
|
700
|
+
If the argument is positive, and the underlying raw stream is not 'interactive', multiple raw reads may be
|
701
|
+
issued to satisfy the byte count (unless EOF is reached first). But for interactive raw streams (XXX and for
|
702
|
+
pipes?), at most one raw read will be issued, and a short result does not imply that EOF is imminent.
|
703
|
+
|
704
|
+
Returns an empty bytes array on EOF.
|
705
|
+
|
706
|
+
Raises BlockingIOError if the underlying raw stream has no data at the moment.
|
707
|
+
"""
|
708
|
+
|
709
|
+
self._unsupported('read')
|
710
|
+
|
711
|
+
def read1(self, size=-1):
|
712
|
+
"""Read up to size bytes with at most one read() system call, where size is an int."""
|
713
|
+
|
714
|
+
self._unsupported('read1')
|
715
|
+
|
716
|
+
def readinto(self, b):
|
717
|
+
"""
|
718
|
+
Read bytes into a pre-allocated bytes-like object b.
|
719
|
+
|
720
|
+
Like read(), this may issue multiple reads to the underlying raw stream, unless the latter is 'interactive'.
|
721
|
+
|
722
|
+
Returns an int representing the number of bytes read (0 for EOF).
|
723
|
+
|
724
|
+
Raises BlockingIOError if the underlying raw stream has no data at the moment.
|
725
|
+
"""
|
726
|
+
|
727
|
+
return self._readinto(b, read1=False)
|
728
|
+
|
729
|
+
def readinto1(self, b):
|
730
|
+
"""
|
731
|
+
Read bytes into buffer *b*, using at most one system call
|
732
|
+
|
733
|
+
Returns an int representing the number of bytes read (0 for EOF).
|
734
|
+
|
735
|
+
Raises BlockingIOError if the underlying raw stream has no data at the moment.
|
736
|
+
"""
|
737
|
+
|
738
|
+
return self._readinto(b, read1=True)
|
739
|
+
|
740
|
+
def _readinto(self, b, read1):
|
741
|
+
if not isinstance(b, memoryview):
|
742
|
+
b = memoryview(b)
|
743
|
+
b = b.cast('B')
|
744
|
+
|
745
|
+
if read1:
|
746
|
+
data = self.read1(len(b))
|
747
|
+
else:
|
748
|
+
data = self.read(len(b))
|
749
|
+
n = len(data)
|
750
|
+
|
751
|
+
b[:n] = data
|
752
|
+
|
753
|
+
return n
|
754
|
+
|
755
|
+
def write(self, b):
|
756
|
+
"""
|
757
|
+
Write the given bytes buffer to the IO stream.
|
758
|
+
|
759
|
+
Return the number of bytes written, which is always the length of b in bytes.
|
760
|
+
|
761
|
+
Raises BlockingIOError if the buffer is full and the underlying raw stream cannot accept more data at the
|
762
|
+
moment.
|
763
|
+
"""
|
764
|
+
|
765
|
+
self._unsupported('write')
|
766
|
+
|
767
|
+
def detach(self):
|
768
|
+
"""
|
769
|
+
Separate the underlying raw stream from the buffer and return it.
|
770
|
+
|
771
|
+
After the raw stream has been detached, the buffer is in an unusable state.
|
772
|
+
"""
|
773
|
+
|
774
|
+
self._unsupported('detach')
|
775
|
+
|
776
|
+
|
777
|
+
class _BufferedIOMixin(BufferedIOBase):
|
778
|
+
"""
|
779
|
+
A mixin implementation of BufferedIOBase with an underlying raw stream.
|
780
|
+
|
781
|
+
This passes most requests on to the underlying raw stream. It does *not* provide implementations of read(),
|
782
|
+
readinto() or write().
|
783
|
+
"""
|
784
|
+
|
785
|
+
def __init__(self, raw):
|
786
|
+
self._raw = raw
|
787
|
+
|
788
|
+
# Positioning
|
789
|
+
|
790
|
+
def seek(self, pos, whence=0):
|
791
|
+
new_position = self.raw.seek(pos, whence)
|
792
|
+
if new_position < 0:
|
793
|
+
raise OSError('seek() returned an invalid position')
|
794
|
+
return new_position
|
795
|
+
|
796
|
+
def tell(self):
|
797
|
+
pos = self.raw.tell()
|
798
|
+
if pos < 0:
|
799
|
+
raise OSError('tell() returned an invalid position')
|
800
|
+
return pos
|
801
|
+
|
802
|
+
def truncate(self, pos=None):
|
803
|
+
self._checkClosed()
|
804
|
+
self._checkWritable()
|
805
|
+
|
806
|
+
# Flush the stream. We're mixing buffered I/O with lower-level I/O, and a flush may be necessary to synch both
|
807
|
+
# views of the current file state.
|
808
|
+
self.flush()
|
809
|
+
|
810
|
+
if pos is None:
|
811
|
+
pos = self.tell()
|
812
|
+
|
813
|
+
# XXX: Should seek() be used, instead of passing the position directly to truncate?
|
814
|
+
return self.raw.truncate(pos)
|
815
|
+
|
816
|
+
# Flush and close
|
817
|
+
|
818
|
+
def flush(self):
|
819
|
+
if self.closed:
|
820
|
+
raise ValueError('flush on closed file')
|
821
|
+
self.raw.flush()
|
822
|
+
|
823
|
+
def close(self):
|
824
|
+
if self.raw is not None and not self.closed:
|
825
|
+
try:
|
826
|
+
# may raise BlockingIOError or BrokenPipeError etc
|
827
|
+
self.flush()
|
828
|
+
finally:
|
829
|
+
self.raw.close()
|
830
|
+
|
831
|
+
def detach(self):
|
832
|
+
if self.raw is None:
|
833
|
+
raise ValueError('raw stream already detached')
|
834
|
+
self.flush()
|
835
|
+
raw = self._raw
|
836
|
+
self._raw = None
|
837
|
+
return raw
|
838
|
+
|
839
|
+
# Inquiries
|
840
|
+
|
841
|
+
def seekable(self):
|
842
|
+
return self.raw.seekable()
|
843
|
+
|
844
|
+
@property
|
845
|
+
def raw(self):
|
846
|
+
return self._raw
|
847
|
+
|
848
|
+
@property
|
849
|
+
def closed(self):
|
850
|
+
return self.raw.closed
|
851
|
+
|
852
|
+
@property
|
853
|
+
def name(self):
|
854
|
+
return self.raw.name
|
855
|
+
|
856
|
+
@property
|
857
|
+
def mode(self):
|
858
|
+
return self.raw.mode
|
859
|
+
|
860
|
+
def __getstate__(self):
|
861
|
+
raise TypeError(f'cannot pickle {self.__class__.__name__!r} object')
|
862
|
+
|
863
|
+
def __repr__(self):
|
864
|
+
modname = self.__class__.__module__
|
865
|
+
clsname = self.__class__.__qualname__
|
866
|
+
try:
|
867
|
+
name = self.name
|
868
|
+
except AttributeError:
|
869
|
+
return f'<{modname}.{clsname}>'
|
870
|
+
else:
|
871
|
+
return f'<{modname}.{clsname} name={name!r}>'
|
872
|
+
|
873
|
+
# Lower-level APIs
|
874
|
+
|
875
|
+
def fileno(self):
|
876
|
+
return self.raw.fileno()
|
877
|
+
|
878
|
+
def isatty(self):
|
879
|
+
return self.raw.isatty()
|
880
|
+
|
881
|
+
|
882
|
+
class BytesIO(BufferedIOBase):
|
883
|
+
"""Buffered I/O implementation using an in-memory bytes buffer."""
|
884
|
+
|
885
|
+
# Initialize _buffer as soon as possible since it's used by __del__() which calls close()
|
886
|
+
_buffer = None
|
887
|
+
|
888
|
+
def __init__(self, initial_bytes=None):
|
889
|
+
buf = bytearray()
|
890
|
+
if initial_bytes is not None:
|
891
|
+
buf += initial_bytes
|
892
|
+
self._buffer = buf
|
893
|
+
self._pos = 0
|
894
|
+
|
895
|
+
def __getstate__(self):
|
896
|
+
if self.closed:
|
897
|
+
raise ValueError('__getstate__ on closed file')
|
898
|
+
return self.__dict__.copy()
|
899
|
+
|
900
|
+
def getvalue(self):
|
901
|
+
"""Return the bytes value (contents) of the buffer"""
|
902
|
+
|
903
|
+
if self.closed:
|
904
|
+
raise ValueError('getvalue on closed file')
|
905
|
+
return bytes(self._buffer)
|
906
|
+
|
907
|
+
def getbuffer(self):
|
908
|
+
"""Return a readable and writable view of the buffer."""
|
909
|
+
|
910
|
+
if self.closed:
|
911
|
+
raise ValueError('getbuffer on closed file')
|
912
|
+
return memoryview(self._buffer)
|
913
|
+
|
914
|
+
def close(self):
|
915
|
+
if self._buffer is not None:
|
916
|
+
self._buffer.clear()
|
917
|
+
super().close()
|
918
|
+
|
919
|
+
def read(self, size=-1):
|
920
|
+
if self.closed:
|
921
|
+
raise ValueError('read from closed file')
|
922
|
+
if size is None:
|
923
|
+
size = -1
|
924
|
+
else:
|
925
|
+
try:
|
926
|
+
size_index = size.__index__
|
927
|
+
except AttributeError:
|
928
|
+
raise TypeError(f'{size!r} is not an integer')
|
929
|
+
else:
|
930
|
+
size = size_index()
|
931
|
+
if size < 0:
|
932
|
+
size = len(self._buffer)
|
933
|
+
if len(self._buffer) <= self._pos:
|
934
|
+
return b''
|
935
|
+
newpos = min(len(self._buffer), self._pos + size)
|
936
|
+
b = self._buffer[self._pos:newpos]
|
937
|
+
self._pos = newpos
|
938
|
+
return bytes(b)
|
939
|
+
|
940
|
+
def read1(self, size=-1):
|
941
|
+
"""This is the same as read."""
|
942
|
+
|
943
|
+
return self.read(size)
|
944
|
+
|
945
|
+
def write(self, b):
|
946
|
+
if self.closed:
|
947
|
+
raise ValueError('write to closed file')
|
948
|
+
if isinstance(b, str):
|
949
|
+
raise TypeError("can't write str to binary stream")
|
950
|
+
with memoryview(b) as view:
|
951
|
+
n = view.nbytes # Size of any bytes-like object
|
952
|
+
if n == 0:
|
953
|
+
return 0
|
954
|
+
pos = self._pos
|
955
|
+
if pos > len(self._buffer):
|
956
|
+
# Inserts null bytes between the current end of the file and the new write position.
|
957
|
+
padding = b'\x00' * (pos - len(self._buffer))
|
958
|
+
self._buffer += padding
|
959
|
+
self._buffer[pos:pos + n] = b
|
960
|
+
self._pos += n
|
961
|
+
return n
|
962
|
+
|
963
|
+
def seek(self, pos, whence=0):
|
964
|
+
if self.closed:
|
965
|
+
raise ValueError('seek on closed file')
|
966
|
+
try:
|
967
|
+
pos_index = pos.__index__
|
968
|
+
except AttributeError:
|
969
|
+
raise TypeError(f'{pos!r} is not an integer')
|
970
|
+
else:
|
971
|
+
pos = pos_index()
|
972
|
+
if whence == 0:
|
973
|
+
if pos < 0:
|
974
|
+
raise ValueError('negative seek position %r' % (pos,))
|
975
|
+
self._pos = pos
|
976
|
+
elif whence == 1:
|
977
|
+
self._pos = max(0, self._pos + pos)
|
978
|
+
elif whence == 2:
|
979
|
+
self._pos = max(0, len(self._buffer) + pos)
|
980
|
+
else:
|
981
|
+
raise ValueError('unsupported whence value')
|
982
|
+
return self._pos
|
983
|
+
|
984
|
+
def tell(self):
|
985
|
+
if self.closed:
|
986
|
+
raise ValueError('tell on closed file')
|
987
|
+
return self._pos
|
988
|
+
|
989
|
+
def truncate(self, pos=None):
|
990
|
+
if self.closed:
|
991
|
+
raise ValueError('truncate on closed file')
|
992
|
+
if pos is None:
|
993
|
+
pos = self._pos
|
994
|
+
else:
|
995
|
+
try:
|
996
|
+
pos_index = pos.__index__
|
997
|
+
except AttributeError:
|
998
|
+
raise TypeError(f'{pos!r} is not an integer')
|
999
|
+
else:
|
1000
|
+
pos = pos_index()
|
1001
|
+
if pos < 0:
|
1002
|
+
raise ValueError('negative truncate position %r' % (pos,))
|
1003
|
+
del self._buffer[pos:]
|
1004
|
+
return pos
|
1005
|
+
|
1006
|
+
def readable(self):
|
1007
|
+
if self.closed:
|
1008
|
+
raise ValueError('I/O operation on closed file.')
|
1009
|
+
return True
|
1010
|
+
|
1011
|
+
def writable(self):
|
1012
|
+
if self.closed:
|
1013
|
+
raise ValueError('I/O operation on closed file.')
|
1014
|
+
return True
|
1015
|
+
|
1016
|
+
def seekable(self):
|
1017
|
+
if self.closed:
|
1018
|
+
raise ValueError('I/O operation on closed file.')
|
1019
|
+
return True
|
1020
|
+
|
1021
|
+
|
1022
|
+
class BufferedReader(_BufferedIOMixin):
|
1023
|
+
"""
|
1024
|
+
BufferedReader(raw[, buffer_size])
|
1025
|
+
|
1026
|
+
A buffer for a readable, sequential BaseRawIO object.
|
1027
|
+
|
1028
|
+
The constructor creates a BufferedReader for the given readable raw stream and buffer_size. If buffer_size is
|
1029
|
+
omitted, DEFAULT_BUFFER_SIZE is used.
|
1030
|
+
"""
|
1031
|
+
|
1032
|
+
def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
|
1033
|
+
"""Create a new buffered reader using the given readable raw IO object."""
|
1034
|
+
|
1035
|
+
if not raw.readable():
|
1036
|
+
raise OSError('"raw" argument must be readable.')
|
1037
|
+
|
1038
|
+
_BufferedIOMixin.__init__(self, raw)
|
1039
|
+
if buffer_size <= 0:
|
1040
|
+
raise ValueError('invalid buffer size')
|
1041
|
+
self.buffer_size = buffer_size
|
1042
|
+
self._reset_read_buf()
|
1043
|
+
self._read_lock = threading.Lock()
|
1044
|
+
|
1045
|
+
def readable(self):
|
1046
|
+
return self.raw.readable()
|
1047
|
+
|
1048
|
+
def _reset_read_buf(self):
|
1049
|
+
self._read_buf = b''
|
1050
|
+
self._read_pos = 0
|
1051
|
+
|
1052
|
+
def read(self, size=None):
|
1053
|
+
"""
|
1054
|
+
Read size bytes.
|
1055
|
+
|
1056
|
+
Returns exactly size bytes of data unless the underlying raw IO stream reaches EOF or if the call would block in
|
1057
|
+
non-blocking mode. If size is negative, read until EOF or until read() would block.
|
1058
|
+
"""
|
1059
|
+
|
1060
|
+
if size is not None and size < -1:
|
1061
|
+
raise ValueError('invalid number of bytes to read')
|
1062
|
+
with self._read_lock:
|
1063
|
+
return self._read_unlocked(size)
|
1064
|
+
|
1065
|
+
def _read_unlocked(self, n=None):
|
1066
|
+
nodata_val = b''
|
1067
|
+
empty_values = (b'', None)
|
1068
|
+
buf = self._read_buf
|
1069
|
+
pos = self._read_pos
|
1070
|
+
|
1071
|
+
# Special case for when the number of bytes to read is unspecified.
|
1072
|
+
if n is None or n == -1:
|
1073
|
+
self._reset_read_buf()
|
1074
|
+
if hasattr(self.raw, 'readall'):
|
1075
|
+
chunk = self.raw.readall()
|
1076
|
+
if chunk is None:
|
1077
|
+
return buf[pos:] or None
|
1078
|
+
else:
|
1079
|
+
return buf[pos:] + chunk
|
1080
|
+
|
1081
|
+
chunks = [buf[pos:]] # Strip the consumed bytes.
|
1082
|
+
current_size = 0
|
1083
|
+
while True:
|
1084
|
+
# Read until EOF or until read() would block.
|
1085
|
+
chunk = self.raw.read()
|
1086
|
+
if chunk in empty_values:
|
1087
|
+
nodata_val = chunk
|
1088
|
+
break
|
1089
|
+
current_size += len(chunk)
|
1090
|
+
chunks.append(chunk)
|
1091
|
+
return b''.join(chunks) or nodata_val
|
1092
|
+
|
1093
|
+
# The number of bytes to read is specified, return at most n bytes.
|
1094
|
+
avail = len(buf) - pos # Length of the available buffered data.
|
1095
|
+
if n <= avail:
|
1096
|
+
# Fast path: the data to read is fully buffered.
|
1097
|
+
self._read_pos += n
|
1098
|
+
return buf[pos:pos + n]
|
1099
|
+
|
1100
|
+
# Slow path: read from the stream until enough bytes are read, or until an EOF occurs or until read() would
|
1101
|
+
# block.
|
1102
|
+
chunks = [buf[pos:]]
|
1103
|
+
wanted = max(self.buffer_size, n)
|
1104
|
+
while avail < n:
|
1105
|
+
chunk = self.raw.read(wanted)
|
1106
|
+
if chunk in empty_values:
|
1107
|
+
nodata_val = chunk
|
1108
|
+
break
|
1109
|
+
avail += len(chunk)
|
1110
|
+
chunks.append(chunk)
|
1111
|
+
|
1112
|
+
# n is more than avail only when an EOF occurred or when read() would have blocked.
|
1113
|
+
n = min(n, avail)
|
1114
|
+
out = b''.join(chunks)
|
1115
|
+
self._read_buf = out[n:] # Save the extra data in the buffer.
|
1116
|
+
self._read_pos = 0
|
1117
|
+
return out[:n] if out else nodata_val
|
1118
|
+
|
1119
|
+
def peek(self, size=0):
|
1120
|
+
"""
|
1121
|
+
Returns buffered bytes without advancing the position.
|
1122
|
+
|
1123
|
+
The argument indicates a desired minimal number of bytes; we do at most one raw read to satisfy it. We never
|
1124
|
+
return more than self.buffer_size.
|
1125
|
+
"""
|
1126
|
+
|
1127
|
+
self._checkClosed('peek of closed file')
|
1128
|
+
with self._read_lock:
|
1129
|
+
return self._peek_unlocked(size)
|
1130
|
+
|
1131
|
+
def _peek_unlocked(self, n=0):
|
1132
|
+
want = min(n, self.buffer_size)
|
1133
|
+
have = len(self._read_buf) - self._read_pos
|
1134
|
+
if have < want or have <= 0:
|
1135
|
+
to_read = self.buffer_size - have
|
1136
|
+
current = self.raw.read(to_read)
|
1137
|
+
if current:
|
1138
|
+
self._read_buf = self._read_buf[self._read_pos:] + current
|
1139
|
+
self._read_pos = 0
|
1140
|
+
return self._read_buf[self._read_pos:]
|
1141
|
+
|
1142
|
+
def read1(self, size=-1):
|
1143
|
+
"""Reads up to size bytes, with at most one read() system call."""
|
1144
|
+
|
1145
|
+
# Returns up to size bytes. If at least one byte is buffered, we only return buffered bytes. Otherwise, we do
|
1146
|
+
# one raw read.
|
1147
|
+
self._checkClosed('read of closed file')
|
1148
|
+
if size < 0:
|
1149
|
+
size = self.buffer_size
|
1150
|
+
if size == 0:
|
1151
|
+
return b''
|
1152
|
+
with self._read_lock:
|
1153
|
+
self._peek_unlocked(1)
|
1154
|
+
return self._read_unlocked(min(size, len(self._read_buf) - self._read_pos))
|
1155
|
+
|
1156
|
+
# Implementing readinto() and readinto1() is not strictly necessary (we could rely on the base class that provides
|
1157
|
+
# an implementation in terms of read() and read1()). We do it anyway to keep the _pyio implementation similar to the
|
1158
|
+
# io implementation (which implements the methods for performance reasons).
|
1159
|
+
def _readinto(self, buf, read1):
|
1160
|
+
"""Read data into *buf* with at most one system call."""
|
1161
|
+
|
1162
|
+
self._checkClosed('readinto of closed file')
|
1163
|
+
|
1164
|
+
# Need to create a memoryview object of type 'b', otherwise we may not be able to assign bytes to it, and
|
1165
|
+
# slicing it would create a new object.
|
1166
|
+
if not isinstance(buf, memoryview):
|
1167
|
+
buf = memoryview(buf)
|
1168
|
+
if buf.nbytes == 0:
|
1169
|
+
return 0
|
1170
|
+
buf = buf.cast('B')
|
1171
|
+
|
1172
|
+
written = 0
|
1173
|
+
with self._read_lock:
|
1174
|
+
while written < len(buf):
|
1175
|
+
# First try to read from internal buffer
|
1176
|
+
avail = min(len(self._read_buf) - self._read_pos, len(buf))
|
1177
|
+
if avail:
|
1178
|
+
buf[written: written + avail] = self._read_buf[self._read_pos: self._read_pos + avail]
|
1179
|
+
self._read_pos += avail
|
1180
|
+
written += avail
|
1181
|
+
if written == len(buf):
|
1182
|
+
break
|
1183
|
+
|
1184
|
+
# If remaining space in callers buffer is larger than internal buffer, read directly into callers buffer
|
1185
|
+
if len(buf) - written > self.buffer_size:
|
1186
|
+
n = self.raw.readinto(buf[written:])
|
1187
|
+
if not n:
|
1188
|
+
break # eof
|
1189
|
+
written += n
|
1190
|
+
|
1191
|
+
# Otherwise refill internal buffer - unless we're in read1 mode and already got some data
|
1192
|
+
elif not (read1 and written):
|
1193
|
+
if not self._peek_unlocked(1):
|
1194
|
+
break # eof
|
1195
|
+
|
1196
|
+
# In readinto1 mode, return as soon as we have some data
|
1197
|
+
if read1 and written:
|
1198
|
+
break
|
1199
|
+
|
1200
|
+
return written
|
1201
|
+
|
1202
|
+
def tell(self):
|
1203
|
+
# GH-95782: Keep return value non-negative
|
1204
|
+
return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0)
|
1205
|
+
|
1206
|
+
def seek(self, pos, whence=0):
|
1207
|
+
if whence not in valid_seek_flags:
|
1208
|
+
raise ValueError('invalid whence value')
|
1209
|
+
self._checkClosed('seek of closed file')
|
1210
|
+
with self._read_lock:
|
1211
|
+
if whence == 1:
|
1212
|
+
pos -= len(self._read_buf) - self._read_pos
|
1213
|
+
pos = _BufferedIOMixin.seek(self, pos, whence)
|
1214
|
+
self._reset_read_buf()
|
1215
|
+
return pos
|
1216
|
+
|
1217
|
+
|
1218
|
+
class BufferedWriter(_BufferedIOMixin):
|
1219
|
+
"""
|
1220
|
+
A buffer for a writeable sequential RawIO object.
|
1221
|
+
|
1222
|
+
The constructor creates a BufferedWriter for the given writeable raw stream. If the buffer_size is not given, it
|
1223
|
+
defaults to DEFAULT_BUFFER_SIZE.
|
1224
|
+
"""
|
1225
|
+
|
1226
|
+
def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
|
1227
|
+
if not raw.writable():
|
1228
|
+
raise OSError('"raw" argument must be writable.')
|
1229
|
+
|
1230
|
+
_BufferedIOMixin.__init__(self, raw)
|
1231
|
+
if buffer_size <= 0:
|
1232
|
+
raise ValueError('invalid buffer size')
|
1233
|
+
self.buffer_size = buffer_size
|
1234
|
+
self._write_buf = bytearray()
|
1235
|
+
self._write_lock = threading.Lock()
|
1236
|
+
|
1237
|
+
def writable(self):
|
1238
|
+
return self.raw.writable()
|
1239
|
+
|
1240
|
+
def write(self, b):
|
1241
|
+
if isinstance(b, str):
|
1242
|
+
raise TypeError("can't write str to binary stream")
|
1243
|
+
with self._write_lock:
|
1244
|
+
if self.closed:
|
1245
|
+
raise ValueError('write to closed file')
|
1246
|
+
# XXX we can implement some more tricks to try and avoid partial writes
|
1247
|
+
if len(self._write_buf) > self.buffer_size:
|
1248
|
+
# We're full, so let's pre-flush the buffer. (This may raise BlockingIOError with characters_written ==
|
1249
|
+
# 0.)
|
1250
|
+
self._flush_unlocked()
|
1251
|
+
before = len(self._write_buf)
|
1252
|
+
self._write_buf.extend(b)
|
1253
|
+
written = len(self._write_buf) - before
|
1254
|
+
if len(self._write_buf) > self.buffer_size:
|
1255
|
+
try:
|
1256
|
+
self._flush_unlocked()
|
1257
|
+
except BlockingIOError as e:
|
1258
|
+
if len(self._write_buf) > self.buffer_size:
|
1259
|
+
# We've hit the buffer_size. We have to accept a partial write and cut back our buffer.
|
1260
|
+
overage = len(self._write_buf) - self.buffer_size
|
1261
|
+
written -= overage
|
1262
|
+
self._write_buf = self._write_buf[: self.buffer_size]
|
1263
|
+
raise BlockingIOError(e.errno, e.strerror, written)
|
1264
|
+
return written
|
1265
|
+
|
1266
|
+
def truncate(self, pos=None):
|
1267
|
+
with self._write_lock:
|
1268
|
+
self._flush_unlocked()
|
1269
|
+
if pos is None:
|
1270
|
+
pos = self.raw.tell()
|
1271
|
+
return self.raw.truncate(pos)
|
1272
|
+
|
1273
|
+
def flush(self):
|
1274
|
+
with self._write_lock:
|
1275
|
+
self._flush_unlocked()
|
1276
|
+
|
1277
|
+
def _flush_unlocked(self):
|
1278
|
+
if self.closed:
|
1279
|
+
raise ValueError('flush on closed file')
|
1280
|
+
while self._write_buf:
|
1281
|
+
try:
|
1282
|
+
n = self.raw.write(self._write_buf)
|
1283
|
+
except BlockingIOError:
|
1284
|
+
raise RuntimeError('self.raw should implement RawIOBase: it should not raise BlockingIOError')
|
1285
|
+
if n is None:
|
1286
|
+
raise BlockingIOError(errno.EAGAIN, 'write could not complete without blocking', 0)
|
1287
|
+
if n > len(self._write_buf) or n < 0:
|
1288
|
+
raise OSError('write() returned incorrect number of bytes')
|
1289
|
+
del self._write_buf[:n]
|
1290
|
+
|
1291
|
+
def tell(self):
|
1292
|
+
return _BufferedIOMixin.tell(self) + len(self._write_buf)
|
1293
|
+
|
1294
|
+
def seek(self, pos, whence=0):
|
1295
|
+
if whence not in valid_seek_flags:
|
1296
|
+
raise ValueError('invalid whence value')
|
1297
|
+
with self._write_lock:
|
1298
|
+
self._flush_unlocked()
|
1299
|
+
return _BufferedIOMixin.seek(self, pos, whence)
|
1300
|
+
|
1301
|
+
def close(self):
|
1302
|
+
with self._write_lock:
|
1303
|
+
if self.raw is None or self.closed:
|
1304
|
+
return
|
1305
|
+
# We have to release the lock and call self.flush() (which will probably just re-take the lock) in case flush
|
1306
|
+
# has been overridden in a subclass or the user set self.flush to something. This is the same behavior as the C
|
1307
|
+
# implementation.
|
1308
|
+
try:
|
1309
|
+
# may raise BlockingIOError or BrokenPipeError etc
|
1310
|
+
self.flush()
|
1311
|
+
finally:
|
1312
|
+
with self._write_lock:
|
1313
|
+
self.raw.close()
|
1314
|
+
|
1315
|
+
|
1316
|
+
class BufferedRWPair(BufferedIOBase):
|
1317
|
+
"""
|
1318
|
+
A buffered reader and writer object together.
|
1319
|
+
|
1320
|
+
A buffered reader object and buffered writer object put together to form a sequential IO object that can read and
|
1321
|
+
write. This is typically used with a socket or two-way pipe.
|
1322
|
+
|
1323
|
+
reader and writer are RawIOBase objects that are readable and writeable respectively. If the buffer_size is omitted
|
1324
|
+
it defaults to DEFAULT_BUFFER_SIZE.
|
1325
|
+
"""
|
1326
|
+
|
1327
|
+
# XXX The usefulness of this (compared to having two separate IO objects) is questionable.
|
1328
|
+
|
1329
|
+
def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
|
1330
|
+
"""
|
1331
|
+
Constructor.
|
1332
|
+
|
1333
|
+
The arguments are two RawIO instances.
|
1334
|
+
"""
|
1335
|
+
|
1336
|
+
if not reader.readable():
|
1337
|
+
raise OSError('"reader" argument must be readable.')
|
1338
|
+
|
1339
|
+
if not writer.writable():
|
1340
|
+
raise OSError('"writer" argument must be writable.')
|
1341
|
+
|
1342
|
+
self.reader = BufferedReader(reader, buffer_size)
|
1343
|
+
self.writer = BufferedWriter(writer, buffer_size)
|
1344
|
+
|
1345
|
+
def read(self, size=-1):
|
1346
|
+
if size is None:
|
1347
|
+
size = -1
|
1348
|
+
return self.reader.read(size)
|
1349
|
+
|
1350
|
+
def readinto(self, b):
|
1351
|
+
return self.reader.readinto(b)
|
1352
|
+
|
1353
|
+
def write(self, b):
|
1354
|
+
return self.writer.write(b)
|
1355
|
+
|
1356
|
+
def peek(self, size=0):
|
1357
|
+
return self.reader.peek(size)
|
1358
|
+
|
1359
|
+
def read1(self, size=-1):
|
1360
|
+
return self.reader.read1(size)
|
1361
|
+
|
1362
|
+
def readinto1(self, b):
|
1363
|
+
return self.reader.readinto1(b)
|
1364
|
+
|
1365
|
+
def readable(self):
|
1366
|
+
return self.reader.readable()
|
1367
|
+
|
1368
|
+
def writable(self):
|
1369
|
+
return self.writer.writable()
|
1370
|
+
|
1371
|
+
def flush(self):
|
1372
|
+
return self.writer.flush()
|
1373
|
+
|
1374
|
+
def close(self):
|
1375
|
+
try:
|
1376
|
+
self.writer.close()
|
1377
|
+
finally:
|
1378
|
+
self.reader.close()
|
1379
|
+
|
1380
|
+
def isatty(self):
|
1381
|
+
return self.reader.isatty() or self.writer.isatty()
|
1382
|
+
|
1383
|
+
@property
|
1384
|
+
def closed(self):
|
1385
|
+
return self.writer.closed
|
1386
|
+
|
1387
|
+
|
1388
|
+
class BufferedRandom(BufferedWriter, BufferedReader):
    """
    A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream, raw, given in the first argument. If the
    buffer_size is omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError('invalid whence value')
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that if the raw seek fails, we don't lose buffered data
        # forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise OSError('seek() returned invalid position')
        return pos

    def tell(self):
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


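A short sketch of the read/write/seek interplay that BufferedRandom coordinates above, shown with the stdlib equivalent returned by opening a file in 'w+b' mode (the temporary file is illustrative only):

import tempfile

with tempfile.TemporaryFile('w+b') as f:  # an io.BufferedRandom on most platforms
    f.write(b'hello world')
    f.seek(0)                  # flushes the write buffer before moving
    print(f.read(5))           # b'hello'
    f.seek(6)
    f.write(b'there')          # pending read-ahead is discarded before writing
    f.seek(0)
    print(f.read())            # b'hello there'
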
class FileIO(RawIOBase):
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """
        Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, writing, exclusive creation or
        appending. The file will be created if it doesn't exist when opened for writing or appending; it will be
        truncated when opened for writing. A FileExistsError will be raised if it already exists when opened for
        creating. Opening a file for creating implies writing so this mode behaves in a similar way to 'w'. Add a '+' to
        the mode to allow simultaneous reading and writing. A custom opener can be used by passing a callable as
        *opener*. The underlying file descriptor for the file object is then obtained by calling opener with (*name*,
        *flags*). *opener* must return an open file descriptor (passing os.open as *opener* results in functionality
        similar to passing None).
        """

        if self._fd >= 0:
            # Have to close the existing file first.
            self._stat_atopen = None
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            if isinstance(file, bool):
                warnings.warn('bool is used as a file descriptor', RuntimeWarning, stacklevel=2)
                file = int(file)
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = getattr(os, 'O_NOINHERIT', 0) or getattr(os, 'O_CLOEXEC', 0)
        flags |= noinherit_flag

        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            self._stat_atopen = os.fstat(fd)
            try:
                if stat.S_ISDIR(self._stat_atopen.st_mode):
                    raise IsADirectoryError(
                        errno.EISDIR, os.strerror(errno.EISDIR), file,
                    )
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR don't exist.
                pass

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the end of file (otherwise, it might be done only on
                # the first write()).
                try:
                    os.lseek(fd, 0, io.SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:  # noqa
            self._stat_atopen = None
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        if self._fd >= 0 and self._closefd and not self.closed:
            warnings.warn('unclosed file %r' % (self,), ResourceWarning, stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        raise TypeError(f'cannot pickle {self.__class__.__name__!r} object')

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return '<%s fd=%d mode=%r closefd=%r>' % (
                class_name,
                self._fd,
                self.mode,
                self._closefd,
            )
        else:
            return '<%s name=%r mode=%r closefd=%r>' % (
                class_name,
                name,
                self.mode,
                self._closefd,
            )

    @property
    def _blksize(self):
        if self._stat_atopen is None:
            return DEFAULT_BUFFER_SIZE

        blksize = getattr(self._stat_atopen, 'st_blksize', 0)
        # WASI sets blsize to 0
        if not blksize:
            return DEFAULT_BUFFER_SIZE
        return blksize

    def _checkReadable(self):
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """
        Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested In non-blocking mode, returns None if no
        data is available. Return an empty bytes object at EOF.
        """

        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """
        Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available, or None if no data is available. Return an
        empty bytes object at EOF.
        """

        self._checkClosed()
        self._checkReadable()
        if self._stat_atopen is None or self._stat_atopen.st_size <= 0:
            bufsize = DEFAULT_BUFFER_SIZE
        else:
            # In order to detect end of file, need a read() of at least 1 byte which returns size 0. Oversize the buffer
            # by 1 byte so the I/O can be completed with two read() calls (one for all data, one for EOF) without
            # needing to resize the buffer.
            bufsize = self._stat_atopen.st_size + 1

            if self._stat_atopen.st_size > 65536:
                try:
                    pos = os.lseek(self._fd, 0, io.SEEK_CUR)
                    if self._stat_atopen.st_size >= pos:
                        bufsize = self._stat_atopen.st_size - pos + 1
                except OSError:
                    pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto()."""

        m = memoryview(b).cast('B')
        data = self.read(len(m))
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """
        Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written. The number of bytes actually written is
        returned. In non-blocking mode, returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=io.SEEK_SET):
        """
        Move to new file position.

        Argument offset is a byte count. Optional argument whence defaults to SEEK_SET or 0 (offset from start of file,
        offset should be >= 0); other values are SEEK_CUR or 1 (move relative to current position, positive or
        negative), and SEEK_END or 2 (move relative to end of file, usually negative, although many platforms allow
        seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """

        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """
        tell() -> int. Current file position.

        Can raise OSError for non seekable files."""

        self._checkClosed()
        return os.lseek(self._fd, 0, io.SEEK_CUR)

    def truncate(self, size=None):
        """
        Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell(). The current file position is changed to the
        value of size.
        """

        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        self._stat_atopen = None
        return size

    def close(self):
        """
        Close the file.

        A closed file cannot be used for further I/O operations. close() may be called more than once without error.
        """

        if not self.closed:
            self._stat_atopen = None
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""

        self._checkClosed()
        if self._seekable is None:
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""

        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""

        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""

        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""

        self._checkClosed()
        return os.isatty(self._fd)

    def _isatty_open_only(self):
        """
        Checks whether the file is a TTY using an open-only optimization.

        TTYs are always character devices. If the interpreter knows a file is not a character device when it would call
        ``isatty``, can skip that call. Inside ``open()`` there is a fresh stat result that contains that information.
        Use the stat result to skip a system call. Outside of that context TOCTOU issues (the fd could be arbitrarily
        modified by surrounding code).
        """

        if self._stat_atopen is not None and not stat.S_ISCHR(self._stat_atopen.st_mode):
            return False
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""

        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""

        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'


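A sketch of how the mode string maps onto the open flags FileIO builds above, using the stdlib io.FileIO that this class mirrors (the temporary path and the custom opener are illustrative assumptions):

import io
import os
import tempfile

path = os.path.join(tempfile.mkdtemp(), 'demo.bin')

with io.FileIO(path, 'w') as f:    # O_WRONLY | O_CREAT | O_TRUNC
    print(f.mode)                  # 'wb'
    f.write(b'abc')

def opener(name, flags):
    # The opener receives (name, flags) and must return an open file descriptor.
    return os.open(name, flags, 0o600)

with io.FileIO(path, 'r', opener=opener) as f:
    print(f.mode, f.readall())     # 'rb' b'abc'
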
@io.TextIOBase.register
class TextIOBase(IOBase):
    """
    Base class for text I/O.

    This class provides a character and line based interface to stream I/O.
    """

    def read(self, size=-1):
        """
        Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit EOF. If size is negative or omitted, read
        until EOF.

        Returns a string.
        """

        self._unsupported('read')

    def write(self, s):
        """Write string s to stream and returning an int."""

        self._unsupported('write')

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""

        self._unsupported('truncate')

    def readline(self):
        """
        Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """

        self._unsupported('readline')

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an unusable state.
        """

        self._unsupported('detach')

    @property
    def encoding(self):
        """Subclasses should override."""

        return None

    @property
    def newlines(self):
        """
        Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """

        return None

    @property
    def errors(self):
        """
        Error setting of the decoder or encoder.

        Subclasses should override.
        """

        return None


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""
    Codec used when reading a file in universal newlines mode. It wraps another incremental decoder, translating \r\n
    and \r into \n. It also records the types of newlines encountered. When used with translate=False, it ensures that
    the newline sequence is returned in one piece.
    """

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = '\r' + output
            self.pendingcr = False

        # retain last \r even when not translating data: then readline() is sure to get \r\n in one pass
        if output.endswith('\r') and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace('\r\n', '\n')
            if cr:
                output = output.replace('\r', '\n')

        return output

    def getstate(self):
        if self.decoder is None:
            buf = b''
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        return (
            None,
            '\n',
            '\r',
            ('\r', '\n'),
            '\r\n',
            ('\n', '\r\n'),
            ('\r', '\r\n'),
            ('\r', '\n', '\r\n'),
        )[self.seennl]


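A sketch of the \r\n handling described above: the decoder withholds a trailing '\r' until it can see whether a '\n' follows, so a newline split across chunks still comes out in one piece. Shown with the stdlib io.IncrementalNewlineDecoder, which this class mirrors:

import codecs
import io

inner = codecs.getincrementaldecoder('utf-8')()
dec = io.IncrementalNewlineDecoder(inner, translate=True)

out = dec.decode(b'one\r')               # trailing '\r' is held back
out += dec.decode(b'\ntwo\r')            # ...and rejoined with the '\n' here
out += dec.decode(b'three\n', final=True)
print(repr(out))                         # 'one\ntwo\nthree\n'
print(dec.newlines)                      # ('\r', '\n', '\r\n')
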
class TextIOWrapper(TextIOBase):
    r"""
    Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be decoded or encoded with. It defaults to
    locale.getencoding().

    errors determines the strictness of encoding and decoding (see the codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'. It controls the handling of line endings. If it is None, universal
    newlines is enabled. With this enabled, on input, the lines endings '\n', '\r', or '\r\n' are translated to '\n'
    before being returned to the caller. Conversely, on output, '\n' is translated to the system default line separator,
    os.linesep. If newline is any other of its legal values, that newline becomes the newline when the file is read and
    it is returned untranslated. On output, '\n' is converted to the newline.

    If line_buffering is True, a call to flush is implied when a call to write contains a newline character.
    """

    _CHUNK_SIZE = 2048

    # Initialize _buffer as soon as possible since it's used by __del__() which calls close()
    _buffer = None

    # The write_through argument has no effect here since this implementation always writes through. The argument is
    # present only so that the signature can match the signature of the C version.
    def __init__(
            self,
            buffer,
            encoding=None,
            errors=None,
            newline=None,
            line_buffering=False,
            write_through=False,
    ):
        self._check_newline(newline)
        encoding = text_encoding(encoding)

        if encoding == 'locale':
            encoding = self._get_locale_encoding()

        if not isinstance(encoding, str):
            raise ValueError('invalid encoding: %r' % encoding)

        if not codecs.lookup(encoding)._is_text_encoding:
            msg = (
                '%r is not a text encoding; '
                'use codecs.open() to handle arbitrary codecs'
            )
            raise LookupError(msg % encoding)

        if errors is None:
            errors = 'strict'
        else:
            if not isinstance(errors, str):
                raise ValueError('invalid errors: %r' % errors)
            if _CHECK_ERRORS:
                codecs.lookup_error(errors)

        self._buffer = buffer
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()
        self._has_read1 = hasattr(self.buffer, 'read1')
        self._configure(encoding, errors, newline, line_buffering, write_through)

    def _check_newline(self, newline):
        if newline is not None and not isinstance(newline, str):
            raise TypeError('illegal newline type: %r' % (type(newline),))
        if newline not in (None, '', '\n', '\r', '\r\n'):
            raise ValueError('illegal newline value: %r' % (newline,))

    def _configure(
            self,
            encoding=None,
            errors=None,
            newline=None,
            line_buffering=False,
            write_through=False,
    ):
        self._encoding = encoding
        self._errors = errors
        self._encoder = None
        self._decoder = None
        self._b2cratio = 0.0

        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep

        self._line_buffering = line_buffering
        self._write_through = write_through

        # don't write a BOM in the middle of a file
        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input) where dec_flags is the second (integer) item of
    # the decoder state and next_input is the chunk of input bytes that comes next after the snapshot point. We use
    # this to reconstruct decoder states in tell().

    # Naming convention:
    # - "bytes_..." for integer variables that count input bytes
    # - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        result = f'<{self.__class__.__module__}.{self.__class__.__qualname__}'
        try:
            name = self.name
        except AttributeError:
            pass
        else:
            result += f' name={name!r}'
        try:
            mode = self.mode
        except AttributeError:
            pass
        else:
            result += f' mode={mode!r}'
        return result + f' encoding={self.encoding!r}>'

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    @property
    def write_through(self):
        return self._write_through

    @property
    def buffer(self):
        return self._buffer

    def reconfigure(
            self,
            *,
            encoding=None,
            errors=None,
            newline=Ellipsis,
            line_buffering=None,
            write_through=None,
    ):
        """
        Reconfigure the text stream with new parameters.

        This also flushes the stream.
        """

        if self._decoder is not None and (
                encoding is not None or errors is not None or newline is not Ellipsis
        ):
            raise UnsupportedOperation(
                'It is not possible to set the encoding or newline of stream after the first read',
            )

        if errors is None:
            if encoding is None:
                errors = self._errors
            else:
                errors = 'strict'
        elif not isinstance(errors, str):
            raise TypeError('invalid errors: %r' % errors)

        if encoding is None:
            encoding = self._encoding
        else:
            if not isinstance(encoding, str):
                raise TypeError('invalid encoding: %r' % encoding)
            if encoding == 'locale':
                encoding = self._get_locale_encoding()

        if newline is Ellipsis:
            newline = self._readnl
        self._check_newline(newline)

        if line_buffering is None:
            line_buffering = self.line_buffering
        if write_through is None:
            write_through = self.write_through

        self.flush()
        self._configure(encoding, errors, newline, line_buffering, write_through)

    def seekable(self):
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Write data, where s is a str"""

        if self.closed:
            raise ValueError('write to closed file')
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" % s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and '\n' in s
        if haslf and self._writetranslate and self._writenl != '\n':
            s = s.replace('\n', self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or '\r' in s):
            self.flush()
        if self._snapshot is not None:
            self._set_decoded_chars('')
            self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars. Text returned from the decoder is buffered here
    # until the client requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""

        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""

        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset: offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _get_locale_encoding(self):
        return locale.getencoding()

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""

        if self._decoded_chars_used < n:
            raise AssertionError('rewind decoded_chars out of bounds')
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """Read and decode the next chunk of data from the BufferedReader."""

        # The return value is True unless EOF was reached. The decoded string is placed in self._decoded_chars
        # (replacing its previous value). The entire input chunk is sent to the decoder, though some of it may remain
        # buffered in the decoder, yet to be converted.

        if self._decoder is None:
            raise ValueError('no decoder')

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()

            # Given this, we know there was a valid snapshot point len(dec_buffer) bytes ago with decoder state (b'',
            # dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        if self._has_read1:
            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        else:
            input_chunk = self.buffer.read(self._CHUNK_SIZE)
        eof = not input_chunk
        decoded_chars = self._decoder.decode(input_chunk, eof)
        self._set_decoded_chars(decoded_chars)
        if decoded_chars:
            self._b2cratio = len(input_chunk) / len(self._decoded_chars)
        else:
            self._b2cratio = 0.0

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read, the next input to be decoded is dec_buffer +
            # input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(
            self,
            position,
            dec_flags=0,
            bytes_to_feed=0,
            need_eof=False,
            chars_to_skip=0,
    ):
        # The meaning of a tell() cookie is: seek to position, set the decoder flags to dec_flags, read bytes_to_feed
        # bytes, feed them into the decoder with need_eof as the EOF flag, then skip chars_to_skip characters of the
        # decoded result. For most simple decoders, tell() will often just give a byte offset in the file.
        return (
            position
            | (dec_flags << 64)
            | (bytes_to_feed << 128)
            | (chars_to_skip << 192)
            | bool(need_eof) << 256
        )

    def _unpack_cookie(self, bigint):
        rest, position = divmod(bigint, 1 << 64)
        rest, dec_flags = divmod(rest, 1 << 64)
        rest, bytes_to_feed = divmod(rest, 1 << 64)
        need_eof, chars_to_skip = divmod(rest, 1 << 64)
        return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip

    def tell(self):
        if not self._seekable:
            raise UnsupportedOperation('underlying stream is not seekable')
        if not self._telling:
            raise OSError('telling position disabled by next() call')
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError('pending decoded text')
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder forward until it gives us enough decoded
        # characters.
        saved_state = decoder.getstate()
        try:
            # Fast search for an acceptable start point, close to our current pos.
            # Rationale: calling decoder.decode() has a large overhead regardless of chunk size; we want the number of
            # such calls to be O(1) in most situations (common decoders, sensible input).
            # Actually, it will be exactly 1 for fixed-size codecs (all 8-bit codecs, also UTF-16 and UTF-32).
            skip_bytes = int(self._b2cratio * chars_to_skip)
            skip_back = 1
            assert skip_bytes <= len(next_input)
            while skip_bytes > 0:
                decoder.setstate((b'', dec_flags))
                # Decode up to temptative start point
                n = len(decoder.decode(next_input[:skip_bytes]))
                if n <= chars_to_skip:
                    b, d = decoder.getstate()
                    if not b:
                        # Before pos and no bytes buffered in decoder => OK
                        dec_flags = d
                        chars_to_skip -= n
                        break
                    # Skip back by buffered amount and reset heuristic
                    skip_bytes -= len(b)
                    skip_back = 1
                else:
                    # We're too far ahead, skip back a bit
                    skip_bytes -= skip_back
                    skip_back = skip_back * 2
            else:
                skip_bytes = 0
                decoder.setstate((b'', dec_flags))

            # Note our initial start point.
            start_pos = position + skip_bytes
            start_flags = dec_flags
            if chars_to_skip == 0:
                # We haven't moved from the start point.
                return self._pack_cookie(start_pos, start_flags)

            # Feed the decoder one byte at a time. As we go, note the nearest "safe start point" before the current
            # location (a point where the decoder has nothing buffered, so seek() can safely start from there and
            # advance to this location).
            bytes_fed = 0
            need_eof = False
            # Chars decoded since `start_pos`
            chars_decoded = 0
            for i in range(skip_bytes, len(next_input)):
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_input[i: i + 1]))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = True
                if chars_decoded < chars_to_skip:
                    raise OSError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos,
                start_flags,
                bytes_fed,
                need_eof,
                chars_to_skip,
            )
        finally:
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        if self.buffer is None:
            raise ValueError('buffer is already detached')
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        def _reset_encoder(position):
            """Reset the encoder (merely useful for proper BOM handling)"""

            try:
                encoder = self._encoder or self._get_encoder()
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
            else:
                if position != 0:
                    encoder.setstate(0)
                else:
                    encoder.reset()

        if self.closed:
            raise ValueError('tell on closed file')
        if not self._seekable:
            raise UnsupportedOperation('underlying stream is not seekable')
        if whence == io.SEEK_CUR:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        elif whence == io.SEEK_END:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, whence)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            _reset_encoder(position)
            return position
        if whence != 0:
            raise ValueError('unsupported whence (%r)' % (whence,))
        if cookie < 0:
            raise ValueError('negative seek position %r' % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point and replay the effect of read(chars_to_skip) from
        # there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise OSError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        _reset_encoder(cookie)
        return cookie

    def read(self, size=None):
        self._checkReadable()

        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f'{size!r} is not an integer')
            else:
                size = size_index()

        decoder = self._decoder or self._get_decoder()

        if size < 0:
            # Read everything.
            result = self._get_decoded_chars() + decoder.decode(self.buffer.read(), final=True)
            if self._snapshot is not None:
                self._set_decoded_chars('')
                self._snapshot = None
            return result

        else:
            # Keep reading chunks until we have size characters to return.
            eof = False
            result = self._get_decoded_chars(size)
            while len(result) < size and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(size - len(result))
            return result

    def __next__(self):
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, size=None):
        if self.closed:
            raise ValueError('read from closed file')
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f'{size!r} is not an integer')
            else:
                size = size_index()

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find('\n', start)
                crpos = line.find('\r', start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if size >= 0 and len(line) >= size:
                endpos = size  # reached length size
                break

            # No line ending seen yet - get more data'
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if size >= 0 and endpos > size:
            endpos = size  # don't exceed size

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None


class StringIO(TextIOWrapper):
    """
    Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object. The newline argument is like the one of TextIOWrapper's
    constructor.
    """

    def __init__(self, initial_value='', newline='\n'):
        super().__init__(
            BytesIO(),
            encoding='utf-8',
            errors='surrogatepass',
            newline=newline,
        )

        # Issue #5645: make universal newlines semantics the same as in the C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError(
                    f'initial_value must be str or None, not {type(initial_value).__name__}',
                )
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO, that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported('detach')