megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +124 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +84 -65
- megfile/lib/combine_reader.py +12 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +46 -54
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +58 -51
- megfile/lib/s3_cached_handler.py +13 -14
- megfile/lib/s3_limited_seekable_writer.py +37 -28
- megfile/lib/s3_memory_handler.py +34 -30
- megfile/lib/s3_pipe_handler.py +24 -25
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +7 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +73 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
- megfile-3.1.2.dist-info/RECORD +55 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.1.dist-info/RECORD +0 -55
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/interfaces.py
CHANGED
@@ -3,7 +3,16 @@ from abc import ABC, abstractmethod
 from io import IOBase, UnsupportedOperation
 from typing import IO, AnyStr, Iterable, List, Optional
 
-from megfile.pathlike import
+from megfile.pathlike import (
+    Access,
+    BasePath,
+    BaseURIPath,
+    FileEntry,
+    PathLike,
+    Self,
+    StatResult,
+    URIPath,
+)
 
 __all__ = [
     "Access",
@@ -28,33 +37,32 @@ __all__ = [
 def fullname(o):
     klass = o.__class__
     module = klass.__module__
-    if module ==
+    if module == "builtins":
        return klass.__qualname__  # avoid outputs like 'builtins.str'
-    return module +
+    return module + "." + klass.__qualname__
 
 
 # 1. Default value of closed is False
 # 2. closed is set to True when close() are called
 # 3. close() will only be called once
 class Closable(ABC):
-
     @property
     def closed(self) -> bool:
-
-        return getattr(self,
+        """Return True if the file-like object is closed."""
+        return getattr(self, "__closed__", False)
 
     @abstractmethod
     def _close(self) -> None:
         pass  # pragma: no cover
 
     def close(self) -> None:
-
+        """Flush and close the file-like object.
 
         This method has no effect if the file is already closed.
-
-        if not getattr(self,
+        """
+        if not getattr(self, "__closed__", False):
             self._close()
-            setattr(self,
+            setattr(self, "__closed__", True)
 
     def __enter__(self: Self) -> Self:
         return self
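Note: the three numbered comments above state the Closable contract: closed defaults to False, close() sets it, and _close() runs at most once. A minimal sketch of that contract written for this review, not taken from the megfile sources (the OneShot class is hypothetical and relies only on what this hunk shows):

from megfile.interfaces import Closable


class OneShot(Closable):
    def __init__(self) -> None:
        self.close_calls = 0

    def _close(self) -> None:
        # the only hook a subclass has to implement
        self.close_calls += 1


resource = OneShot()
resource.close()
resource.close()  # second call is a no-op: __closed__ is already True
assert resource.close_calls == 1
assert resource.closed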
@@ -64,23 +72,25 @@ class Closable(ABC):
 
 
 class FileLike(Closable, IOBase, IO[AnyStr], ABC):  # pytype: disable=signature-mismatch
-
     def fileno(self) -> int:
-        raise UnsupportedOperation(
+        raise UnsupportedOperation("not a local file")
 
     def isatty(self) -> bool:
         return False
 
     def __repr__(self) -> str:
-        return
-            fullname(self),
+        return "<%s name=%r mode=%r>" % (
+            fullname(self),
+            self.name,
+            self.mode,
+        )  # pragma: no cover
 
     def seekable(self) -> bool:
-
+        """Return True if the file-like object can be sought."""
         return False
 
     def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-
+        """Change stream position.
 
         Seek to byte `offset` relative to position indicated by `whence`:
         0  Start of stream (the default).  `offset` should be >= 0;
@@ -88,37 +98,37 @@ class FileLike(Closable, IOBase, IO[AnyStr], ABC):  # pytype: disable=signature-mismatch
         2  End of stream - `offset` usually negative.
 
         Return the new absolute position.
-
-        raise UnsupportedOperation(
+        """
+        raise UnsupportedOperation("not seekable")  # pragma: no cover
 
     def readable(self) -> bool:
-
+        """Return True if the file-like object can be read."""
         return False  # pragma: no cover
 
     def writable(self) -> bool:
-
+        """Return True if the file-like object can be written."""
         return False
 
     def flush(self) -> None:
-
+        """Flush write buffers, if applicable.
 
         This is not implemented for read-only and non-blocking streams.
-
+        """
 
     def __del__(self) -> None:
-        # TODO: Next version should turn on __del__ for auto closing,
+        # TODO: Next version should turn on __del__ for auto closing,
+        # and disable this in child class like CombineReader
         pass
 
 
 class Seekable(FileLike, ABC):
-
     def seekable(self) -> bool:
-
+        """Return True if the file-like object can be sought."""
         return True
 
     @abstractmethod
     def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-
+        """Change stream position.
 
         Seek to byte offset `cookie` relative to position indicated by `whence`:
         0  Start of stream (the default).  `cookie` should be >= 0;
@@ -126,42 +136,41 @@ class Seekable(FileLike, ABC):
         2  End of stream - `cookie` usually negative.
 
         Return the new absolute position.
-
+        """
 
 
 class Readable(FileLike[AnyStr], ABC):
-
     def readable(self) -> bool:
-
+        """Return True if the file-like object can be read."""
         return True
 
     @abstractmethod
     def read(self, size: Optional[int] = None) -> AnyStr:
-
+        """Read at most `size` bytes or string, returned as a bytes or string object.
 
         If the `size` argument is negative, read until EOF is reached.
         Return an empty bytes or string object at EOF.
-
+        """
 
     @abstractmethod
     def readline(self, size: Optional[int] = None) -> AnyStr:  # pyre-ignore[15]
-
+        """Next line from the file, as a bytes or string object.
 
-        Retain newline. A non-negative `size` argument limits the maximum number of
+        Retain newline. A non-negative `size` argument limits the maximum number of
+        bytes or string to return (an incomplete line may be returned then).
         Return an empty bytes object at EOF.
-
+        """
 
-    def readlines(  # pyre-ignore[15]
-
-        '''Return a list of lines from the stream.'''
+    def readlines(self, hint: Optional[int] = None) -> List[AnyStr]:  # pyre-ignore[15]
+        """Return a list of lines from the stream."""
         return self.read(size=hint).splitlines(True)  # pyre-ignore[7]
 
     def readinto(self, buffer: bytearray) -> int:
-
+        """Read bytes into buffer.
 
         Returns number of bytes read (0 for EOF), or None if the object
         is set not to block and has no data to read.
-
+        """
         if "b" not in self.mode:
             raise OSError("'readinto' only works on binary files")
 
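Note: FileLike above defaults seekable()/readable()/writable() to False, and the mixins (Seekable, Readable, Writable) flip the relevant flag and declare the abstract methods. A hedged sketch of a concrete reader built only on what these hunks show; BytesReader and its in-memory backing are hypothetical, with name/mode supplied because __repr__ and readinto use them:

from typing import Optional

from megfile.interfaces import Readable


class BytesReader(Readable[bytes]):
    def __init__(self, name: str, data: bytes) -> None:
        self._name = name
        self._data = data
        self._pos = 0

    @property
    def name(self) -> str:
        return self._name

    @property
    def mode(self) -> str:
        return "rb"

    def _close(self) -> None:
        self._data = b""

    def read(self, size: Optional[int] = None) -> bytes:
        if size is None or size < 0:
            size = len(self._data) - self._pos
        data = self._data[self._pos : self._pos + size]
        self._pos += len(data)
        return data

    def readline(self, size: Optional[int] = None) -> bytes:
        end = self._data.find(b"\n", self._pos) + 1
        if end == 0:  # no newline left: read to EOF
            end = len(self._data)
        if size is not None and size >= 0:
            end = min(end, self._pos + size)
        data = self._data[self._pos : end]
        self._pos = end
        return data


with BytesReader("demo", b"hello\nworld\n") as reader:
    assert reader.readline() == b"hello\n"
    assert reader.read() == b"world\n"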
@@ -180,41 +189,40 @@ class Readable(FileLike[AnyStr], ABC):
         return self
 
     def truncate(self, size: Optional[int] = None) -> int:
-        raise OSError(
+        raise OSError("not writable")
 
     def write(self, data: AnyStr) -> int:
-        raise OSError(
+        raise OSError("not writable")
 
     def writelines(self, lines: Iterable[AnyStr]) -> None:  # pyre-ignore[14]
-        raise OSError(
+        raise OSError("not writable")
 
 
 class Writable(FileLike[AnyStr], ABC):
-
     def writable(self) -> bool:
-
+        """Return True if the file-like object can be written."""
         return True
 
     @abstractmethod
     def write(self, data: AnyStr) -> int:
-
+        """Write bytes or string to file.
 
         Return the number of bytes or string written.
-
+        """
 
     def writelines(self, lines: Iterable[AnyStr]) -> None:  # pyre-ignore[14]
-
+        """Write `lines` to the file.
 
-        Note that newlines are not added.
-        `lines` can be any iterable object producing bytes-like or string-like objects.
+        Note that newlines are not added.
+        `lines` can be any iterable object producing bytes-like or string-like objects.
         This is equivalent to calling write() for each element.
-
+        """
         for line in lines:
             self.write(line)
 
     def truncate(self, size: Optional[int] = None) -> int:
         """
-        Resize the stream to the given size in bytes.
+        Resize the stream to the given size in bytes.
 
         :param size: resize size, defaults to None
         :type size: int, optional
@@ -223,21 +231,19 @@ class Writable(FileLike[AnyStr], ABC):
         :return: The new file size.
         :rtype: int
         """
-        raise UnsupportedOperation(
+        raise UnsupportedOperation("not support truncate")
 
     def read(self, size: Optional[int] = None) -> AnyStr:
-        raise OSError(
+        raise OSError("not readable")
 
     def readline(self, size: Optional[int] = None) -> AnyStr:  # pyre-ignore[15]
-        raise OSError(
+        raise OSError("not readable")
 
-    def readlines(  # pyre-ignore[15]
-
-        raise OSError('not readable')
+    def readlines(self, hint: Optional[int] = None) -> List[AnyStr]:  # pyre-ignore[15]
+        raise OSError("not readable")
 
 
 class FileCacher(ABC):
-
     @property
     @abstractmethod
     def cache_path(self) -> str:
@@ -245,21 +251,21 @@ class FileCacher(ABC):
 
     @property
     def closed(self) -> bool:
-
-        return getattr(self,
+        """Return True if the file-like object is closed."""
+        return getattr(self, "__closed__", False)
 
     @abstractmethod
     def _close(self) -> None:
         pass  # pragma: no cover
 
     def close(self) -> None:
-
+        """Flush and close the file-like object.
 
         This method has no effect if the file is already closed.
-
-        if not getattr(self,
+        """
+        if not getattr(self, "__closed__", False):
             self._close()
-            setattr(self,
+            setattr(self, "__closed__", True)
 
     def __enter__(self) -> str:
         return self.cache_path
@@ -282,7 +288,6 @@ class NullCacher(FileCacher):
 
 
 class ContextIterator(Closable):
-
     def __init__(self, iterable: Iterable) -> None:
         self._iter = iter(iterable)
 
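Note: FileCacher above is a small context-manager ABC whose __enter__ hands back a local cache path rather than the object itself. A hedged sketch of that pattern; LocalFileCacher is hypothetical, and the with-statement assumes FileCacher's __exit__ (outside the hunks shown here) calls close() the same way its close()/closed pair above suggests:

from megfile.interfaces import FileCacher


class LocalFileCacher(FileCacher):
    def __init__(self, path: str) -> None:
        self._path = path

    @property
    def cache_path(self) -> str:
        return self._path

    def _close(self) -> None:
        pass  # a purely local path needs no cleanup


with LocalFileCacher("/tmp/example.txt") as cache_path:
    print(cache_path)  # __enter__ yields the path string, not the cacher object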

megfile/lib/base_prefetch_reader.py
CHANGED
@@ -8,15 +8,22 @@ from math import ceil
 from statistics import mean
 from typing import Optional
 
-from megfile.config import
+from megfile.config import (
+    BACKOFF_FACTOR,
+    BACKOFF_INITIAL,
+    DEFAULT_BLOCK_CAPACITY,
+    DEFAULT_BLOCK_SIZE,
+    DEFAULT_MAX_RETRY_TIMES,
+    GLOBAL_MAX_WORKERS,
+    NEWLINE,
+)
 from megfile.interfaces import Readable, Seekable
-from megfile.utils import get_human_size, process_local
+from megfile.utils import ProcessLocal, get_human_size, process_local
 
 _logger = get_logger(__name__)
 
 
 class SeekRecord:
-
     def __init__(self, seek_index: int):
         self.seek_index = seek_index
         self.seek_count = 0
@@ -24,25 +31,25 @@ class SeekRecord:
 
 
 class BasePrefetchReader(Readable[bytes], Seekable, ABC):
-
-    Reader to fast read the remote file content.
-    This will divide the file content into equal parts of block_size size,
+    """
+    Reader to fast read the remote file content.
+    This will divide the file content into equal parts of block_size size,
     and will use LRU to cache at most block_capacity blocks in memory.
-    open(), seek() and read() will trigger prefetch read.
-    The prefetch will cached block_forward blocks of data from offset position
+    open(), seek() and read() will trigger prefetch read.
+    The prefetch will cached block_forward blocks of data from offset position
     (the position after reading if the called function is read).
-
+    """
 
     def __init__(
-
-
-
-
-
-
-
-
-
+        self,
+        *,
+        block_size: int = DEFAULT_BLOCK_SIZE,
+        block_capacity: int = DEFAULT_BLOCK_CAPACITY,
+        block_forward: Optional[int] = None,
+        max_retries: int = DEFAULT_MAX_RETRY_TIMES,
+        max_workers: Optional[int] = None,
+        **kwargs,
+    ):
         self._is_auto_scaling = block_forward is None
         if block_forward is None:
             block_forward = max(block_capacity - 1, 1)
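Note: the docstring and the new keyword-only signature above describe how the reader slices a file: block_size bytes per block, at most block_capacity blocks kept in an LRU cache, and block_forward blocks prefetched ahead, defaulting to block_capacity - 1. A standalone illustration with made-up numbers, mirroring the arithmetic __init__ performs (the ceil() call appears in a later hunk):

from math import ceil

block_size = 8 * 2**20                        # example value: 8 MiB per block
block_capacity = 16                           # at most 16 blocks kept in the LRU cache
block_forward = max(block_capacity - 1, 1)    # default when block_forward is None

content_size = 100 * 2**20                    # a hypothetical 100 MiB remote file
block_stop = ceil(content_size / block_size)  # 13 blocks cover the whole file
print(block_stop, block_forward)              # 13 15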
@@ -50,9 +57,10 @@
         if block_capacity <= block_forward:
             # TODO: replace AssertionError with ValueError in 4.0.0
             raise AssertionError(
-
-
-                (block_capacity, block_forward)
+                "block_capacity should greater than block_forward, "
+                "got: block_capacity=%s, block_forward=%s"
+                % (block_capacity, block_forward)
+            )
 
         # user maybe put block_size with 'numpy.uint64' type
         block_size = int(block_size)
@@ -60,9 +68,12 @@
         self._max_retries = max_retries
         self._block_size = block_size
         self._block_capacity = block_capacity  # Max number of blocks
-        self._block_forward = block_forward  # Number of blocks every prefetch, which should be smaller than block_capacity
 
-
+        # Number of blocks every prefetch, which should be smaller than block_capacity
+        self._block_forward = block_forward
+
+        self._process_local = ProcessLocal()
+
         self._content_size = self._get_content_size()
         self._block_stop = ceil(self._content_size / block_size)
 
@@ -75,20 +86,25 @@
         self._is_global_executor = False
         if max_workers is None:
             self._executor = process_local(
-                f
+                f"{self.__class__.__name__}.executor",
                 ThreadPoolExecutor,
-                max_workers=GLOBAL_MAX_WORKERS
+                max_workers=GLOBAL_MAX_WORKERS,
+            )
             self._is_global_executor = True
         else:
             self._executor = ThreadPoolExecutor(max_workers=max_workers)
         self._seek_buffer(0)
 
-        _logger.debug(
+        _logger.debug("open file: %r, mode: %s" % (self.name, self.mode))
 
     @abstractmethod
     def _get_content_size(self):
         pass
 
+    @property
+    def _futures(self):
+        return self._process_local("futures", self._get_futures)
+
     def _get_futures(self):
         return LRUCacheFutureManager()
 
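Note: two of the additions above route state through process-local storage: the shared executor comes from process_local(...) keyed by the class name, and the new _futures property resolves through self._process_local("futures", self._get_futures), presumably so thread pools and pending futures are rebuilt rather than shared after a fork. The class below is not megfile's ProcessLocal; it is only a toy sketch of the call pattern used here:

import os


class ToyProcessLocal:
    def __init__(self) -> None:
        self._pid = os.getpid()
        self._values = {}

    def __call__(self, key, factory):
        if os.getpid() != self._pid:  # running in a forked child: drop inherited state
            self._pid = os.getpid()
            self._values = {}
        if key not in self._values:   # build lazily, once per process
            self._values[key] = factory()
        return self._values[key]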
@@ -99,7 +115,7 @@
 
     @property
     def mode(self) -> str:
-        return
+        return "rb"
 
     def tell(self) -> int:
         return self._offset
@@ -112,15 +128,15 @@
     def _offset(self, value: int):
         if value > self._backoff_size:
             _logger.debug(
-
-
-
+                "reading file: %r, current offset / total size: %s / %s"
+                % (self.name, get_human_size(value), get_human_size(self._content_size))
+            )
         while value > self._backoff_size:
             self._backoff_size *= BACKOFF_FACTOR
         self.__offset = value
 
     def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-
+        """Change stream position.
 
         Seek to byte offset pos relative to position indicated by whence:
 
@@ -129,10 +145,10 @@
         2  End of stream - pos usually negative.
 
         Returns the new absolute position.
-
+        """
         offset = int(offset)  # user maybe put offset with 'numpy.uint64' type
         if self.closed:
-            raise IOError(
+            raise IOError("file already closed: %r" % self.name)
         if whence == os.SEEK_CUR:
             target_offset = self._offset + offset
         elif whence == os.SEEK_END:
@@ -140,7 +156,7 @@
         elif whence == os.SEEK_SET:
             target_offset = offset
         else:
-            raise ValueError(
+            raise ValueError("invalid whence: %r" % whence)
 
         if target_offset == self._offset:
             return target_offset
@@ -152,19 +168,19 @@
         return self._offset
 
     def read(self, size: Optional[int] = None) -> bytes:
-
+        """Read at most size bytes, returned as a bytes object.
 
         If the size argument is negative, read until EOF is reached.
         Return an empty bytes object at EOF.
-
+        """
         if self.closed:
-            raise IOError(
+            raise IOError("file already closed: %r" % self.name)
 
         if len(self._seek_history) > 0:
             self._seek_history[-1].read_count += 1
 
         if self._offset >= self._content_size:
-            return b
+            return b""
 
         if size is None or size < 0:
             size = self._content_size - self._offset
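Note: the seek() docstring restored above uses the standard whence values. A quick standalone illustration with the os.SEEK_* constants, shown on a plain io.BytesIO rather than on a prefetch reader:

import os
from io import BytesIO

f = BytesIO(b"0123456789")
f.seek(4, os.SEEK_SET)   # absolute: position 4
f.seek(2, os.SEEK_CUR)   # relative to the current position: 6
f.seek(-3, os.SEEK_END)  # relative to the end: 7
print(f.read())          # b'789'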
@@ -174,12 +190,12 @@
         if self._block_forward == 1:
             block_index = self._offset // self._block_size
             if len(self._seek_history) > 0:
-                mean_read_count = mean(
-                    item.read_count for item in self._seek_history)
+                mean_read_count = mean(item.read_count for item in self._seek_history)
             else:
                 mean_read_count = 0
             if block_index not in self._futures and mean_read_count < 3:
-                # No using LRP will be better if read() are always called less than 3
+                # No using LRP will be better if read() are always called less than 3
+                # times after seek()
                 return self._read(size)
 
         data = self._buffer.read(size)
@@ -198,20 +214,20 @@
         return buffer.getvalue()
 
     def readline(self, size: Optional[int] = None) -> bytes:
-
+        """Next line from the file, as a bytes object.
 
         Retain newline. A non-negative size argument limits the maximum
         number of bytes to return (an incomplete line may be returned then).
         If the size argument is negative, read until EOF is reached.
         Return an empty bytes object at EOF.
-
+        """
         if self.closed:
-            raise IOError(
+            raise IOError("file already closed: %r" % self.name)
 
         if len(self._seek_history) > 0:
             self._seek_history[-1].read_count += 1
         if self._offset >= self._content_size:
-            return b
+            return b""
 
         if size is None or size < 0:
             size = self._content_size - self._offset
@@ -237,21 +253,22 @@
 
     def _read(self, size: int) -> bytes:
         if size == 0 or self._offset >= self._content_size:
-            return b
+            return b""
 
-        data = self._fetch_response(
-
+        data = self._fetch_response(start=self._offset, end=self._offset + size - 1)[
+            "Body"
+        ].read()
         self.seek(size, os.SEEK_CUR)
         return data
 
     def readinto(self, buffer: bytearray) -> int:
-
+        """Read bytes into buffer.
 
         Returns number of bytes read (0 for EOF), or None if the object
         is set not to block and has no data to read.
-
+        """
         if self.closed:
-            raise IOError(
+            raise IOError("file already closed: %r" % self.name)
 
         if self._offset >= self._content_size:
             return 0
@@ -260,7 +277,7 @@
         size = min(size, self._content_size - self._offset)
 
         data = self._buffer.read(size)
-        buffer[:len(data)] = data
+        buffer[: len(data)] = data
         if len(data) == size:
             self._offset += len(data)
             return size
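Note: readinto() above copies the first chunk into the caller's buffer with slice assignment (buffer[: len(data)] = data); the loop in the next hunk extends the same pattern at an offset. The same idea in isolation:

buffer = bytearray(10)
data = b"abcdef"
buffer[: len(data)] = data                  # fill the head of the buffer in place
offset = len(data)
more = b"ghi"
buffer[offset : offset + len(more)] = more  # append the next chunk in place
print(bytes(buffer))                        # b'abcdefghi\x00'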
@@ -269,7 +286,7 @@
         while offset < size:
             remain_size = size - offset
             data = self._next_buffer.read(remain_size)
-            buffer[offset:offset + len(data)] = data
+            buffer[offset : offset + len(data)] = data
             offset += len(data)
 
         self._offset += offset
@@ -306,8 +323,11 @@
 
     @property
     def _next_buffer(self) -> BytesIO:
-        # Get next buffer by this function when finished reading current buffer
-        #
+        # Get next buffer by this function when finished reading current buffer
+        # (self._buffer)
+        #
+        # Make sure that _buffer is used before using _next_buffer(), or will make
+        # _cached_offset invalid
         self._block_index += 1
         self._cached_offset = 0
         return self._buffer
@@ -319,7 +339,8 @@
         history = []
         for item in self._seek_history:
             if item.seek_count > self._block_capacity * 2:
-                # seek interval is bigger than self._block_capacity * 2, drop it
+                # seek interval is bigger than self._block_capacity * 2, drop it
+                # from self._seek_history
                 continue
             if index - 1 < item.seek_index < index + 2:
                 continue
@@ -328,23 +349,22 @@
             history.append(SeekRecord(index))
         self._seek_history = history
         self._block_forward = max(
-            (self._block_capacity - 1) // len(self._seek_history), 1
+            (self._block_capacity - 1) // len(self._seek_history), 1
+        )
 
         self._cached_offset = offset
         self._block_index = index
 
     @abstractmethod
     def _fetch_response(
-
-
-            end: Optional[int] = None) -> dict:
+        self, start: Optional[int] = None, end: Optional[int] = None
+    ) -> dict:
         pass
 
     def _fetch_buffer(self, index: int) -> BytesIO:
-        start, end = index * self._block_size, (
-            index + 1) * self._block_size - 1
+        start, end = index * self._block_size, (index + 1) * self._block_size - 1
         response = self._fetch_response(start=start, end=end)
-        return response[
+        return response["Body"]
 
     def _submit_future(self, index: int):
         if index < 0 or index >= self._block_stop:
@@ -361,7 +381,7 @@
         self._futures.cleanup(self._block_capacity)
 
     def _close(self):
-        _logger.debug(
+        _logger.debug("close file: %r" % self.name)
 
         if not self._is_global_executor:
             self._executor.shutdown()
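Note: _fetch_buffer() above maps a block index to a byte range with start = index * block_size and end = (index + 1) * block_size - 1, an inclusive end as HTTP/S3 byte ranges use, and the abstract _fetch_response() is expected to return a dict whose "Body" supports read(). A quick check of the arithmetic with an assumed 8 MiB block size:

block_size = 8 * 2**20
index = 2
start, end = index * block_size, (index + 1) * block_size - 1
print(start, end)  # 16777216 25165823, i.e. the third 8 MiB block, end inclusive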
@@ -369,7 +389,6 @@
 
 
 class LRUCacheFutureManager(OrderedDict):
-
     def __init__(self):
         super().__init__()
 