megfile 3.0.6.post1__py3-none-any.whl → 3.1.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +67 -0
- megfile/cli.py +16 -16
- megfile/config.py +37 -6
- megfile/errors.py +26 -20
- megfile/fs.py +13 -8
- megfile/fs_path.py +69 -49
- megfile/hdfs.py +13 -8
- megfile/hdfs_path.py +49 -41
- megfile/http.py +1 -1
- megfile/http_path.py +35 -28
- megfile/interfaces.py +119 -48
- megfile/lib/base_prefetch_reader.py +9 -8
- megfile/lib/combine_reader.py +7 -7
- megfile/lib/fnmatch.py +2 -2
- megfile/lib/glob.py +3 -3
- megfile/lib/hdfs_prefetch_reader.py +2 -1
- megfile/lib/http_prefetch_reader.py +3 -2
- megfile/lib/lazy_handler.py +6 -5
- megfile/lib/s3_buffered_writer.py +8 -7
- megfile/lib/s3_cached_handler.py +3 -4
- megfile/lib/s3_limited_seekable_writer.py +5 -3
- megfile/lib/s3_memory_handler.py +10 -6
- megfile/lib/s3_pipe_handler.py +1 -1
- megfile/lib/s3_prefetch_reader.py +7 -5
- megfile/lib/s3_share_cache_reader.py +2 -2
- megfile/lib/shadow_handler.py +5 -5
- megfile/lib/stdio_handler.py +3 -3
- megfile/pathlike.py +156 -170
- megfile/s3.py +19 -13
- megfile/s3_path.py +98 -83
- megfile/sftp.py +25 -16
- megfile/sftp_path.py +109 -94
- megfile/smart.py +38 -28
- megfile/smart_path.py +6 -6
- megfile/stdio.py +3 -3
- megfile/stdio_path.py +5 -5
- megfile/utils/__init__.py +8 -27
- megfile/version.py +1 -1
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/METADATA +4 -5
- megfile-3.1.0.post1.dist-info/RECORD +55 -0
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/WHEEL +1 -1
- megfile-3.1.0.post1.dist-info/top_level.txt +7 -0
- scripts/convert_results_to_sarif.py +124 -0
- scripts/generate_file.py +268 -0
- megfile-3.0.6.post1.dist-info/RECORD +0 -52
- megfile-3.0.6.post1.dist-info/top_level.txt +0 -1
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE +0 -0
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/entry_points.txt +0 -0
megfile/http.py
CHANGED
megfile/http_path.py
CHANGED
|
@@ -33,8 +33,8 @@ max_retries = HTTP_MAX_RETRY_TIMES
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def get_http_session(
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
timeout: Optional[Union[int, Tuple[int, int]]] = DEFAULT_TIMEOUT,
|
|
37
|
+
status_forcelist: Iterable[int] = (500, 502, 503, 504)
|
|
38
38
|
) -> requests.Session:
|
|
39
39
|
session = requests.Session()
|
|
40
40
|
|
|
@@ -49,24 +49,24 @@ def get_http_session(
|
|
|
49
49
|
kwargs)
|
|
50
50
|
|
|
51
51
|
def retry_callback(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
52
|
+
error,
|
|
53
|
+
method,
|
|
54
|
+
url,
|
|
55
|
+
params=None,
|
|
56
|
+
data=None,
|
|
57
|
+
headers=None,
|
|
58
|
+
cookies=None,
|
|
59
|
+
files=None,
|
|
60
|
+
auth=None,
|
|
61
|
+
timeout=None,
|
|
62
|
+
allow_redirects=True,
|
|
63
|
+
proxies=None,
|
|
64
|
+
hooks=None,
|
|
65
|
+
stream=None,
|
|
66
|
+
verify=None,
|
|
67
|
+
cert=None,
|
|
68
|
+
json=None,
|
|
69
|
+
**kwargs,
|
|
70
70
|
):
|
|
71
71
|
if data and hasattr(data, 'seek'):
|
|
72
72
|
data.seek(0)
|
|
@@ -86,7 +86,7 @@ def get_http_session(
|
|
|
86
86
|
return BytesIO(f.read())
|
|
87
87
|
else:
|
|
88
88
|
_logger.warning(
|
|
89
|
-
f'Can not retry http request, because the file object is not seekable and
|
|
89
|
+
f'Can not retry http request, because the file object is not seekable and not support "name"'
|
|
90
90
|
)
|
|
91
91
|
raise
|
|
92
92
|
|
|
@@ -243,8 +243,8 @@ class HttpPath(URIPath):
|
|
|
243
243
|
block_forward=block_forward,
|
|
244
244
|
block_size=block_size,
|
|
245
245
|
)
|
|
246
|
-
if _is_pickle(reader):
|
|
247
|
-
reader = BufferedReader(reader) #
|
|
246
|
+
if _is_pickle(reader):
|
|
247
|
+
reader = BufferedReader(reader) # type: ignore
|
|
248
248
|
return reader
|
|
249
249
|
|
|
250
250
|
response.raw.name = self.path_with_protocol
|
|
@@ -252,7 +252,7 @@ class HttpPath(URIPath):
|
|
|
252
252
|
# response.raw.auto_close = False
|
|
253
253
|
# response.raw.decode_content = True
|
|
254
254
|
# return BufferedReader(response.raw)
|
|
255
|
-
return BufferedReader(Response(response.raw)) #
|
|
255
|
+
return BufferedReader(Response(response.raw)) # type: ignore
|
|
256
256
|
|
|
257
257
|
def stat(self, follow_symlinks=True) -> StatResult:
|
|
258
258
|
'''
|
|
@@ -279,15 +279,22 @@ class HttpPath(URIPath):
|
|
|
279
279
|
size = headers.get('Content-Length')
|
|
280
280
|
if size:
|
|
281
281
|
size = int(size)
|
|
282
|
+
else:
|
|
283
|
+
size = 0
|
|
282
284
|
|
|
283
285
|
last_modified = headers.get('Last-Modified')
|
|
284
286
|
if last_modified:
|
|
285
287
|
last_modified = time.mktime(
|
|
286
288
|
time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))
|
|
289
|
+
else:
|
|
290
|
+
last_modified = 0.0
|
|
287
291
|
|
|
288
|
-
return StatResult(
|
|
289
|
-
size=size,
|
|
290
|
-
|
|
292
|
+
return StatResult(
|
|
293
|
+
size=size,
|
|
294
|
+
mtime=last_modified,
|
|
295
|
+
isdir=False,
|
|
296
|
+
islnk=False,
|
|
297
|
+
extra=headers)
|
|
291
298
|
|
|
292
299
|
def getsize(self, follow_symlinks: bool = False) -> int:
|
|
293
300
|
'''
|
|
@@ -342,7 +349,7 @@ class HttpsPath(HttpPath):
|
|
|
342
349
|
protocol = "https"
|
|
343
350
|
|
|
344
351
|
|
|
345
|
-
class Response(Readable):
|
|
352
|
+
class Response(Readable[bytes]):
|
|
346
353
|
|
|
347
354
|
def __init__(self, raw: HTTPResponse) -> None:
|
|
348
355
|
super().__init__()
|
megfile/interfaces.py
CHANGED
|
@@ -1,9 +1,28 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
-
from io import UnsupportedOperation
|
|
4
|
-
from typing import
|
|
5
|
-
|
|
6
|
-
from megfile.pathlike import Access, BasePath, BaseURIPath, FileEntry, PathLike, StatResult, URIPath
|
|
3
|
+
from io import IOBase, UnsupportedOperation
|
|
4
|
+
from typing import IO, AnyStr, Iterable, List, Optional
|
|
5
|
+
|
|
6
|
+
from megfile.pathlike import Access, BasePath, BaseURIPath, FileEntry, PathLike, Self, StatResult, URIPath
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"Access",
|
|
10
|
+
"BasePath",
|
|
11
|
+
"BaseURIPath",
|
|
12
|
+
"FileEntry",
|
|
13
|
+
"PathLike",
|
|
14
|
+
"StatResult",
|
|
15
|
+
"URIPath",
|
|
16
|
+
"fullname",
|
|
17
|
+
"Closable",
|
|
18
|
+
"FileLike",
|
|
19
|
+
"Seekable",
|
|
20
|
+
"Readable",
|
|
21
|
+
"Writable",
|
|
22
|
+
"FileCacher",
|
|
23
|
+
"NullCacher",
|
|
24
|
+
"ContextIterator",
|
|
25
|
+
]
|
|
7
26
|
|
|
8
27
|
|
|
9
28
|
def fullname(o):
|
|
@@ -37,52 +56,41 @@ class Closable(ABC):
|
|
|
37
56
|
self._close()
|
|
38
57
|
setattr(self, '__closed__', True)
|
|
39
58
|
|
|
40
|
-
def __enter__(self) ->
|
|
59
|
+
def __enter__(self: Self) -> Self:
|
|
41
60
|
return self
|
|
42
61
|
|
|
43
62
|
def __exit__(self, type, value, traceback) -> None:
|
|
44
63
|
self.close()
|
|
45
64
|
|
|
46
65
|
|
|
47
|
-
class FileLike(Closable, ABC):
|
|
48
|
-
|
|
49
|
-
@property
|
|
50
|
-
@abstractmethod
|
|
51
|
-
def name(self) -> str:
|
|
52
|
-
pass # pragma: no cover
|
|
53
|
-
|
|
54
|
-
@property
|
|
55
|
-
@abstractmethod
|
|
56
|
-
def mode(self) -> str:
|
|
57
|
-
pass # pragma: no cover
|
|
66
|
+
class FileLike(Closable, IOBase, IO[AnyStr], ABC): # pytype: disable=signature-mismatch
|
|
58
67
|
|
|
59
68
|
def fileno(self) -> int:
|
|
60
69
|
raise UnsupportedOperation('not a local file')
|
|
61
70
|
|
|
71
|
+
def isatty(self) -> bool:
|
|
72
|
+
return False
|
|
73
|
+
|
|
62
74
|
def __repr__(self) -> str:
|
|
63
75
|
return '<%s name=%r mode=%r>' % (
|
|
64
76
|
fullname(self), self.name, self.mode) # pragma: no cover
|
|
65
77
|
|
|
66
78
|
def seekable(self) -> bool:
|
|
67
|
-
'''Return True if the file-like object can be
|
|
79
|
+
'''Return True if the file-like object can be sought.'''
|
|
68
80
|
return False
|
|
69
81
|
|
|
70
|
-
def seek(self,
|
|
82
|
+
def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
|
|
71
83
|
'''Change stream position.
|
|
72
84
|
|
|
73
|
-
Seek to byte offset
|
|
74
|
-
0 Start of stream (the default). `
|
|
75
|
-
1 Current position - `
|
|
76
|
-
2 End of stream - `
|
|
85
|
+
Seek to byte `offset` relative to position indicated by `whence`:
|
|
86
|
+
0 Start of stream (the default). `offset` should be >= 0;
|
|
87
|
+
1 Current position - `offset` may be negative;
|
|
88
|
+
2 End of stream - `offset` usually negative.
|
|
77
89
|
|
|
78
90
|
Return the new absolute position.
|
|
79
91
|
'''
|
|
80
92
|
raise UnsupportedOperation('not seekable') # pragma: no cover
|
|
81
93
|
|
|
82
|
-
@abstractmethod
|
|
83
|
-
def tell(self) -> int:
|
|
84
|
-
'''Return current stream position.'''
|
|
85
|
-
|
|
86
94
|
def readable(self) -> bool:
|
|
87
95
|
'''Return True if the file-like object can be read.'''
|
|
88
96
|
return False # pragma: no cover
|
|
@@ -97,15 +105,19 @@ class FileLike(Closable, ABC):
|
|
|
97
105
|
This is not implemented for read-only and non-blocking streams.
|
|
98
106
|
'''
|
|
99
107
|
|
|
108
|
+
def __del__(self) -> None:
|
|
109
|
+
# TODO: Next version should turn on __del__ for auto closing, and disable this in child class like CombineReader
|
|
110
|
+
pass
|
|
111
|
+
|
|
100
112
|
|
|
101
113
|
class Seekable(FileLike, ABC):
|
|
102
114
|
|
|
103
115
|
def seekable(self) -> bool:
|
|
104
|
-
'''Return True if the file-like object can be
|
|
116
|
+
'''Return True if the file-like object can be sought.'''
|
|
105
117
|
return True
|
|
106
118
|
|
|
107
119
|
@abstractmethod
|
|
108
|
-
def seek(self,
|
|
120
|
+
def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
|
|
109
121
|
'''Change stream position.
|
|
110
122
|
|
|
111
123
|
Seek to byte offset `cookie` relative to position indicated by `whence`:
|
|
@@ -117,31 +129,32 @@ class Seekable(FileLike, ABC):
|
|
|
117
129
|
'''
|
|
118
130
|
|
|
119
131
|
|
|
120
|
-
class Readable(FileLike, ABC):
|
|
132
|
+
class Readable(FileLike[AnyStr], ABC):
|
|
121
133
|
|
|
122
134
|
def readable(self) -> bool:
|
|
123
135
|
'''Return True if the file-like object can be read.'''
|
|
124
136
|
return True
|
|
125
137
|
|
|
126
138
|
@abstractmethod
|
|
127
|
-
def read(self, size: Optional[int] = None) ->
|
|
128
|
-
'''Read at most `size` bytes, returned as a bytes object.
|
|
139
|
+
def read(self, size: Optional[int] = None) -> AnyStr:
|
|
140
|
+
'''Read at most `size` bytes or string, returned as a bytes or string object.
|
|
129
141
|
|
|
130
142
|
If the `size` argument is negative, read until EOF is reached.
|
|
131
|
-
Return an empty bytes object at EOF.
|
|
143
|
+
Return an empty bytes or string object at EOF.
|
|
132
144
|
'''
|
|
133
145
|
|
|
134
146
|
@abstractmethod
|
|
135
|
-
def readline(self, size: Optional[int] = None) ->
|
|
136
|
-
'''Next line from the file, as a bytes object.
|
|
147
|
+
def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[15]
|
|
148
|
+
'''Next line from the file, as a bytes or string object.
|
|
137
149
|
|
|
138
|
-
Retain newline. A non-negative `size` argument limits the maximum number of bytes to return (an incomplete line may be returned then).
|
|
150
|
+
Retain newline. A non-negative `size` argument limits the maximum number of bytes or string to return (an incomplete line may be returned then).
|
|
139
151
|
Return an empty bytes object at EOF.
|
|
140
152
|
'''
|
|
141
153
|
|
|
142
|
-
def readlines(
|
|
154
|
+
def readlines( # pyre-ignore[15]
|
|
155
|
+
self, hint: Optional[int] = None) -> List[AnyStr]:
|
|
143
156
|
'''Return a list of lines from the stream.'''
|
|
144
|
-
return self.read().splitlines(True)
|
|
157
|
+
return self.read(size=hint).splitlines(True) # pyre-ignore[7]
|
|
145
158
|
|
|
146
159
|
def readinto(self, buffer: bytearray) -> int:
|
|
147
160
|
'''Read bytes into buffer.
|
|
@@ -149,53 +162,111 @@ class Readable(FileLike, ABC):
|
|
|
149
162
|
Returns number of bytes read (0 for EOF), or None if the object
|
|
150
163
|
is set not to block and has no data to read.
|
|
151
164
|
'''
|
|
165
|
+
if "b" not in self.mode:
|
|
166
|
+
raise OSError("'readinto' only works on binary files")
|
|
167
|
+
|
|
152
168
|
data = self.read(len(buffer))
|
|
153
169
|
size = len(data)
|
|
154
|
-
buffer[:size] = data
|
|
170
|
+
buffer[:size] = data # pyre-ignore[6]
|
|
155
171
|
return size
|
|
156
172
|
|
|
157
|
-
def __next__(self) ->
|
|
173
|
+
def __next__(self) -> AnyStr: # pyre-ignore[15]
|
|
158
174
|
line = self.readline()
|
|
159
175
|
if not line:
|
|
160
176
|
raise StopIteration
|
|
161
177
|
return line
|
|
162
178
|
|
|
163
|
-
def __iter__(self) ->
|
|
179
|
+
def __iter__(self: Self) -> Self: # pyre-ignore[15]
|
|
164
180
|
return self
|
|
165
181
|
|
|
182
|
+
def truncate(self, size: Optional[int] = None) -> int:
|
|
183
|
+
raise OSError('not writable')
|
|
184
|
+
|
|
185
|
+
def write(self, data: AnyStr) -> int:
|
|
186
|
+
raise OSError('not writable')
|
|
187
|
+
|
|
188
|
+
def writelines(self, lines: Iterable[AnyStr]) -> None: # pyre-ignore[14]
|
|
189
|
+
raise OSError('not writable')
|
|
166
190
|
|
|
167
|
-
|
|
191
|
+
|
|
192
|
+
class Writable(FileLike[AnyStr], ABC):
|
|
168
193
|
|
|
169
194
|
def writable(self) -> bool:
|
|
170
195
|
'''Return True if the file-like object can be written.'''
|
|
171
196
|
return True
|
|
172
197
|
|
|
173
198
|
@abstractmethod
|
|
174
|
-
def write(self, data:
|
|
175
|
-
'''Write bytes to file.
|
|
199
|
+
def write(self, data: AnyStr) -> int:
|
|
200
|
+
'''Write bytes or string to file.
|
|
176
201
|
|
|
177
|
-
Return the number of bytes written.
|
|
202
|
+
Return the number of bytes or string written.
|
|
178
203
|
'''
|
|
179
204
|
|
|
180
|
-
def writelines(self, lines: Iterable[
|
|
205
|
+
def writelines(self, lines: Iterable[AnyStr]) -> None: # pyre-ignore[14]
|
|
181
206
|
'''Write `lines` to the file.
|
|
182
207
|
|
|
183
|
-
Note that newlines are not added.
|
|
208
|
+
Note that newlines are not added.
|
|
209
|
+
`lines` can be any iterable object producing bytes-like or string-like objects.
|
|
210
|
+
This is equivalent to calling write() for each element.
|
|
184
211
|
'''
|
|
185
212
|
for line in lines:
|
|
186
213
|
self.write(line)
|
|
187
214
|
|
|
215
|
+
def truncate(self, size: Optional[int] = None) -> int:
|
|
216
|
+
"""
|
|
217
|
+
Resize the stream to the given size in bytes.
|
|
218
|
+
|
|
219
|
+
:param size: resize size, defaults to None
|
|
220
|
+
:type size: int, optional
|
|
221
|
+
|
|
222
|
+
:raises OSError: When the stream is not support truncate.
|
|
223
|
+
:return: The new file size.
|
|
224
|
+
:rtype: int
|
|
225
|
+
"""
|
|
226
|
+
raise UnsupportedOperation('not support truncate')
|
|
188
227
|
|
|
189
|
-
|
|
228
|
+
def read(self, size: Optional[int] = None) -> AnyStr:
|
|
229
|
+
raise OSError('not readable')
|
|
230
|
+
|
|
231
|
+
def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[15]
|
|
232
|
+
raise OSError('not readable')
|
|
233
|
+
|
|
234
|
+
def readlines( # pyre-ignore[15]
|
|
235
|
+
self, hint: Optional[int] = None) -> List[AnyStr]:
|
|
236
|
+
raise OSError('not readable')
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class FileCacher(ABC):
|
|
190
240
|
|
|
191
241
|
@property
|
|
192
242
|
@abstractmethod
|
|
193
243
|
def cache_path(self) -> str:
|
|
194
244
|
pass # pragma: no cover
|
|
195
245
|
|
|
196
|
-
|
|
246
|
+
@property
|
|
247
|
+
def closed(self) -> bool:
|
|
248
|
+
'''Return True if the file-like object is closed.'''
|
|
249
|
+
return getattr(self, '__closed__', False)
|
|
250
|
+
|
|
251
|
+
@abstractmethod
|
|
252
|
+
def _close(self) -> None:
|
|
253
|
+
pass # pragma: no cover
|
|
254
|
+
|
|
255
|
+
def close(self) -> None:
|
|
256
|
+
'''Flush and close the file-like object.
|
|
257
|
+
|
|
258
|
+
This method has no effect if the file is already closed.
|
|
259
|
+
'''
|
|
260
|
+
if not getattr(self, '__closed__', False):
|
|
261
|
+
self._close()
|
|
262
|
+
setattr(self, '__closed__', True)
|
|
263
|
+
|
|
264
|
+
def __enter__(self) -> str:
|
|
197
265
|
return self.cache_path
|
|
198
266
|
|
|
267
|
+
def __exit__(self, type, value, traceback) -> None:
|
|
268
|
+
self.close()
|
|
269
|
+
|
|
199
270
|
def __del__(self):
|
|
200
271
|
self.close()
|
|
201
272
|
|
|
@@ -23,7 +23,7 @@ class SeekRecord:
|
|
|
23
23
|
self.read_count = 0
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class BasePrefetchReader(Readable, Seekable, ABC):
|
|
26
|
+
class BasePrefetchReader(Readable[bytes], Seekable, ABC):
|
|
27
27
|
'''
|
|
28
28
|
Reader to fast read the remote file content.
|
|
29
29
|
This will divide the file content into equal parts of block_size size,
|
|
@@ -112,7 +112,7 @@ class BasePrefetchReader(Readable, Seekable, ABC):
|
|
|
112
112
|
self._backoff_size *= BACKOFF_FACTOR
|
|
113
113
|
self.__offset = value
|
|
114
114
|
|
|
115
|
-
def seek(self,
|
|
115
|
+
def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
|
|
116
116
|
'''Change stream position.
|
|
117
117
|
|
|
118
118
|
Seek to byte offset pos relative to position indicated by whence:
|
|
@@ -127,11 +127,11 @@ class BasePrefetchReader(Readable, Seekable, ABC):
|
|
|
127
127
|
raise IOError('file already closed: %r' % self.name)
|
|
128
128
|
|
|
129
129
|
if whence == os.SEEK_CUR:
|
|
130
|
-
target_offset = self._offset +
|
|
130
|
+
target_offset = self._offset + offset
|
|
131
131
|
elif whence == os.SEEK_END:
|
|
132
|
-
target_offset = self._content_size +
|
|
132
|
+
target_offset = self._content_size + offset
|
|
133
133
|
elif whence == os.SEEK_SET:
|
|
134
|
-
target_offset =
|
|
134
|
+
target_offset = offset
|
|
135
135
|
else:
|
|
136
136
|
raise ValueError('invalid whence: %r' % whence)
|
|
137
137
|
|
|
@@ -271,7 +271,7 @@ class BasePrefetchReader(Readable, Seekable, ABC):
|
|
|
271
271
|
|
|
272
272
|
@property
|
|
273
273
|
def _is_alive(self):
|
|
274
|
-
return not self._executor._shutdown
|
|
274
|
+
return not self._executor._shutdown
|
|
275
275
|
|
|
276
276
|
@property
|
|
277
277
|
def _is_downloading(self):
|
|
@@ -307,7 +307,7 @@ class BasePrefetchReader(Readable, Seekable, ABC):
|
|
|
307
307
|
return self._buffer
|
|
308
308
|
|
|
309
309
|
def _seek_buffer(self, index: int, offset: int = 0):
|
|
310
|
-
# The corresponding block is probably not downloaded when
|
|
310
|
+
# The corresponding block is probably not downloaded when seek to a new position
|
|
311
311
|
# So record the offset first, set it when it is accessed
|
|
312
312
|
if self._is_auto_scaling: # When user doesn't define forward
|
|
313
313
|
history = []
|
|
@@ -329,7 +329,8 @@ class BasePrefetchReader(Readable, Seekable, ABC):
|
|
|
329
329
|
|
|
330
330
|
@abstractmethod
|
|
331
331
|
def _fetch_response(
|
|
332
|
-
self,
|
|
332
|
+
self,
|
|
333
|
+
start: Optional[int] = None,
|
|
333
334
|
end: Optional[int] = None) -> dict:
|
|
334
335
|
pass
|
|
335
336
|
|
megfile/lib/combine_reader.py
CHANGED
|
@@ -49,17 +49,17 @@ class CombineReader(Readable, Seekable):
|
|
|
49
49
|
def tell(self) -> int:
|
|
50
50
|
return self._offset
|
|
51
51
|
|
|
52
|
-
def _empty_bytes(self) -> AnyStr: #
|
|
52
|
+
def _empty_bytes(self) -> AnyStr: # pyre-ignore[34]
|
|
53
53
|
if 'b' in self._mode:
|
|
54
|
-
return b''
|
|
55
|
-
return ''
|
|
54
|
+
return b'' # pyre-ignore[7]
|
|
55
|
+
return '' # pyre-ignore[7]
|
|
56
56
|
|
|
57
57
|
def _empty_buffer(self) -> Union[BytesIO, StringIO]:
|
|
58
58
|
if 'b' in self._mode:
|
|
59
59
|
return BytesIO()
|
|
60
60
|
return StringIO()
|
|
61
61
|
|
|
62
|
-
def read(self, size: Optional[int] = None) -> AnyStr: #
|
|
62
|
+
def read(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
63
63
|
if self._offset >= self._content_size:
|
|
64
64
|
return self._empty_bytes()
|
|
65
65
|
if size is None or size < 0:
|
|
@@ -72,9 +72,9 @@ class CombineReader(Readable, Seekable):
|
|
|
72
72
|
buffer.write(data)
|
|
73
73
|
size -= len(data)
|
|
74
74
|
self._offset += len(data)
|
|
75
|
-
return buffer.getvalue()
|
|
75
|
+
return buffer.getvalue() # pyre-ignore[7]
|
|
76
76
|
|
|
77
|
-
def readline(self, size: Optional[int] = None) -> AnyStr: #
|
|
77
|
+
def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
78
78
|
if self._offset >= self._content_size:
|
|
79
79
|
return self._empty_bytes()
|
|
80
80
|
if size is None or size < 0:
|
|
@@ -96,7 +96,7 @@ class CombineReader(Readable, Seekable):
|
|
|
96
96
|
self._offset += len(data)
|
|
97
97
|
if buffer.tell() == size or data[-1] == NEWLINE:
|
|
98
98
|
break
|
|
99
|
-
return buffer.getvalue()
|
|
99
|
+
return buffer.getvalue() # pyre-ignore[7]
|
|
100
100
|
|
|
101
101
|
def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
|
|
102
102
|
if whence == os.SEEK_SET:
|
megfile/lib/fnmatch.py
CHANGED
|
@@ -14,7 +14,6 @@ corresponding to PATTERN. (It does not compile it.)
|
|
|
14
14
|
import functools
|
|
15
15
|
import os
|
|
16
16
|
import re
|
|
17
|
-
import sys
|
|
18
17
|
from typing import Callable, List, Match, Optional
|
|
19
18
|
|
|
20
19
|
|
|
@@ -127,7 +126,8 @@ def translate(pat: str) -> str:
|
|
|
127
126
|
res = res + r'\{'
|
|
128
127
|
else:
|
|
129
128
|
stuff = pat[i:j].replace('\\', r'\\')
|
|
130
|
-
stuff = r'|'.join(
|
|
129
|
+
stuff = r'|'.join(
|
|
130
|
+
map(re.escape, stuff.split(','))) # pyre-ignore[6]
|
|
131
131
|
res = r'%s(%s)' % (res, stuff)
|
|
132
132
|
i = j + 1
|
|
133
133
|
else:
|
megfile/lib/glob.py
CHANGED
|
@@ -5,7 +5,7 @@ import os
|
|
|
5
5
|
import re
|
|
6
6
|
from collections import OrderedDict
|
|
7
7
|
from collections import namedtuple as NamedTuple
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import Iterator, List, Tuple
|
|
9
9
|
|
|
10
10
|
from megfile.lib import fnmatch
|
|
11
11
|
|
|
@@ -131,7 +131,7 @@ def _glob1(dirname: str, pattern: str, dironly: bool, fs: FSFunc) -> List[str]:
|
|
|
131
131
|
names = list(_iterdir(dirname, dironly, fs))
|
|
132
132
|
if not _ishidden(pattern):
|
|
133
133
|
names = (x for x in names if not _ishidden(x))
|
|
134
|
-
return fnmatch.filter(names, pattern)
|
|
134
|
+
return fnmatch.filter(names, pattern) # pyre-ignore[6]
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
def _glob0(dirname: str, basename: str, dironly: bool, fs: FSFunc) -> List[str]:
|
|
@@ -165,7 +165,7 @@ def _iterdir(dirname: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
|
|
|
165
165
|
for name, isdir in fs.scandir(dirname):
|
|
166
166
|
try:
|
|
167
167
|
if not dironly or isdir:
|
|
168
|
-
yield name
|
|
168
|
+
yield name
|
|
169
169
|
except OSError:
|
|
170
170
|
pass
|
|
171
171
|
except OSError:
|
|
@@ -44,7 +44,8 @@ class HdfsPrefetchReader(BasePrefetchReader):
|
|
|
44
44
|
f"+{self._profile_name}" if self._profile_name else "", self._path)
|
|
45
45
|
|
|
46
46
|
def _fetch_response(
|
|
47
|
-
self,
|
|
47
|
+
self,
|
|
48
|
+
start: Optional[int] = None,
|
|
48
49
|
end: Optional[int] = None) -> dict:
|
|
49
50
|
with raise_hdfs_error(self.name):
|
|
50
51
|
with self._client.read(self._path, offset=start or 0, length=end -
|
|
@@ -58,13 +58,14 @@ class HttpPrefetchReader(BasePrefetchReader):
|
|
|
58
58
|
return fspath(self._url)
|
|
59
59
|
|
|
60
60
|
def _fetch_response(
|
|
61
|
-
self,
|
|
61
|
+
self,
|
|
62
|
+
start: Optional[int] = None,
|
|
62
63
|
end: Optional[int] = None) -> dict:
|
|
63
64
|
|
|
64
65
|
def fetch_response() -> dict:
|
|
65
66
|
request_kwargs = {}
|
|
66
67
|
if hasattr(self._url, 'request_kwargs'):
|
|
67
|
-
request_kwargs = self._url.request_kwargs
|
|
68
|
+
request_kwargs = self._url.request_kwargs # pyre-ignore[16]
|
|
68
69
|
timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
|
|
69
70
|
stream = request_kwargs.pop('stream', True)
|
|
70
71
|
|
megfile/lib/lazy_handler.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from functools import cached_property
|
|
2
3
|
from typing import AnyStr, Callable, Optional
|
|
3
4
|
|
|
4
5
|
from megfile.interfaces import Readable, Seekable, Writable
|
|
5
|
-
from megfile.utils import
|
|
6
|
+
from megfile.utils import get_content_size
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class LazyHandler(Readable, Seekable, Writable):
|
|
@@ -24,7 +25,7 @@ class LazyHandler(Readable, Seekable, Writable):
|
|
|
24
25
|
def mode(self) -> str:
|
|
25
26
|
return self._mode
|
|
26
27
|
|
|
27
|
-
@
|
|
28
|
+
@cached_property
|
|
28
29
|
def _file_object(self):
|
|
29
30
|
return self._open_func(self._path, self._mode, **self._options)
|
|
30
31
|
|
|
@@ -41,16 +42,16 @@ class LazyHandler(Readable, Seekable, Writable):
|
|
|
41
42
|
def readable(self) -> bool:
|
|
42
43
|
return self._file_object.readable()
|
|
43
44
|
|
|
44
|
-
def read(self, size: Optional[int] = None) -> AnyStr: #
|
|
45
|
+
def read(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
45
46
|
return self._file_object.read(size)
|
|
46
47
|
|
|
47
|
-
def readline(self, size: Optional[int] = None) -> AnyStr: #
|
|
48
|
+
def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
48
49
|
return self._file_object.readline(size)
|
|
49
50
|
|
|
50
51
|
def writable(self) -> bool:
|
|
51
52
|
return self._file_object.writable()
|
|
52
53
|
|
|
53
|
-
def write(self, data: AnyStr):
|
|
54
|
+
def write(self, data: AnyStr):
|
|
54
55
|
return self._file_object.write(data)
|
|
55
56
|
|
|
56
57
|
def _close(self):
|
|
@@ -5,7 +5,7 @@ from logging import getLogger as get_logger
|
|
|
5
5
|
from threading import Lock
|
|
6
6
|
from typing import NamedTuple, Optional
|
|
7
7
|
|
|
8
|
-
from megfile.config import BACKOFF_FACTOR, BACKOFF_INITIAL,
|
|
8
|
+
from megfile.config import BACKOFF_FACTOR, BACKOFF_INITIAL, DEFAULT_MAX_BLOCK_SIZE, DEFAULT_MAX_BUFFER_SIZE, DEFAULT_MIN_BLOCK_SIZE, GLOBAL_MAX_WORKERS
|
|
9
9
|
from megfile.errors import raise_s3_error
|
|
10
10
|
from megfile.interfaces import Writable
|
|
11
11
|
from megfile.utils import get_human_size, process_local
|
|
@@ -34,7 +34,7 @@ class PartResult(_PartResult):
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
class S3BufferedWriter(Writable):
|
|
37
|
+
class S3BufferedWriter(Writable[bytes]):
|
|
38
38
|
|
|
39
39
|
def __init__(
|
|
40
40
|
self,
|
|
@@ -42,7 +42,7 @@ class S3BufferedWriter(Writable):
|
|
|
42
42
|
key: str,
|
|
43
43
|
*,
|
|
44
44
|
s3_client,
|
|
45
|
-
block_size: int =
|
|
45
|
+
block_size: int = DEFAULT_MIN_BLOCK_SIZE,
|
|
46
46
|
max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
|
|
47
47
|
max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
|
|
48
48
|
max_workers: Optional[int] = None,
|
|
@@ -137,10 +137,11 @@ class S3BufferedWriter(Writable):
|
|
|
137
137
|
@property
|
|
138
138
|
def _multipart_upload(self):
|
|
139
139
|
return {
|
|
140
|
-
'Parts':
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
140
|
+
'Parts':
|
|
141
|
+
[
|
|
142
|
+
future.result().asdict()
|
|
143
|
+
for _, future in sorted(self._futures.items())
|
|
144
|
+
],
|
|
144
145
|
}
|
|
145
146
|
|
|
146
147
|
def _upload_buffer(self, part_number, content):
|