megfile 4.0.4__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/errors.py +2 -2
- megfile/fs.py +1 -1
- megfile/fs_path.py +18 -11
- megfile/hdfs.py +7 -7
- megfile/hdfs_path.py +39 -25
- megfile/lib/base_prefetch_reader.py +7 -13
- megfile/lib/combine_reader.py +1 -1
- megfile/lib/glob.py +6 -16
- megfile/lib/s3_cached_handler.py +3 -3
- megfile/lib/s3_limited_seekable_writer.py +1 -1
- megfile/lib/s3_memory_handler.py +3 -3
- megfile/lib/s3_pipe_handler.py +4 -4
- megfile/pathlike.py +5 -5
- megfile/s3.py +11 -21
- megfile/s3_path.py +174 -164
- megfile/sftp.py +7 -4
- megfile/sftp_path.py +33 -27
- megfile/smart.py +3 -4
- megfile/stdio.py +2 -1
- megfile/stdio_path.py +1 -0
- megfile/version.py +1 -1
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/METADATA +2 -2
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/RECORD +28 -28
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/WHEEL +1 -1
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/LICENSE +0 -0
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/LICENSE.pyre +0 -0
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/entry_points.txt +0 -0
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/top_level.txt +0 -0
megfile/errors.py
CHANGED

@@ -413,12 +413,12 @@ def translate_http_error(http_error: Exception, http_url: str) -> Exception:


 @contextmanager
-def raise_s3_error(s3_url: PathLike,
+def raise_s3_error(s3_url: PathLike, suppress_error_callback=None):
     try:
         yield
     except Exception as error:
         error = translate_s3_error(error, s3_url)
-        if
+        if suppress_error_callback and suppress_error_callback(error):
             return
         raise error

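The new `suppress_error_callback` parameter makes `raise_s3_error` a selectively silent context manager: after the caught exception is translated, the callback is consulted, and a truthy return suppresses the error instead of re-raising it. A minimal usage sketch; the callback and the error type it checks are illustrative, not megfile's documented usage:

from megfile.errors import raise_s3_error

def ignore_missing(error: Exception) -> bool:
    # Hypothetical policy: treat a missing key as a no-op.
    return isinstance(error, FileNotFoundError)

with raise_s3_error("s3://bucket/key", suppress_error_callback=ignore_missing):
    raise FileNotFoundError("s3://bucket/key")  # translated, then suppressed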
megfile/fs.py
CHANGED

@@ -317,7 +317,7 @@ def fs_walk(
     return FSPath(path).walk(followlinks)


-def fs_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool = True):
+def fs_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool = False):
     """
     Calculate the md5 value of the file

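Read against the new line, the truncated old signature appears to differ only in the `followlinks` default, i.e. it changed from `True` to `False`. Callers that relied on symlinked files being hashed through should now opt in explicitly; a sketch with a made-up path:

from megfile.fs import fs_getmd5

# Hypothetical file; pass followlinks=True to keep the old behavior.
digest = fs_getmd5("/data/model.ckpt", followlinks=True)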
megfile/fs_path.py
CHANGED

@@ -392,13 +392,15 @@ class FSPath(URIPath):

     def iterdir(self) -> Iterator["FSPath"]:
         """
-        Get all contents of given fs path.
-        The result is in ascending alphabetical order.
+        Get all contents of given fs path. The order of result is in arbitrary order.

-        :returns: All contents have in the path
+        :returns: All contents have in the path.
         """
-        for path in pathlib.Path(self.path_without_protocol).iterdir():
-            yield self.from_path(fspath(path))
+        self._check_int_path()
+        for path in pathlib.Path(
+            self.path_without_protocol  # pyre-ignore[6]
+        ).iterdir():
+            yield self.from_path(fspath(path))

     def load(self) -> BinaryIO:
         """Read all content on specified path and write into memory

@@ -568,7 +570,7 @@ class FSPath(URIPath):
                 "No match any file in: %r" % self.path_without_protocol
             )

-    def scandir(self) ->
+    def scandir(self) -> ContextIterator:
         """
         Get all content of given file path.

@@ -704,7 +706,7 @@ class FSPath(URIPath):
             )
         )

-    def md5(self, recalculate: bool = False, followlinks: bool = True):
+    def md5(self, recalculate: bool = False, followlinks: bool = False):
         """
         Calculate the md5 value of the file

@@ -713,11 +715,11 @@ class FSPath(URIPath):

         returns: md5 of file
         """
-        if
+        if self.is_dir():
             hash_md5 = hashlib.md5()  # nosec
             for file_name in self.listdir():
                 chunk = (
-
+                    self.joinpath(file_name)
                     .md5(recalculate=recalculate, followlinks=followlinks)
                     .encode()
                 )

@@ -794,8 +796,13 @@ class FSPath(URIPath):
         except FileNotFoundError as error:
             # Prevent the dst_path directory from being created when src_path does not
             # exist
-
-
+            dst_parent_dir = os.path.dirname(dst_path)
+            if (
+                dst_parent_dir
+                and dst_parent_dir != "."
+                and error.filename in (dst_path, dst_parent_dir)
+            ):
+                self.from_path(dst_parent_dir).mkdir(parents=True, exist_ok=True)
                 self._copyfile(dst_path, callback=callback, followlinks=followlinks)
             else:
                 raise
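Since `FSPath.iterdir` now delegates to `pathlib.Path.iterdir`, whose order is arbitrary, the old "ascending alphabetical" promise is gone from the docstring; callers that need ordering must sort themselves. A sketch with a hypothetical directory:

from megfile.fs_path import FSPath

# iterdir() order is no longer guaranteed; sort explicitly when it matters.
for entry in sorted(FSPath("/tmp/photos").iterdir(), key=lambda p: p.path):
    print(entry.path)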
megfile/hdfs.py
CHANGED

@@ -125,7 +125,7 @@ def hdfs_isfile(path: PathLike, followlinks: bool = False) -> bool:
     return HdfsPath(path).is_file(followlinks)


-def hdfs_listdir(path: PathLike, followlinks: bool = False) -> List[str]:
+def hdfs_listdir(path: PathLike) -> List[str]:
     """
     Get all contents of given path.

@@ -133,10 +133,10 @@ def hdfs_listdir(path: PathLike, followlinks: bool = False) -> List[str]:
     :returns: All contents have prefix of path.
     :raises: FileNotFoundError, NotADirectoryError
     """
-    return HdfsPath(path).listdir(
+    return HdfsPath(path).listdir()


-def hdfs_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
+def hdfs_load_from(path: PathLike) -> BinaryIO:
     """Read all content in binary on specified path and write into memory

     User should close the BinaryIO manually

@@ -144,7 +144,7 @@ def hdfs_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
     :param path: Given path
     :returns: BinaryIO
     """
-    return HdfsPath(path).load(
+    return HdfsPath(path).load()


 def hdfs_move(src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:

@@ -209,15 +209,15 @@ def hdfs_scan_stat(
     return HdfsPath(path).scan_stat(missing_ok, followlinks)


-def hdfs_scandir(path: PathLike
+def hdfs_scandir(path: PathLike) -> Iterator[FileEntry]:
     """
-    Get all contents of given path, the order of result is
+    Get all contents of given path, the order of result is in arbitrary order.

     :param path: Given path
     :returns: All contents have prefix of path
     :raises: FileNotFoundError, NotADirectoryError
     """
-    return HdfsPath(path).scandir(
+    return HdfsPath(path).scandir()


 def hdfs_unlink(path: PathLike, missing_ok: bool = False) -> None:
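Note the split in ordering guarantees after this release: `hdfs_listdir` stays sorted (via the `sorted(...)` added in the `hdfs_path.py` diff below), while `hdfs_scandir` is now documented as unordered. A hedged sketch with a hypothetical path:

from megfile.hdfs import hdfs_listdir, hdfs_scandir

names = hdfs_listdir("hdfs://user/data")  # ascending names
entries = sorted(hdfs_scandir("hdfs://user/data"), key=lambda e: e.name)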
megfile/hdfs_path.py
CHANGED

@@ -12,7 +12,7 @@ from megfile.config import (
     READER_MAX_BUFFER_SIZE,
 )
 from megfile.errors import _create_missing_ok_generator, raise_hdfs_error
-from megfile.interfaces import FileEntry, PathLike, StatResult, URIPath
+from megfile.interfaces import ContextIterator, FileEntry, PathLike, StatResult, URIPath
 from megfile.lib.compat import fspath
 from megfile.lib.glob import FSFunc, iglob
 from megfile.lib.hdfs_prefetch_reader import HdfsPrefetchReader

@@ -299,7 +299,12 @@ class HdfsPath(URIPath):
         Because hdfs symlink not support dir.
         :returns: True if path is hdfs directory, else False
         """
-
+        try:
+            stat = self.stat(follow_symlinks=followlinks)
+            return stat.is_dir()
+        except FileNotFoundError:
+            pass
+        return False

     def is_file(self, followlinks: bool = False) -> bool:
         """

@@ -307,9 +312,14 @@ class HdfsPath(URIPath):

         :returns: True if path is hdfs file, else False
         """
-
+        try:
+            stat = self.stat(follow_symlinks=followlinks)
+            return stat.is_file()
+        except FileNotFoundError:
+            pass
+        return False

-    def listdir(self
+    def listdir(self) -> List[str]:
         """
         Get all contents of given path.

@@ -319,19 +329,19 @@ class HdfsPath(URIPath):
         if not self.is_dir():
             raise NotADirectoryError("Not a directory: %r" % self.path)
         with raise_hdfs_error(self.path_with_protocol):
-            return self._client.list(self.path_without_protocol)
+            return sorted(self._client.list(self.path_without_protocol))

-    def iterdir(self
+    def iterdir(self) -> Iterator["HdfsPath"]:
         """
         Get all contents of given path.

         :returns: All contents have prefix of path.
         :raises: FileNotFoundError, NotADirectoryError
         """
-        for filename in self.listdir(
+        for filename in self.listdir():
             yield self.joinpath(filename)

-    def load(self
+    def load(self) -> BinaryIO:
         """Read all content in binary on specified path and write into memory

         User should close the BinaryIO manually

@@ -466,28 +476,32 @@ class HdfsPath(URIPath):
             ),
         )

-    def scandir(self
+    def scandir(self) -> ContextIterator:
         """
-        Get all contents of given path, the order of result is
+        Get all contents of given path, the order of result is in arbitrary order.

         :returns: All contents have prefix of path
         :raises: FileNotFoundError, NotADirectoryError
         """
+
+        def create_generator():
+            with raise_hdfs_error(self.path_with_protocol):
+                for filename, stat_data in self._client.list(
+                    self.path_without_protocol, status=True
+                ):
+                    yield FileEntry(
+                        name=filename,
+                        path=self.joinpath(filename).path_with_protocol,
+                        stat=StatResult(
+                            size=stat_data["length"],
+                            mtime=stat_data["modificationTime"] / 1000,
+                            isdir=stat_data["type"] == "DIRECTORY",
+                            islnk=False,
+                            extra=stat_data,
+                        ),
+                    )
+
+        return ContextIterator(create_generator())

     def unlink(self, missing_ok: bool = False) -> None:
         """
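The rewritten `scandir` asks the HDFS client for `status=True` listings, so each `FileEntry` is built from the metadata returned by the single list call (plausibly avoiding a per-entry stat round trip), and the generator is wrapped in `ContextIterator`. Assuming `ContextIterator` supports the context-manager protocol, as its use here suggests, it can be consumed either way; a sketch with a hypothetical path:

from megfile.hdfs_path import HdfsPath

with HdfsPath("hdfs://user/logs").scandir() as entries:
    for entry in entries:
        print(entry.name, entry.stat.size, entry.stat.isdir)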
megfile/lib/base_prefetch_reader.py
CHANGED

@@ -5,7 +5,6 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
 from logging import getLogger as get_logger
 from math import ceil
-from statistics import mean
 from typing import Optional

 from megfile.config import (

@@ -92,7 +91,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):

     @abstractmethod
     def _get_content_size(self):
-        pass
+        pass  # pragma: no cover

     @property
     def _futures(self) -> "LRUCacheFutureManager":

@@ -104,7 +103,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
     @property
     @abstractmethod
     def name(self) -> str:
-        pass
+        pass  # pragma: no cover

     @property
     def mode(self) -> str:

@@ -238,13 +237,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):

         if self._block_forward == 0:
             block_index = self._offset // self._block_size
-            if
-                mean_read_count = mean(item.read_count for item in self._seek_history)
-            else:
-                mean_read_count = 0
-            if block_index not in self._futures and mean_read_count < 3:
-                # No using LRP will be better if read() are always called less than 3
-                # times after seek()
+            if block_index not in self._futures:
                 buffer[:size] = self._read(size)
                 return size

@@ -329,8 +322,9 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
             history.append(item)
         history.append(SeekRecord(index))
         self._seek_history = history
-        self._block_forward = max(
-            self._block_capacity // len(self._seek_history), 0
+        self._block_forward = min(
+            max(self._block_capacity // len(self._seek_history), 0),
+            self._block_capacity - 1,
         )
         if self._block_forward == 0:
             self._is_auto_scaling = False

@@ -343,7 +337,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
     def _fetch_response(
         self, start: Optional[int] = None, end: Optional[int] = None
     ) -> dict:
-        pass
+        pass  # pragma: no cover

     def _fetch_buffer(self, index: int) -> BytesIO:
         start, end = index * self._block_size, (index + 1) * self._block_size - 1
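The `_block_forward` change adds an upper clamp so the prefetch look-ahead can never reach the full block-cache capacity. A worked example with hypothetical numbers, assuming the old expression was the bare `max(...)` shown above:

block_capacity = 8
seek_history_len = 1  # a single recorded seek

old_forward = max(block_capacity // seek_history_len, 0)  # 8: as large as the cache itself
new_forward = min(old_forward, block_capacity - 1)        # 7: always leaves one slot free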
megfile/lib/combine_reader.py
CHANGED

@@ -36,7 +36,7 @@ class CombineReader(Readable, Seekable):
         for index, size in enumerate(self._blocks_sizes):
             if self._offset < size:
                 return index - 1, self._offset - self._blocks_sizes[index - 1]
-        raise IOError("offset out of range: %d" % self._offset)
+        raise IOError("offset out of range: %d" % self._offset)  # pragma: no cover

     @property
     def name(self) -> str:
megfile/lib/glob.py
CHANGED

@@ -5,22 +5,15 @@
 import os
 import re
 from collections import OrderedDict
-from
-from typing import Iterator, List, Tuple
+from typing import Callable, Iterator, List, NamedTuple, Tuple

 from megfile.lib import fnmatch

-
-"""
+
 class FSFunc(NamedTuple):
     exists: Callable[[str], bool]
     isdir: Callable[[str], bool]
-    scandir: Callable[[str], Iterator[Tuple[str, bool]]]
-
-in Python 3.6+
-"""
-
-FSFunc = NamedTuple("FSFunc", ["exists", "isdir", "scandir"])
+    scandir: Callable[[str], Iterator[Tuple[str, bool]]]


 def _exists(path: str) -> bool:

@@ -72,7 +65,7 @@ def iglob(
     if recursive and _isrecursive(pathname):
         s = next(it)  # skip empty string
         if s:
-            raise OSError("iglob with recursive=True error")
+            raise OSError("iglob with recursive=True error")  # pragma: no cover
     return it

@@ -161,11 +154,8 @@ def _iterdir(dirname: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
     try:
         # dirname may be non-existent, raise OSError
         for name, isdir in fs.scandir(dirname):
-            try:
-                if not dironly or isdir:
-                    yield name
-            except OSError:
-                pass
+            if not dironly or isdir:
+                yield name
     except OSError:
         return
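With `FSFunc` now a class-style `NamedTuple` (the old functional-syntax fallback and its "Python 3.6+" comment block are deleted), the fields gain annotations but construction is unchanged. A sketch wiring it to the local filesystem; the adapter functions are illustrative:

import os
from typing import Iterator, Tuple

from megfile.lib.glob import FSFunc

def _scandir(dirname: str) -> Iterator[Tuple[str, bool]]:
    # Adapt os.scandir to the (name, isdir) pairs FSFunc.scandir expects.
    for entry in os.scandir(dirname or "."):
        yield entry.name, entry.is_dir()

fs = FSFunc(exists=os.path.exists, isdir=os.path.isdir, scandir=_scandir)
for name, isdir in fs.scandir("."):
    print(name, "dir" if isdir else "file")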
megfile/lib/s3_cached_handler.py
CHANGED

@@ -18,15 +18,15 @@ class S3CachedHandler(S3MemoryHandler):
         remove_cache_when_open: bool = True,
         profile_name: Optional[str] = None,
     ):
-        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
-            raise ValueError("unacceptable mode: %r" % mode)
-
         self._bucket = bucket
         self._key = key
         self._mode = mode
         self._client = s3_client
         self._profile_name = profile_name

+        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
+            raise ValueError("unacceptable mode: %r" % mode)
+
         if cache_path is None:
             cache_path = generate_cache_path(self.name)

megfile/lib/s3_limited_seekable_writer.py
CHANGED

@@ -137,7 +137,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
     def _submit_futures(self):
         content = self._buffer.getvalue()
         if len(content) == 0:
-            return
+            return  # pragma: no cover
         offset = len(content) - self._tail_block_size
         self._buffer = BytesIO(content[offset:])
         self._buffer.seek(0, os.SEEK_END)
megfile/lib/s3_memory_handler.py
CHANGED

@@ -21,15 +21,15 @@ class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
         s3_client,
         profile_name: Optional[str] = None,
     ):
-        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
-            raise ValueError("unacceptable mode: %r" % mode)
-
         self._bucket = bucket
         self._key = key
         self._mode = mode
         self._client = s3_client
         self._profile_name = profile_name

+        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
+            raise ValueError("unacceptable mode: %r" % mode)
+
         self._fileobj = BytesIO()
         self._download_fileobj()
megfile/lib/s3_pipe_handler.py
CHANGED

@@ -34,9 +34,6 @@ class S3PipeHandler(Readable[bytes], Writable[bytes]):
         join_thread: bool = True,
         profile_name: Optional[str] = None,
     ):
-        if mode not in ("rb", "wb"):
-            raise ValueError("unacceptable mode: %r" % mode)
-
         self._bucket = bucket
         self._key = key
         self._mode = mode

@@ -45,6 +42,9 @@ class S3PipeHandler(Readable[bytes], Writable[bytes]):
         self._offset = 0
         self._profile_name = profile_name

+        if mode not in ("rb", "wb"):
+            raise ValueError("unacceptable mode: %r" % mode)
+
         self._exc = None
         self._pipe = os.pipe()
         _s3_opened_pipes.append(self._pipe)

@@ -76,7 +76,7 @@ class S3PipeHandler(Readable[bytes], Writable[bytes]):
         try:
             with os.fdopen(self._pipe[1], "wb") as buffer:
                 self._client.download_fileobj(self._bucket, self._key, buffer)
-        except BrokenPipeError:
+        except BrokenPipeError:  # pragma: no cover
            if self._fileobj.closed:
                return
            raise
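All three handlers (`S3CachedHandler`, `S3MemoryHandler`, `S3PipeHandler`) now assign their attributes before validating `mode`. A plausible motivation is that a `__del__` or `close` running on a half-initialized object would otherwise hit `AttributeError`; a standalone sketch of that failure mode (not megfile code):

class Handler:
    def __init__(self, mode: str):
        if mode not in ("rb", "wb"):  # validating before assigning...
            raise ValueError("unacceptable mode: %r" % mode)
        self._mode = mode

    def __del__(self):
        print("closing", self._mode)  # AttributeError: _mode was never set

try:
    Handler("r+")
except ValueError:
    pass  # the interpreter then logs the AttributeError ignored in __del__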
megfile/pathlike.py
CHANGED

@@ -412,7 +412,7 @@ class BasePath:
         raw_suffix = self.suffix
         return self.from_path(path[: len(path) - len(raw_suffix)] + suffix)

-    def relpath(self, start=None):
+    def relpath(self, start: Optional[str] = None):
         """Return the relative path."""
         if start is None:
             raise TypeError("start is required")

@@ -485,7 +485,7 @@ class BasePath:
         """Return the canonical path of the path."""
         return self.path_with_protocol

-    def resolve(self):
+    def resolve(self, strict=False):
         """Alias of realpath."""
         return self.path_with_protocol

@@ -615,18 +615,18 @@ class BasePath:
         """Return the names of the entries in the directory the path points to."""
         raise NotImplementedError('method "listdir" not implemented: %r' % self)

-    def scandir(self)
+    def scandir(self):
         """
         Return an iterator of FileEntry objects corresponding to the entries
         in the directory.
         """
         raise NotImplementedError('method "scandir" not implemented: %r' % self)

-    def getsize(self, follow_symlinks: bool = True) -> int:
+    def getsize(self, follow_symlinks: bool = False) -> int:
         """Return the size, in bytes."""
         raise NotImplementedError('method "getsize" not implemented: %r' % self)

-    def getmtime(self, follow_symlinks: bool = True) -> float:
+    def getmtime(self, follow_symlinks: bool = False) -> float:
         """Return the time of last modification."""
         raise NotImplementedError('method "getmtime" not implemented: %r' % self)

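`relpath` gains a type hint and `resolve` gains a `strict` parameter; a reasonable guess is that this matches the `pathlib.Path.resolve(strict=False)` signature so megfile paths and `pathlib` paths stay interchangeable in duck-typed code:

from pathlib import Path

def canonical(p) -> str:
    # `p` may be a pathlib.Path or a megfile path object; both accept
    # resolve(strict=...) after this change (signature parity is assumed
    # to be the motivation; the diff only shows the new parameter).
    return str(p.resolve(strict=False))

print(canonical(Path("/tmp/../tmp")))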
megfile/s3.py
CHANGED

@@ -73,9 +73,7 @@ __all__ = [
 ]


-def s3_access(
-    path: PathLike, mode: Access = Access.READ, followlinks: bool = False
-) -> bool:
+def s3_access(path: PathLike, mode: Access = Access.READ) -> bool:
     """
     Test if path has access permission described by mode

@@ -83,7 +81,7 @@ def s3_access(
     :param mode: access mode
     :returns: bool, if the bucket of s3_url has read/write access.
     """
-    return S3Path(path).access(mode
+    return S3Path(path).access(mode)


 def s3_exists(path: PathLike, followlinks: bool = False) -> bool:

@@ -161,9 +159,7 @@ def s3_isfile(path: PathLike, followlinks: bool = False) -> bool:
     return S3Path(path).is_file(followlinks)


-def s3_listdir(
-    path: PathLike, followlinks: bool = False, missing_ok: bool = True
-) -> List[str]:
+def s3_listdir(path: PathLike) -> List[str]:
     """
     Get all contents of given s3_url. The result is in ascending alphabetical order.

@@ -171,10 +167,10 @@ def s3_listdir(
     :returns: All contents have prefix of s3_url in ascending alphabetical order
     :raises: S3FileNotFoundError, S3NotADirectoryError
     """
-    return S3Path(path).listdir(
+    return S3Path(path).listdir()


-def s3_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
+def s3_load_from(path: PathLike) -> BinaryIO:
     """Read all content in binary on specified path and write into memory

     User should close the BinaryIO manually

@@ -182,7 +178,7 @@ def s3_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
     :param path: Given path
     :returns: BinaryIO
     """
-    return S3Path(path).load(
+    return S3Path(path).load()


 def s3_hasbucket(path: PathLike) -> bool:

@@ -262,17 +258,15 @@ def s3_scan_stat(
     return S3Path(path).scan_stat(missing_ok, followlinks)


-def s3_scandir(
-    path: PathLike, followlinks: bool = False, missing_ok: bool = True
-) -> Iterator[FileEntry]:
+def s3_scandir(path: PathLike) -> Iterator[FileEntry]:
     """
-    Get all contents of given s3_url, the order of result is
+    Get all contents of given s3_url, the order of result is in arbitrary order.

     :param path: Given path
     :returns: All contents have prefix of s3_url
     :raises: S3FileNotFoundError, S3NotADirectoryError
     """
-    return S3Path(path).scandir(
+    return S3Path(path).scandir()


 def s3_stat(path: PathLike, follow_symlinks=True) -> StatResult:

@@ -457,7 +451,6 @@ def s3_glob(
     path: PathLike,
     recursive: bool = True,
     missing_ok: bool = True,
-    followlinks: bool = False,
 ) -> List[str]:
     """Return s3 path list in ascending alphabetical order,
     in which path matches glob pattern

@@ -476,7 +469,6 @@ def s3_glob(
             path=path,
             recursive=recursive,
             missing_ok=missing_ok,
-            followlinks=followlinks,
         )
     )

@@ -485,7 +477,6 @@ def s3_glob_stat(
     path: PathLike,
     recursive: bool = True,
     missing_ok: bool = True,
-    followlinks: bool = False,
 ) -> Iterator[FileEntry]:
     """Return a generator contains tuples of path and file stat,
     in ascending alphabetical order, in which path matches glob pattern

@@ -501,7 +492,7 @@ def s3_glob_stat(
     in which paths match `s3_pathname`
     """
     return S3Path(path).glob_stat(
-        pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
+        pattern="", recursive=recursive, missing_ok=missing_ok
     )


@@ -509,7 +500,6 @@ def s3_iglob(
     path: PathLike,
     recursive: bool = True,
     missing_ok: bool = True,
-    followlinks: bool = False,
 ) -> Iterator[str]:
     """Return s3 path iterator in ascending alphabetical order,
     in which path matches glob pattern

@@ -524,7 +514,7 @@ def s3_iglob(
     :returns: An iterator contains paths match `s3_pathname`
     """
     for path_obj in S3Path(path).iglob(
-        pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
+        pattern="", recursive=recursive, missing_ok=missing_ok
     ):
         yield path_obj.path_with_protocol
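`s3_listdir`, `s3_scandir`, and the glob helpers drop their `followlinks` (and, for the first two, `missing_ok`) parameters, and `s3_scandir` now documents arbitrary ordering. A migration sketch with a made-up bucket:

from megfile.s3 import s3_scandir

# Old: s3_scandir(path, followlinks=False, missing_ok=True)
# New: no extra parameters, and entries arrive unsorted.
for entry in sorted(s3_scandir("s3://bucket/prefix/"), key=lambda e: e.name):
    print(entry.path, entry.stat.size)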