megfile 3.0.6.post1__py3-none-any.whl → 3.1.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +67 -0
- megfile/cli.py +16 -16
- megfile/config.py +37 -6
- megfile/errors.py +26 -20
- megfile/fs.py +13 -8
- megfile/fs_path.py +69 -49
- megfile/hdfs.py +13 -8
- megfile/hdfs_path.py +49 -41
- megfile/http.py +1 -1
- megfile/http_path.py +35 -28
- megfile/interfaces.py +119 -48
- megfile/lib/base_prefetch_reader.py +9 -8
- megfile/lib/combine_reader.py +7 -7
- megfile/lib/fnmatch.py +2 -2
- megfile/lib/glob.py +3 -3
- megfile/lib/hdfs_prefetch_reader.py +2 -1
- megfile/lib/http_prefetch_reader.py +3 -2
- megfile/lib/lazy_handler.py +6 -5
- megfile/lib/s3_buffered_writer.py +8 -7
- megfile/lib/s3_cached_handler.py +3 -4
- megfile/lib/s3_limited_seekable_writer.py +5 -3
- megfile/lib/s3_memory_handler.py +10 -6
- megfile/lib/s3_pipe_handler.py +1 -1
- megfile/lib/s3_prefetch_reader.py +7 -5
- megfile/lib/s3_share_cache_reader.py +2 -2
- megfile/lib/shadow_handler.py +5 -5
- megfile/lib/stdio_handler.py +3 -3
- megfile/pathlike.py +156 -170
- megfile/s3.py +19 -13
- megfile/s3_path.py +98 -83
- megfile/sftp.py +25 -16
- megfile/sftp_path.py +109 -94
- megfile/smart.py +38 -28
- megfile/smart_path.py +6 -6
- megfile/stdio.py +3 -3
- megfile/stdio_path.py +5 -5
- megfile/utils/__init__.py +8 -27
- megfile/version.py +1 -1
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/METADATA +4 -5
- megfile-3.1.0.post1.dist-info/RECORD +55 -0
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/WHEEL +1 -1
- megfile-3.1.0.post1.dist-info/top_level.txt +7 -0
- scripts/convert_results_to_sarif.py +124 -0
- scripts/generate_file.py +268 -0
- megfile-3.0.6.post1.dist-info/RECORD +0 -52
- megfile-3.0.6.post1.dist-info/top_level.txt +0 -1
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE +0 -0
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/entry_points.txt +0 -0
megfile/lib/s3_cached_handler.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from
|
|
3
|
-
from typing import Iterable, List, Optional
|
|
2
|
+
from typing import Optional
|
|
4
3
|
|
|
5
4
|
from megfile.errors import translate_fs_error, translate_s3_error
|
|
6
5
|
from megfile.lib.s3_memory_handler import S3MemoryHandler
|
|
@@ -36,13 +35,13 @@ class S3CachedHandler(S3MemoryHandler):
|
|
|
36
35
|
self._download_fileobj()
|
|
37
36
|
|
|
38
37
|
if remove_cache_when_open:
|
|
39
|
-
os.unlink(self._cache_path)
|
|
38
|
+
os.unlink(self._cache_path) # pyre-ignore[6]
|
|
40
39
|
|
|
41
40
|
def fileno(self) -> int:
|
|
42
41
|
# allow numpy.array to create a memmaped ndarray
|
|
43
42
|
return self._fileobj.fileno()
|
|
44
43
|
|
|
45
44
|
def _translate_error(self, error: Exception):
|
|
46
|
-
error = translate_fs_error(error, self._cache_path)
|
|
45
|
+
error = translate_fs_error(error, self._cache_path) # pyre-ignore[6]
|
|
47
46
|
error = translate_s3_error(error, self.name)
|
|
48
47
|
return error
|
|
megfile/lib/s3_limited_seekable_writer.py
CHANGED
|
@@ -3,7 +3,7 @@ from io import BytesIO
|
|
|
3
3
|
from logging import getLogger as get_logger
|
|
4
4
|
from typing import Optional
|
|
5
5
|
|
|
6
|
-
from megfile.config import
|
|
6
|
+
from megfile.config import DEFAULT_MAX_BLOCK_SIZE, DEFAULT_MAX_BUFFER_SIZE, DEFAULT_MIN_BLOCK_SIZE
|
|
7
7
|
from megfile.errors import raise_s3_error
|
|
8
8
|
from megfile.interfaces import Seekable
|
|
9
9
|
from megfile.lib.s3_buffered_writer import S3BufferedWriter
|
|
@@ -11,7 +11,7 @@ from megfile.lib.s3_buffered_writer import S3BufferedWriter
|
|
|
11
11
|
_logger = get_logger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class S3LimitedSeekableWriter(Seekable, S3BufferedWriter):
|
|
14
|
+
class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
|
|
15
15
|
''' For file format like msgpack and mp4, it's a pain that you need to write
|
|
16
16
|
header before writing the data. So it's kind of hard to make streaming write
|
|
17
17
|
to unseekable file system like s3. In this case, we will try to keep the first
|
|
@@ -25,7 +25,7 @@ class S3LimitedSeekableWriter(Seekable, S3BufferedWriter):
|
|
|
25
25
|
key: str,
|
|
26
26
|
*,
|
|
27
27
|
s3_client,
|
|
28
|
-
block_size: int =
|
|
28
|
+
block_size: int = DEFAULT_MIN_BLOCK_SIZE,
|
|
29
29
|
head_block_size: Optional[int] = None,
|
|
30
30
|
tail_block_size: Optional[int] = None,
|
|
31
31
|
max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
|
|
@@ -69,6 +69,8 @@ class S3LimitedSeekableWriter(Seekable, S3BufferedWriter):
|
|
|
69
69
|
target_offset = self._offset + offset
|
|
70
70
|
elif whence == os.SEEK_END:
|
|
71
71
|
target_offset = self._content_size + offset
|
|
72
|
+
else:
|
|
73
|
+
raise OSError('Unsupported whence value: %d' % whence)
|
|
72
74
|
|
|
73
75
|
if target_offset < self._head_block_size:
|
|
74
76
|
self._head_buffer.seek(target_offset)
|
megfile/lib/s3_memory_handler.py
CHANGED
|
@@ -2,11 +2,11 @@ import os
|
|
|
2
2
|
from io import BytesIO, UnsupportedOperation
|
|
3
3
|
from typing import Iterable, List, Optional
|
|
4
4
|
|
|
5
|
-
from megfile.errors import S3ConfigError, UnknownError, raise_s3_error,
|
|
5
|
+
from megfile.errors import S3ConfigError, UnknownError, raise_s3_error, translate_s3_error
|
|
6
6
|
from megfile.interfaces import Readable, Seekable, Writable
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
class S3MemoryHandler(Readable, Seekable, Writable):
|
|
9
|
+
class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
|
|
10
10
|
|
|
11
11
|
def __init__(
|
|
12
12
|
self,
|
|
@@ -41,8 +41,8 @@ class S3MemoryHandler(Readable, Seekable, Writable):
|
|
|
41
41
|
def tell(self) -> int:
|
|
42
42
|
return self._fileobj.tell()
|
|
43
43
|
|
|
44
|
-
def seek(self,
|
|
45
|
-
return self._fileobj.seek(
|
|
44
|
+
def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
|
|
45
|
+
return self._fileobj.seek(offset, whence)
|
|
46
46
|
|
|
47
47
|
def readable(self) -> bool:
|
|
48
48
|
return self._mode[0] == 'r' or self._mode[-1] == '+'
|
|
@@ -55,12 +55,16 @@ class S3MemoryHandler(Readable, Seekable, Writable):
|
|
|
55
55
|
def readline(self, size: Optional[int] = None) -> bytes:
|
|
56
56
|
if not self.readable():
|
|
57
57
|
raise UnsupportedOperation('not readable')
|
|
58
|
+
if size is None:
|
|
59
|
+
size = -1
|
|
58
60
|
return self._fileobj.readline(size)
|
|
59
61
|
|
|
60
|
-
def readlines(self) -> List[bytes]:
|
|
62
|
+
def readlines(self, hint: Optional[int] = None) -> List[bytes]:
|
|
61
63
|
if not self.readable():
|
|
62
64
|
raise UnsupportedOperation('not readable')
|
|
63
|
-
|
|
65
|
+
if hint is None:
|
|
66
|
+
hint = -1
|
|
67
|
+
return self._fileobj.readlines(hint)
|
|
64
68
|
|
|
65
69
|
def writable(self) -> bool:
|
|
66
70
|
return self._mode[0] == 'w' or \
|
megfile/lib/s3_pipe_handler.py
CHANGED
megfile/lib/s3_prefetch_reader.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from concurrent.futures import Future
|
|
3
2
|
from io import BytesIO
|
|
4
3
|
from typing import Optional
|
|
@@ -42,6 +41,8 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
42
41
|
self._key = key
|
|
43
42
|
self._client = s3_client
|
|
44
43
|
self._profile_name = profile_name
|
|
44
|
+
self._content_etag = None
|
|
45
|
+
self._content_info = None
|
|
45
46
|
|
|
46
47
|
super().__init__(
|
|
47
48
|
block_size=block_size,
|
|
@@ -58,7 +59,7 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
58
59
|
first_index_response['ContentRange'].split('/')[-1])
|
|
59
60
|
except S3InvalidRangeError:
|
|
60
61
|
# usually when read a empty file
|
|
61
|
-
#
|
|
62
|
+
# can use minio test empty file: https://hub.docker.com/r/minio/minio
|
|
62
63
|
first_index_response = self._fetch_response()
|
|
63
64
|
content_size = int(first_index_response['ContentLength'])
|
|
64
65
|
|
|
@@ -76,7 +77,8 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
76
77
|
self._bucket, self._key)
|
|
77
78
|
|
|
78
79
|
def _fetch_response(
|
|
79
|
-
self,
|
|
80
|
+
self,
|
|
81
|
+
start: Optional[int] = None,
|
|
80
82
|
end: Optional[int] = None) -> dict:
|
|
81
83
|
|
|
82
84
|
def fetch_response() -> dict:
|
|
@@ -103,9 +105,9 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
103
105
|
index + 1) * self._block_size - 1
|
|
104
106
|
response = self._fetch_response(start=start, end=end)
|
|
105
107
|
etag = response.get('ETag', None)
|
|
106
|
-
if etag is not None and etag != self._content_etag:
|
|
108
|
+
if etag is not None and etag != self._content_etag:
|
|
107
109
|
raise S3FileChangedError(
|
|
108
110
|
'File changed: %r, etag before: %s, after: %s' %
|
|
109
|
-
(self.name, self._content_info, response))
|
|
111
|
+
(self.name, self._content_info, response))
|
|
110
112
|
|
|
111
113
|
return response['Body']
|
|
megfile/lib/s3_share_cache_reader.py
CHANGED
|
@@ -44,14 +44,14 @@ class S3ShareCacheReader(S3PrefetchReader):
|
|
|
44
44
|
profile_name=profile_name,
|
|
45
45
|
)
|
|
46
46
|
|
|
47
|
-
def _get_futures(self):
|
|
47
|
+
def _get_futures(self) -> 'ShareCacheFutureManager':
|
|
48
48
|
futures = thread_local(
|
|
49
49
|
'S3ShareCacheReader.' + self._cache_key, ShareCacheFutureManager)
|
|
50
50
|
futures.register(self.name)
|
|
51
51
|
return futures
|
|
52
52
|
|
|
53
53
|
def _seek_buffer(self, index: int, offset: int = 0):
|
|
54
|
-
# The corresponding block is probably not downloaded when
|
|
54
|
+
# The corresponding block is probably not downloaded when sought to a new position
|
|
55
55
|
# So record the offset first, set it when it is accessed
|
|
56
56
|
self._cached_offset = offset
|
|
57
57
|
self._block_index = index
|
megfile/lib/shadow_handler.py
CHANGED
|
@@ -58,18 +58,18 @@ class ShadowHandler(Readable, Seekable, Writable, BaseShadowHandler):
|
|
|
58
58
|
def readable(self) -> bool:
|
|
59
59
|
return is_readable(self._file_object)
|
|
60
60
|
|
|
61
|
-
def read(self, size: Optional[int] = None) -> AnyStr: #
|
|
61
|
+
def read(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
62
62
|
with self._ensure_offset():
|
|
63
|
-
return self._file_object.read(size)
|
|
63
|
+
return self._file_object.read(size) # pyre-ignore[6]
|
|
64
64
|
|
|
65
|
-
def readline(self, size: Optional[int] = None) -> AnyStr: #
|
|
65
|
+
def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
66
66
|
with self._ensure_offset():
|
|
67
|
-
return self._file_object.readline(size)
|
|
67
|
+
return self._file_object.readline(size) # pyre-ignore[6]
|
|
68
68
|
|
|
69
69
|
def writable(self) -> bool:
|
|
70
70
|
return is_writable(self._file_object)
|
|
71
71
|
|
|
72
|
-
def write(self, data: AnyStr):
|
|
72
|
+
def write(self, data: AnyStr):
|
|
73
73
|
with self._ensure_offset():
|
|
74
74
|
return self._file_object.write(data)
|
|
75
75
|
|
megfile/lib/stdio_handler.py
CHANGED
|
@@ -40,10 +40,10 @@ class STDReader(STDHandler, Readable):
|
|
|
40
40
|
def name(self) -> str:
|
|
41
41
|
return "stdin"
|
|
42
42
|
|
|
43
|
-
def read(self, size: Optional[int] = None) -> AnyStr: #
|
|
43
|
+
def read(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
44
44
|
return self._handler.read(size)
|
|
45
45
|
|
|
46
|
-
def readline(self, size: Optional[int] = None) -> AnyStr: #
|
|
46
|
+
def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
|
|
47
47
|
return self._handler.readline()
|
|
48
48
|
|
|
49
49
|
|
|
@@ -79,5 +79,5 @@ class STDWriter(STDHandler, Writable):
|
|
|
79
79
|
def name(self) -> str:
|
|
80
80
|
return self._name
|
|
81
81
|
|
|
82
|
-
def write(self, data: AnyStr) -> int:
|
|
82
|
+
def write(self, data: AnyStr) -> int:
|
|
83
83
|
return self._handler.write(data)
|