megfile 2.2.7__py3-none-any.whl → 2.2.8.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/__init__.py +26 -0
- megfile/cli.py +57 -5
- megfile/errors.py +25 -0
- megfile/fs.py +2 -12
- megfile/fs_path.py +11 -8
- megfile/hdfs.py +269 -0
- megfile/hdfs_path.py +630 -0
- megfile/http_path.py +61 -2
- megfile/lib/hdfs_prefetch_reader.py +51 -0
- megfile/lib/hdfs_tools.py +21 -0
- megfile/pathlike.py +4 -0
- megfile/s3.py +2 -7
- megfile/s3_path.py +17 -5
- megfile/sftp.py +2 -12
- megfile/sftp_path.py +12 -8
- megfile/version.py +1 -1
- {megfile-2.2.7.dist-info → megfile-2.2.8.post1.dist-info}/METADATA +41 -87
- {megfile-2.2.7.dist-info → megfile-2.2.8.post1.dist-info}/RECORD +23 -19
- {megfile-2.2.7.dist-info → megfile-2.2.8.post1.dist-info}/WHEEL +1 -1
- {megfile-2.2.7.dist-info → megfile-2.2.8.post1.dist-info}/LICENSE +0 -0
- {megfile-2.2.7.dist-info → megfile-2.2.8.post1.dist-info}/LICENSE.pyre +0 -0
- {megfile-2.2.7.dist-info → megfile-2.2.8.post1.dist-info}/entry_points.txt +0 -0
- {megfile-2.2.7.dist-info → megfile-2.2.8.post1.dist-info}/top_level.txt +0 -0
megfile/http_path.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import io
|
|
1
2
|
import time
|
|
2
3
|
from functools import partial
|
|
3
4
|
from io import BufferedReader
|
|
4
5
|
from logging import getLogger as get_logger
|
|
5
|
-
from typing import Iterable, Optional, Union
|
|
6
|
+
from typing import Iterable, Iterator, Optional, Union
|
|
6
7
|
|
|
7
8
|
import requests
|
|
8
9
|
|
|
@@ -31,7 +32,8 @@ max_retries = 10
|
|
|
31
32
|
|
|
32
33
|
def get_http_session(
|
|
33
34
|
timeout: int = 10,
|
|
34
|
-
status_forcelist: Iterable[int] = (502, 503,
|
|
35
|
+
status_forcelist: Iterable[int] = (500, 502, 503,
|
|
36
|
+
504)) -> requests.Session:
|
|
35
37
|
session = requests.Session()
|
|
36
38
|
|
|
37
39
|
def after_callback(response, *args, **kwargs):
|
|
@@ -44,12 +46,69 @@ def get_http_session(
|
|
|
44
46
|
'send http request: %s %r, with parameters: %s', method, url,
|
|
45
47
|
kwargs)
|
|
46
48
|
|
|
49
|
+
def retry_callback(
|
|
50
|
+
error,
|
|
51
|
+
method,
|
|
52
|
+
url,
|
|
53
|
+
params=None,
|
|
54
|
+
data=None,
|
|
55
|
+
headers=None,
|
|
56
|
+
cookies=None,
|
|
57
|
+
files=None,
|
|
58
|
+
auth=None,
|
|
59
|
+
timeout=None,
|
|
60
|
+
allow_redirects=True,
|
|
61
|
+
proxies=None,
|
|
62
|
+
hooks=None,
|
|
63
|
+
stream=None,
|
|
64
|
+
verify=None,
|
|
65
|
+
cert=None,
|
|
66
|
+
json=None,
|
|
67
|
+
**kwargs,
|
|
68
|
+
):
|
|
69
|
+
if data and hasattr(data, 'seek'):
|
|
70
|
+
data.seek(0)
|
|
71
|
+
elif isinstance(data, Iterator):
|
|
72
|
+
_logger.warning(f'Can not retry http request with iterator data')
|
|
73
|
+
raise
|
|
74
|
+
if files:
|
|
75
|
+
|
|
76
|
+
def seek_or_reopen(file_object):
|
|
77
|
+
if isinstance(file_object, (str, bytes)):
|
|
78
|
+
return file_object
|
|
79
|
+
elif hasattr(file_object, 'seek'):
|
|
80
|
+
file_object.seek(0)
|
|
81
|
+
return file_object
|
|
82
|
+
elif hasattr(file_object, 'name'):
|
|
83
|
+
with SmartPath(file_object.name).open('rb') as f:
|
|
84
|
+
return io.BytesIO(f.read())
|
|
85
|
+
else:
|
|
86
|
+
_logger.warning(
|
|
87
|
+
f'Can not retry http request, because the file object is not seekable and unsupport "name"'
|
|
88
|
+
)
|
|
89
|
+
raise
|
|
90
|
+
|
|
91
|
+
for key, file_info in files.items():
|
|
92
|
+
if hasattr(file_info, 'seek'):
|
|
93
|
+
file_info.seek(0)
|
|
94
|
+
elif isinstance(file_info,
|
|
95
|
+
(tuple, list)) and len(file_info) >= 2:
|
|
96
|
+
file_info = list(file_info)
|
|
97
|
+
if isinstance(file_info[1],
|
|
98
|
+
(tuple, list)) and len(file_info[1]) >= 2:
|
|
99
|
+
file_info[1] = list(file_info[1])
|
|
100
|
+
file_info[1] = seek_or_reopen(file_info[1])
|
|
101
|
+
else:
|
|
102
|
+
file_info[1] = seek_or_reopen(file_info[1])
|
|
103
|
+
files[key] = file_info
|
|
104
|
+
|
|
47
105
|
session.request = patch_method(
|
|
48
106
|
partial(session.request, timeout=timeout),
|
|
49
107
|
max_retries=max_retries,
|
|
50
108
|
should_retry=http_should_retry,
|
|
51
109
|
before_callback=before_callback,
|
|
52
110
|
after_callback=after_callback,
|
|
111
|
+
retry_callback=retry_callback,
|
|
53
112
|
)
|
|
54
113
|
return session
|
|
55
114
|
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from megfile.errors import raise_hdfs_error
|
|
5
|
+
from megfile.lib.base_prefetch_reader import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, BasePrefetchReader
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class HdfsPrefetchReader(BasePrefetchReader):
|
|
9
|
+
'''
|
|
10
|
+
Reader to fast read the hdfs content. This will divide the file content into equal parts of block_size size, and will use LRU to cache at most block_capacity blocks in memory.
|
|
11
|
+
open(), seek() and read() will trigger prefetch read. The prefetch will cached block_forward blocks of data from offset position (the position after reading if the called function is read).
|
|
12
|
+
'''
|
|
13
|
+
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
hdfs_path: str,
|
|
17
|
+
*,
|
|
18
|
+
client,
|
|
19
|
+
block_size: int = DEFAULT_BLOCK_SIZE,
|
|
20
|
+
block_capacity: int = DEFAULT_BLOCK_CAPACITY,
|
|
21
|
+
block_forward: Optional[int] = None,
|
|
22
|
+
max_retries: int = 10,
|
|
23
|
+
max_workers: Optional[int] = None,
|
|
24
|
+
profile_name: Optional[str] = None):
|
|
25
|
+
self._path = hdfs_path
|
|
26
|
+
self._client = client
|
|
27
|
+
self._profile_name = profile_name
|
|
28
|
+
|
|
29
|
+
super().__init__(
|
|
30
|
+
block_size=block_size,
|
|
31
|
+
block_capacity=block_capacity,
|
|
32
|
+
block_forward=block_forward,
|
|
33
|
+
max_retries=max_retries,
|
|
34
|
+
max_workers=max_workers)
|
|
35
|
+
|
|
36
|
+
def _get_content_size(self):
|
|
37
|
+
with raise_hdfs_error(self._path):
|
|
38
|
+
return self._client.status(self._path)['length']
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def name(self) -> str:
|
|
42
|
+
return 'hdfs%s://%s' % (
|
|
43
|
+
f"+{self._profile_name}" if self._profile_name else "", self._path)
|
|
44
|
+
|
|
45
|
+
def _fetch_response(
|
|
46
|
+
self, start: Optional[int] = None,
|
|
47
|
+
end: Optional[int] = None) -> dict:
|
|
48
|
+
with raise_hdfs_error(self.name):
|
|
49
|
+
with self._client.read(self._path, offset=start or 0, length=end -
|
|
50
|
+
start if start and end else None) as f:
|
|
51
|
+
return {'Body': BytesIO(f.read())}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
try:
|
|
2
|
+
import hdfs as hdfs_api
|
|
3
|
+
except ImportError: # pragma: no cover
|
|
4
|
+
hdfs_api = None
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
'hdfs_api',
|
|
8
|
+
]
|
|
9
|
+
|
|
10
|
+
if hdfs_api:
|
|
11
|
+
_to_error = hdfs_api.client._to_error
|
|
12
|
+
|
|
13
|
+
def _patch_to_error(response):
|
|
14
|
+
try:
|
|
15
|
+
err = _to_error(response)
|
|
16
|
+
except hdfs_api.HdfsError as e:
|
|
17
|
+
err = e
|
|
18
|
+
err.status_code = response.status_code
|
|
19
|
+
return err
|
|
20
|
+
|
|
21
|
+
hdfs_api.client._to_error = _patch_to_error
|
megfile/pathlike.py
CHANGED
|
@@ -864,6 +864,10 @@ class URIPath(BaseURIPath):
|
|
|
864
864
|
"""
|
|
865
865
|
raise NotImplementedError(f"'utime' is unsupported on '{type(self)}'")
|
|
866
866
|
|
|
867
|
+
def lstat(self) -> StatResult:
|
|
868
|
+
'''Like stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.'''
|
|
869
|
+
return self.stat(follow_symlinks=False)
|
|
870
|
+
|
|
867
871
|
|
|
868
872
|
class URIPathParents(Sequence):
|
|
869
873
|
|
megfile/s3.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple
|
|
2
2
|
|
|
3
3
|
from megfile.interfaces import Access, FileEntry, PathLike, StatResult
|
|
4
|
-
from megfile.s3_path import S3BufferedWriter, S3Cacher, S3LimitedSeekableWriter, S3Path, S3PrefetchReader, S3ShareCacheReader, get_endpoint_url, get_s3_client, get_s3_session, is_s3, parse_s3_url, s3_buffered_open, s3_cached_open, s3_concat, s3_download, s3_glob, s3_glob_stat, s3_iglob, s3_load_content, s3_makedirs, s3_memory_open, s3_open, s3_path_join, s3_pipe_open, s3_prefetch_open, s3_readlink, s3_rename, s3_share_cache_open, s3_upload
|
|
4
|
+
from megfile.s3_path import S3BufferedWriter, S3Cacher, S3LimitedSeekableWriter, S3Path, S3PrefetchReader, S3ShareCacheReader, get_endpoint_url, get_s3_client, get_s3_session, is_s3, parse_s3_url, s3_buffered_open, s3_cached_open, s3_concat, s3_download, s3_glob, s3_glob_stat, s3_iglob, s3_load_content, s3_lstat, s3_makedirs, s3_memory_open, s3_open, s3_path_join, s3_pipe_open, s3_prefetch_open, s3_readlink, s3_rename, s3_share_cache_open, s3_upload
|
|
5
5
|
|
|
6
6
|
__all__ = [
|
|
7
7
|
'parse_s3_url',
|
|
@@ -32,6 +32,7 @@ __all__ = [
|
|
|
32
32
|
's3_rename',
|
|
33
33
|
's3_makedirs',
|
|
34
34
|
's3_concat',
|
|
35
|
+
's3_lstat',
|
|
35
36
|
's3_access',
|
|
36
37
|
's3_exists',
|
|
37
38
|
's3_getmtime',
|
|
@@ -47,7 +48,6 @@ __all__ = [
|
|
|
47
48
|
's3_scan_stat',
|
|
48
49
|
's3_scandir',
|
|
49
50
|
's3_stat',
|
|
50
|
-
's3_lstat',
|
|
51
51
|
's3_unlink',
|
|
52
52
|
's3_walk',
|
|
53
53
|
's3_getmd5',
|
|
@@ -251,11 +251,6 @@ def s3_stat(path: PathLike, follow_symlinks=True) -> StatResult:
|
|
|
251
251
|
return S3Path(path).stat(follow_symlinks)
|
|
252
252
|
|
|
253
253
|
|
|
254
|
-
def s3_lstat(path: PathLike) -> StatResult:
|
|
255
|
-
'''Like Path.stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.'''
|
|
256
|
-
return S3Path(path).lstat()
|
|
257
|
-
|
|
258
|
-
|
|
259
254
|
def s3_unlink(path: PathLike, missing_ok: bool = False) -> None:
|
|
260
255
|
'''
|
|
261
256
|
Remove the file on s3
|
megfile/s3_path.py
CHANGED
|
@@ -30,7 +30,7 @@ from megfile.lib.s3_prefetch_reader import DEFAULT_BLOCK_SIZE, S3PrefetchReader
|
|
|
30
30
|
from megfile.lib.s3_share_cache_reader import S3ShareCacheReader
|
|
31
31
|
from megfile.lib.url import get_url_scheme
|
|
32
32
|
from megfile.smart_path import SmartPath
|
|
33
|
-
from megfile.utils import cachedproperty, calculate_md5, generate_cache_path, get_binary_mode, get_content_offset, is_readable, necessary_params, thread_local
|
|
33
|
+
from megfile.utils import cachedproperty, calculate_md5, classproperty, generate_cache_path, get_binary_mode, get_content_offset, is_readable, necessary_params, thread_local
|
|
34
34
|
|
|
35
35
|
__all__ = [
|
|
36
36
|
'S3Path',
|
|
@@ -62,6 +62,7 @@ __all__ = [
|
|
|
62
62
|
's3_rename',
|
|
63
63
|
's3_makedirs',
|
|
64
64
|
's3_concat',
|
|
65
|
+
's3_lstat',
|
|
65
66
|
]
|
|
66
67
|
_logger = get_logger(__name__)
|
|
67
68
|
content_md5_header = 'megfile-content-md5'
|
|
@@ -1246,6 +1247,11 @@ def s3_concat(
|
|
|
1246
1247
|
executor.submit(writer.upload_part_by_paths, index, group)
|
|
1247
1248
|
|
|
1248
1249
|
|
|
1250
|
+
def s3_lstat(path: PathLike) -> StatResult:
|
|
1251
|
+
'''Like Path.stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.'''
|
|
1252
|
+
return S3Path(path).lstat()
|
|
1253
|
+
|
|
1254
|
+
|
|
1249
1255
|
@SmartPath.register
|
|
1250
1256
|
class S3Path(URIPath):
|
|
1251
1257
|
|
|
@@ -1283,6 +1289,16 @@ class S3Path(URIPath):
|
|
|
1283
1289
|
path = path[len(protocol_prefix):]
|
|
1284
1290
|
return path
|
|
1285
1291
|
|
|
1292
|
+
@cachedproperty
|
|
1293
|
+
def parts(self) -> Tuple[str]:
|
|
1294
|
+
'''A tuple giving access to the path’s various components'''
|
|
1295
|
+
parts = [f"{self._protocol_with_profile}://"]
|
|
1296
|
+
path = self.path_without_protocol
|
|
1297
|
+
path = path.lstrip('/')
|
|
1298
|
+
if path != '':
|
|
1299
|
+
parts.extend(path.split('/'))
|
|
1300
|
+
return tuple(parts)
|
|
1301
|
+
|
|
1286
1302
|
@cachedproperty
|
|
1287
1303
|
def _client(self):
|
|
1288
1304
|
return get_s3_client(profile_name=self._profile_name)
|
|
@@ -1945,10 +1961,6 @@ class S3Path(URIPath):
|
|
|
1945
1961
|
extra=content)
|
|
1946
1962
|
return stat_record
|
|
1947
1963
|
|
|
1948
|
-
def lstat(self) -> StatResult:
|
|
1949
|
-
'''Like Path.stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.'''
|
|
1950
|
-
return self.stat(follow_symlinks=False)
|
|
1951
|
-
|
|
1952
1964
|
def unlink(self, missing_ok: bool = False) -> None:
|
|
1953
1965
|
'''
|
|
1954
1966
|
Remove the file on s3
|
megfile/sftp.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import IO, AnyStr, BinaryIO, Callable, Iterator, List, Optional, Tuple
|
|
2
2
|
|
|
3
3
|
from megfile.interfaces import FileEntry, PathLike, StatResult
|
|
4
|
-
from megfile.sftp_path import SftpPath, is_sftp, sftp_concat, sftp_download, sftp_glob, sftp_glob_stat, sftp_iglob, sftp_path_join, sftp_readlink, sftp_resolve, sftp_upload
|
|
4
|
+
from megfile.sftp_path import SftpPath, is_sftp, sftp_concat, sftp_download, sftp_glob, sftp_glob_stat, sftp_iglob, sftp_lstat, sftp_path_join, sftp_readlink, sftp_resolve, sftp_upload
|
|
5
5
|
|
|
6
6
|
__all__ = [
|
|
7
7
|
'is_sftp',
|
|
@@ -14,6 +14,7 @@ __all__ = [
|
|
|
14
14
|
'sftp_upload',
|
|
15
15
|
'sftp_path_join',
|
|
16
16
|
'sftp_concat',
|
|
17
|
+
'sftp_lstat',
|
|
17
18
|
'sftp_exists',
|
|
18
19
|
'sftp_getmtime',
|
|
19
20
|
'sftp_getsize',
|
|
@@ -30,7 +31,6 @@ __all__ = [
|
|
|
30
31
|
'sftp_scan_stat',
|
|
31
32
|
'sftp_scandir',
|
|
32
33
|
'sftp_stat',
|
|
33
|
-
'sftp_lstat',
|
|
34
34
|
'sftp_unlink',
|
|
35
35
|
'sftp_walk',
|
|
36
36
|
'sftp_getmd5',
|
|
@@ -244,16 +244,6 @@ def sftp_stat(path: PathLike, follow_symlinks=True) -> StatResult:
|
|
|
244
244
|
return SftpPath(path).stat(follow_symlinks)
|
|
245
245
|
|
|
246
246
|
|
|
247
|
-
def sftp_lstat(path: PathLike) -> StatResult:
|
|
248
|
-
'''
|
|
249
|
-
Get StatResult of file on sftp, including file size and mtime, referring to fs_getsize and fs_getmtime
|
|
250
|
-
|
|
251
|
-
:param path: Given path
|
|
252
|
-
:returns: StatResult
|
|
253
|
-
'''
|
|
254
|
-
return SftpPath(path).lstat()
|
|
255
|
-
|
|
256
|
-
|
|
257
247
|
def sftp_unlink(path: PathLike, missing_ok: bool = False) -> None:
|
|
258
248
|
'''
|
|
259
249
|
Remove the file on sftp
|
megfile/sftp_path.py
CHANGED
|
@@ -38,6 +38,7 @@ __all__ = [
|
|
|
38
38
|
'sftp_upload',
|
|
39
39
|
'sftp_path_join',
|
|
40
40
|
'sftp_concat',
|
|
41
|
+
'sftp_lstat',
|
|
41
42
|
]
|
|
42
43
|
|
|
43
44
|
SFTP_USERNAME = "SFTP_USERNAME"
|
|
@@ -213,6 +214,7 @@ def _get_ssh_client(
|
|
|
213
214
|
fcntl.flock(fd, fcntl.LOCK_EX)
|
|
214
215
|
ssh_client.connect(
|
|
215
216
|
hostname=hostname,
|
|
217
|
+
port=port,
|
|
216
218
|
username=username,
|
|
217
219
|
password=password,
|
|
218
220
|
pkey=private_key,
|
|
@@ -528,6 +530,16 @@ def sftp_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
|
|
|
528
530
|
raise OSError(f'Failed to concat {src_paths} to {dst_path}')
|
|
529
531
|
|
|
530
532
|
|
|
533
|
+
def sftp_lstat(path: PathLike) -> StatResult:
|
|
534
|
+
'''
|
|
535
|
+
Get StatResult of file on sftp, including file size and mtime, referring to fs_getsize and fs_getmtime
|
|
536
|
+
|
|
537
|
+
:param path: Given path
|
|
538
|
+
:returns: StatResult
|
|
539
|
+
'''
|
|
540
|
+
return SftpPath(path).lstat()
|
|
541
|
+
|
|
542
|
+
|
|
531
543
|
@SmartPath.register
|
|
532
544
|
class SftpPath(URIPath):
|
|
533
545
|
"""sftp protocol
|
|
@@ -981,14 +993,6 @@ class SftpPath(URIPath):
|
|
|
981
993
|
result = _make_stat(self._client.lstat(self._real_path))
|
|
982
994
|
return result
|
|
983
995
|
|
|
984
|
-
def lstat(self) -> StatResult:
|
|
985
|
-
'''
|
|
986
|
-
Get StatResult of file on sftp, including file size and mtime, referring to fs_getsize and fs_getmtime
|
|
987
|
-
|
|
988
|
-
:returns: StatResult
|
|
989
|
-
'''
|
|
990
|
-
return self.stat(follow_symlinks=False)
|
|
991
|
-
|
|
992
996
|
def unlink(self, missing_ok: bool = False) -> None:
|
|
993
997
|
'''
|
|
994
998
|
Remove the file on sftp
|
megfile/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "2.2.
|
|
1
|
+
VERSION = "2.2.8.post1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: megfile
|
|
3
|
-
Version: 2.2.
|
|
3
|
+
Version: 2.2.8.post1
|
|
4
4
|
Summary: Megvii file operation library
|
|
5
5
|
Home-page: https://github.com/megvii-research/megfile
|
|
6
6
|
Author: megvii
|
|
@@ -24,10 +24,13 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
License-File: LICENSE.pyre
|
|
26
26
|
Requires-Dist: boto3
|
|
27
|
-
Requires-Dist: botocore
|
|
27
|
+
Requires-Dist: botocore >=1.13.0
|
|
28
28
|
Requires-Dist: requests
|
|
29
29
|
Requires-Dist: paramiko
|
|
30
30
|
Requires-Dist: tqdm
|
|
31
|
+
Requires-Dist: pyyaml
|
|
32
|
+
Provides-Extra: hdfs
|
|
33
|
+
Requires-Dist: hdfs ; extra == 'hdfs'
|
|
31
34
|
|
|
32
35
|
megfile - Megvii FILE library
|
|
33
36
|
---
|
|
@@ -53,37 +56,23 @@ megfile - Megvii FILE library
|
|
|
53
56
|
|
|
54
57
|
`megfile`'s advantages are:
|
|
55
58
|
|
|
56
|
-
* `smart_open` can open resources that use various protocols
|
|
57
|
-
* `smart_glob` is available on
|
|
59
|
+
* `smart_open` can open resources that use various protocols. In particular, the s3 reader / writer in `megfile` is multi-threaded, which makes it faster than known competitors.
|
|
60
|
+
* `smart_glob` is available on most protocols, and it supports the zsh extended pattern syntax of `{}`, e.g. `s3://bucket/video.{mp4,avi}`.
|
|
58
61
|
* All-inclusive functions like `smart_exists` / `smart_stat` / `smart_sync`. If you don't find the functions you want, [submit an issue](https://github.com/megvii-research/megfile/issues).
|
|
59
|
-
* Compatible with `pathlib.Path` interface, referring to `
|
|
62
|
+
* Compatible with `pathlib.Path` interface, referring to `SmartPath` and other protocol classes like `S3Path`.
|
|
60
63
|
|
|
61
|
-
##
|
|
62
|
-
|
|
63
|
-
Here's an example of writing a file to s3 / sftp / fs, syncing to local, reading and finally deleting it.
|
|
64
|
-
|
|
65
|
-
### Path Format
|
|
66
|
-
- local file
|
|
67
|
-
- unix filesystem path
|
|
68
|
-
- examples:
|
|
69
|
-
- `/data/test.txt`
|
|
70
|
-
- `test.txt`
|
|
71
|
-
- 1
|
|
64
|
+
## Support Protocols
|
|
65
|
+
- fs(local filesystem)
|
|
72
66
|
- s3
|
|
73
|
-
- `s3[+profile_name]://bucket/key`
|
|
74
67
|
- sftp
|
|
75
|
-
- `sftp://[username[:password]@]hostname[:port]//absolute_file_path`
|
|
76
|
-
- `sftp://[username[:password]@]hostname[:port]/relative_file_path`
|
|
77
68
|
- http
|
|
78
|
-
- http / https url
|
|
79
|
-
- examples:
|
|
80
|
-
- `http://hostname/test`
|
|
81
|
-
- `https://hostname/test`
|
|
82
69
|
- stdio
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
70
|
+
- hdfs: `pip install 'megfile[hdfs]'`
|
|
71
|
+
|
|
72
|
+
## Quick Start
|
|
73
|
+
|
|
74
|
+
A path string in `megfile` is almost always of the form `protocol://path/to/file`, for example `s3://bucketA/key`. An sftp path is a little different: its format is `sftp://[username[:password]@]hostname[:port]//file_path` for an absolute path, and for a relative path `//file_path` is replaced with `/file_path`.
|
|
75
|
+
Here's an example of writing a file to s3 / fs, syncing to local, reading and finally deleting it.
|
|
87
76
|
|
|
88
77
|
### Functional Interface
|
|
89
78
|
```python
|
|
@@ -107,15 +96,12 @@ smart_remove('s3://playground/megfile-test')
|
|
|
107
96
|
|
|
108
97
|
# glob files or directories in s3 bucket
|
|
109
98
|
smart_glob('s3://playground/megfile-?.{mp4,avi}')
|
|
110
|
-
|
|
111
|
-
# smart_open also support protocols like http / https
|
|
112
|
-
smart_open('https://www.google.com')
|
|
113
|
-
|
|
114
|
-
# smart_open also support protocols like sftp
|
|
115
|
-
smart_open('sftp://username:password@sftp.server.com:22/path/to/file')
|
|
116
99
|
```
|
|
117
100
|
|
|
118
101
|
### SmartPath Interface
|
|
102
|
+
|
|
103
|
+
`SmartPath` has an interface similar to `pathlib.Path`.
|
|
104
|
+
|
|
119
105
|
```python
|
|
120
106
|
from megfile.smart_path import SmartPath
|
|
121
107
|
|
|
@@ -170,20 +156,32 @@ pip3 install -r requirements.txt -r requirements-dev.txt
|
|
|
170
156
|
|
|
171
157
|
## Configuration
|
|
172
158
|
|
|
173
|
-
|
|
159
|
+
Using `s3` as an example, the following describes the configuration methods. For more details, please refer to [Configuration](https://megvii-research.github.io/megfile/configuration.html).
|
|
160
|
+
|
|
161
|
+
You can use environment variables and a configuration file for configuration; environment variables take precedence over the configuration file.
|
|
162
|
+
|
|
163
|
+
### Use environment variables
|
|
164
|
+
You can use environment variables to set up authentication credentials for your s3 account:
|
|
165
|
+
- `AWS_ACCESS_KEY_ID`: access key
|
|
166
|
+
- `AWS_SECRET_ACCESS_KEY`: secret key
|
|
167
|
+
- `OSS_ENDPOINT`: endpoint url of s3
|
|
168
|
+
- `AWS_S3_ADDRESSING_STYLE`: addressing style
|
|
169
|
+
|
|
170
|
+
### Use command
|
|
171
|
+
You can easily update the config file with the `megfile` command:
|
|
172
|
+
[megfile config s3 [OPTIONS] AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY](https://megvii-research.github.io/megfile/cli.html#megfile-config-s3)
|
|
174
173
|
|
|
175
174
|
```
|
|
176
|
-
$
|
|
177
|
-
AWS Access Key ID [None]: accesskey
|
|
178
|
-
AWS Secret Access Key [None]: secretkey
|
|
179
|
-
Default region name [None]:
|
|
180
|
-
Default output format [None]:
|
|
175
|
+
$ megfile config s3 accesskey secretkey
|
|
181
176
|
|
|
182
|
-
# for aliyun
|
|
183
|
-
$
|
|
184
|
-
|
|
177
|
+
# for aliyun
|
|
178
|
+
$ megfile config s3 accesskey secretkey \
|
|
179
|
+
--addressing-style virtual \
|
|
180
|
+
--endpoint-url http://oss-cn-hangzhou.aliyuncs.com
|
|
181
|
+
```
|
|
185
182
|
|
|
186
|
-
|
|
183
|
+
You can get the configuration from `~/.aws/credentials`, like:
|
|
184
|
+
```
|
|
187
185
|
[default]
|
|
188
186
|
aws_secret_access_key = secretkey
|
|
189
187
|
aws_access_key_id = accesskey
|
|
@@ -193,50 +191,6 @@ s3 =
|
|
|
193
191
|
endpoint_url = http://oss-cn-hangzhou.aliyuncs.com
|
|
194
192
|
```
|
|
195
193
|
|
|
196
|
-
You also can operate s3 files with different endpoint urls, access keys and secret keys. You can set config for different profiles by environment(`PROFILE_NAME__AWS_ACCESS_KEY_ID`, `PROFILE_NAME__AWS_SECRET_ACCESS_KEY`, `PROFILE_NAME__OSS_ENDPOINT`, `PROFILE_NAME__AWS_S3_ADDRESSING_STYLE`) or `~/.aws/config`. Then you can operate files with path `s3+profile_name://bucket/key`.
|
|
197
|
-
For example:
|
|
198
|
-
```
|
|
199
|
-
# set config with environment
|
|
200
|
-
$ export PROFILE1__AWS_ACCESS_KEY_ID=profile1-accesskey
|
|
201
|
-
$ export PROFILE1__AWS_SECRET_ACCESS_KEY=profile1-secretkey
|
|
202
|
-
$ export PROFILE1__OSS_ENDPOINT=https://profile1.s3.custom.com
|
|
203
|
-
|
|
204
|
-
$ export PROFILE2__AWS_ACCESS_KEY_ID=profile2-accesskey
|
|
205
|
-
$ export PROFILE2__AWS_SECRET_ACCESS_KEY=profile2-secretkey
|
|
206
|
-
$ export PROFILE2__OSS_ENDPOINT=https://profile2.s3.custom.com
|
|
207
|
-
|
|
208
|
-
# set config with file
|
|
209
|
-
$ cat ~/.aws/config
|
|
210
|
-
[profile1]
|
|
211
|
-
aws_secret_access_key = profile1-accesskey
|
|
212
|
-
aws_access_key_id = profile1-secretkey
|
|
213
|
-
s3 =
|
|
214
|
-
endpoint_url = https://profile1.s3.custom.com
|
|
215
|
-
|
|
216
|
-
[profile2]
|
|
217
|
-
aws_secret_access_key = profile2-accesskey
|
|
218
|
-
aws_access_key_id = profile2-secretkey
|
|
219
|
-
s3 =
|
|
220
|
-
addressing_style = virtual
|
|
221
|
-
endpoint_url = https://profile2.s3.custom.com
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
# python
|
|
225
|
-
megfile.smart_copy('s3+profile1://bucket/key', 's3+profile2://bucket/key')
|
|
226
|
-
```
|
|
227
|
-
|
|
228
|
-
sftp support some environments:
|
|
229
|
-
```
|
|
230
|
-
# If you are not set username or password in path, you can set them in environments
|
|
231
|
-
$ export SFTP_USERNAME=user
|
|
232
|
-
$ export SFTP_PASSWORD=user_password
|
|
233
|
-
|
|
234
|
-
# You can also set private key for sftp connection
|
|
235
|
-
$ export SFTP_PRIVATE_KEY_PATH=/home/user/custom_private_key_path # default not use private key
|
|
236
|
-
$ export SFTP_PRIVATE_KEY_TYPE=RSA # default is RSA
|
|
237
|
-
$ export SFTP_PRIVATE_KEY_PASSWORD=private_key_password
|
|
238
|
-
```
|
|
239
|
-
|
|
240
194
|
## How to Contribute
|
|
241
195
|
* We welcome everyone to contribute code to the `megfile` project, but the contributed code needs to meet the following conditions as much as possible:
|
|
242
196
|
|
|
@@ -1,21 +1,23 @@
|
|
|
1
|
-
megfile/__init__.py,sha256=
|
|
2
|
-
megfile/cli.py,sha256=
|
|
3
|
-
megfile/errors.py,sha256=
|
|
4
|
-
megfile/fs.py,sha256=
|
|
5
|
-
megfile/fs_path.py,sha256=
|
|
1
|
+
megfile/__init__.py,sha256=MT8SIXsmEUvtSpd1GHv6e3fFfR1gRnlEdkNNqv3gngo,6534
|
|
2
|
+
megfile/cli.py,sha256=lzIMM13Kqd06H7tudM6Yw2OKxXnLC369cv15ggVtMyc,20002
|
|
3
|
+
megfile/errors.py,sha256=ATLYeKgfppA3cb1lNwzWf0AcKAJksr6KXPglpJa2j2w,13198
|
|
4
|
+
megfile/fs.py,sha256=OfY0z4GSl8fT3mDGdeqP2hWFsd1QJl-h8RkSbg6-M8I,11547
|
|
5
|
+
megfile/fs_path.py,sha256=CyOd_hOFGaYeq8CLSqF3B3ns52GMuugy9b5PD-yK0I4,38789
|
|
6
|
+
megfile/hdfs.py,sha256=aAkHobOO0nDcLoqj0tx_1tvgoLOCooTWuukq0pO-nQA,9156
|
|
7
|
+
megfile/hdfs_path.py,sha256=tgB2-lOqscnLRttyhm6gaD3WE-txYV6bqTH0kHeW2T8,26644
|
|
6
8
|
megfile/http.py,sha256=a3oAuARSSaIU8VMx86Mui0N5Vh-EI0AoHnwxRU5DSMU,2032
|
|
7
|
-
megfile/http_path.py,sha256=
|
|
9
|
+
megfile/http_path.py,sha256=pbMPE5bK7rjoFXBHVVD-O9BtPnFwsK1boHBJg6YERcQ,11420
|
|
8
10
|
megfile/interfaces.py,sha256=h3tWE8hVt5S-HopaMAX6lunPJ97vzhv6jH_2HubcDNc,6219
|
|
9
|
-
megfile/pathlike.py,sha256=
|
|
10
|
-
megfile/s3.py,sha256=
|
|
11
|
-
megfile/s3_path.py,sha256
|
|
12
|
-
megfile/sftp.py,sha256=
|
|
13
|
-
megfile/sftp_path.py,sha256=
|
|
11
|
+
megfile/pathlike.py,sha256=Ere6tMf2nsI7bDsZo0WBzl_2HRrS_4iKOpYp0zZltAU,29487
|
|
12
|
+
megfile/s3.py,sha256=siBZfveWX1TDA4Mp41UvugcG3zlrhl_iPUbixUp1TmI,12352
|
|
13
|
+
megfile/s3_path.py,sha256=-u4g6uQ2JuMuzgvjw4KMmdF3jXAGkRy2kRRgAGcEgQ8,88778
|
|
14
|
+
megfile/sftp.py,sha256=JCkF2v1ZbHuIy_Bg3l85AesjFDimDzx9Gh1gRoMsahc,12524
|
|
15
|
+
megfile/sftp_path.py,sha256=gz0tW3FUSY44kqoEygBMfq-WJR4vxaOuu850j-zZbtI,50576
|
|
14
16
|
megfile/smart.py,sha256=JH5zed90eQchS8GNQ7mbXvif-pNKjPjvnadMazKfQMs,33278
|
|
15
17
|
megfile/smart_path.py,sha256=Y0UFh4J2ccydRY2W-wX2ubaf9zzJx1M2nf-VLBGe4mk,6749
|
|
16
18
|
megfile/stdio.py,sha256=yRhlfUA2DHi3bq-9cXsSlbLCnHvS_zvglO2IYYyPsGc,707
|
|
17
19
|
megfile/stdio_path.py,sha256=eQulTXUwHvUKA-5PKCGfVNiEPkJhG9YtVhtU58OcmoM,2873
|
|
18
|
-
megfile/version.py,sha256=
|
|
20
|
+
megfile/version.py,sha256=9139fPUG0s02fyUGMvGdaft44dmi26CtzBhSkOtlTw0,25
|
|
19
21
|
megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
22
|
megfile/lib/base_prefetch_reader.py,sha256=SjrBffHVgvJnYtr8HNqiOozP9OJRYS37Eu1KQcZu1Z8,13221
|
|
21
23
|
megfile/lib/combine_reader.py,sha256=XFSqEY5A5X5Uf7eQ6AXAzrvNteESSXvKNVPktGjo3KY,4546
|
|
@@ -23,6 +25,8 @@ megfile/lib/compare.py,sha256=yG2fZve_gMg32rQVCdwixBdqgYRsjn-24TqhALQaOrA,2233
|
|
|
23
25
|
megfile/lib/compat.py,sha256=rYjfzQ3svuY7pB37W1JGyWH1kxd9aT4RtIe90npPtXI,3033
|
|
24
26
|
megfile/lib/fnmatch.py,sha256=HgdlnEWBsdFUOZqnW_v1kj1jeH_9lMcCqW85pyMu4vM,4054
|
|
25
27
|
megfile/lib/glob.py,sha256=7i9dIput9rI9JIPyTZX-JDmFS7IP_THlX1k-35foAfw,9732
|
|
28
|
+
megfile/lib/hdfs_prefetch_reader.py,sha256=GODZnEGgAOwXAMiIbbeClnpLTHyUMeEncC3VU7jUWo4,2014
|
|
29
|
+
megfile/lib/hdfs_tools.py,sha256=t4GeoBxO0HPahIQDrsK17WBsLZtcfAaNwWfappzZ5q8,442
|
|
26
30
|
megfile/lib/http_prefetch_reader.py,sha256=YDtQXRX-yxyaFzqI_CL3X73-Idkdz1aPIDL29uY77zw,3326
|
|
27
31
|
megfile/lib/joinpath.py,sha256=D4Px6-lnDDpYs1LMUHkTIGqMPJQ0oCBGfTzREs373iU,929
|
|
28
32
|
megfile/lib/lazy_handler.py,sha256=f1rip2_T57vVo0WRNXve2bAa4LArvVheMfQg1S0vFzg,1915
|
|
@@ -38,10 +42,10 @@ megfile/lib/stdio_handler.py,sha256=QDWtcZxz-hzi-rqQUiSlR3NrihX1fjK_Rj9T2mdTFEg,
|
|
|
38
42
|
megfile/lib/url.py,sha256=VbQLjo0s4AaV0iSk66BcjI68aUTcN9zBZ5x6-cM4Qvs,103
|
|
39
43
|
megfile/utils/__init__.py,sha256=qdX8FF_dYFKwp1BIWx3JeSGd91s7AKUDSEpDv9tORcM,9162
|
|
40
44
|
megfile/utils/mutex.py,sha256=-2KH3bNovKRd9zvsXq9n3bWM7rQdoG9hO7tUPxVG_Po,2538
|
|
41
|
-
megfile-2.2.
|
|
42
|
-
megfile-2.2.
|
|
43
|
-
megfile-2.2.
|
|
44
|
-
megfile-2.2.
|
|
45
|
-
megfile-2.2.
|
|
46
|
-
megfile-2.2.
|
|
47
|
-
megfile-2.2.
|
|
45
|
+
megfile-2.2.8.post1.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
46
|
+
megfile-2.2.8.post1.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
|
|
47
|
+
megfile-2.2.8.post1.dist-info/METADATA,sha256=tEAeR70I3W2r9ZDfy5uxC5tAhG43ghUfmU84qnFggYg,8968
|
|
48
|
+
megfile-2.2.8.post1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
49
|
+
megfile-2.2.8.post1.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
|
|
50
|
+
megfile-2.2.8.post1.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
|
|
51
|
+
megfile-2.2.8.post1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|