megfile 2.2.1.post1__py3-none-any.whl → 2.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +30 -5
- megfile/errors.py +6 -2
- megfile/fs.py +8 -2
- megfile/fs_path.py +9 -3
- megfile/pathlike.py +1 -1
- megfile/s3.py +7 -4
- megfile/s3_path.py +69 -28
- megfile/sftp.py +7 -2
- megfile/sftp_path.py +57 -21
- megfile/smart.py +9 -3
- megfile/stdio.py +1 -1
- megfile/stdio_path.py +1 -1
- megfile/version.py +1 -1
- {megfile-2.2.1.post1.dist-info → megfile-2.2.3.dist-info}/METADATA +5 -4
- {megfile-2.2.1.post1.dist-info → megfile-2.2.3.dist-info}/RECORD +20 -20
- {megfile-2.2.1.post1.dist-info → megfile-2.2.3.dist-info}/LICENSE +0 -0
- {megfile-2.2.1.post1.dist-info → megfile-2.2.3.dist-info}/LICENSE.pyre +0 -0
- {megfile-2.2.1.post1.dist-info → megfile-2.2.3.dist-info}/WHEEL +0 -0
- {megfile-2.2.1.post1.dist-info → megfile-2.2.3.dist-info}/entry_points.txt +0 -0
- {megfile-2.2.1.post1.dist-info → megfile-2.2.3.dist-info}/top_level.txt +0 -0
megfile/cli.py
CHANGED
|
@@ -141,10 +141,18 @@ def cp(
|
|
|
141
141
|
2) as executor:
|
|
142
142
|
if progress_bar:
|
|
143
143
|
smart_sync_with_progress(
|
|
144
|
-
src_path,
|
|
144
|
+
src_path,
|
|
145
|
+
dst_path,
|
|
146
|
+
followlinks=True,
|
|
147
|
+
map_func=executor.map,
|
|
148
|
+
force=True)
|
|
145
149
|
else:
|
|
146
150
|
smart_sync(
|
|
147
|
-
src_path,
|
|
151
|
+
src_path,
|
|
152
|
+
dst_path,
|
|
153
|
+
followlinks=True,
|
|
154
|
+
map_func=executor.map,
|
|
155
|
+
force=True)
|
|
148
156
|
else:
|
|
149
157
|
if progress_bar:
|
|
150
158
|
file_size = smart_stat(src_path).size
|
|
@@ -251,7 +259,14 @@ def rm(path: str, recursive: bool):
|
|
|
251
259
|
type=click.INT,
|
|
252
260
|
default=8,
|
|
253
261
|
help='Number of concurrent workers.')
|
|
254
|
-
|
|
262
|
+
@click.option(
|
|
263
|
+
'-f',
|
|
264
|
+
'--force',
|
|
265
|
+
is_flag=True,
|
|
266
|
+
help='Copy files forcely, ignore same files.')
|
|
267
|
+
def sync(
|
|
268
|
+
src_path: str, dst_path: str, progress_bar: bool, worker: int,
|
|
269
|
+
force: bool):
|
|
255
270
|
with ThreadPoolExecutor(max_workers=worker) as executor:
|
|
256
271
|
if has_magic(src_path):
|
|
257
272
|
root_dir = get_non_glob_dir(src_path)
|
|
@@ -278,6 +293,7 @@ def sync(src_path: str, dst_path: str, progress_bar: bool, worker):
|
|
|
278
293
|
callback_after_copy_file=callback_after_copy_file,
|
|
279
294
|
src_file_stats=path_stats,
|
|
280
295
|
map_func=executor.map,
|
|
296
|
+
force=force,
|
|
281
297
|
)
|
|
282
298
|
|
|
283
299
|
tbar.close()
|
|
@@ -288,14 +304,23 @@ def sync(src_path: str, dst_path: str, progress_bar: bool, worker):
|
|
|
288
304
|
dst_path,
|
|
289
305
|
src_file_stats=path_stats,
|
|
290
306
|
map_func=executor.map,
|
|
307
|
+
force=force,
|
|
291
308
|
)
|
|
292
309
|
else:
|
|
293
310
|
if progress_bar:
|
|
294
311
|
smart_sync_with_progress(
|
|
295
|
-
src_path,
|
|
312
|
+
src_path,
|
|
313
|
+
dst_path,
|
|
314
|
+
followlinks=True,
|
|
315
|
+
map_func=executor.map,
|
|
316
|
+
force=force)
|
|
296
317
|
else:
|
|
297
318
|
smart_sync(
|
|
298
|
-
src_path,
|
|
319
|
+
src_path,
|
|
320
|
+
dst_path,
|
|
321
|
+
followlinks=True,
|
|
322
|
+
map_func=executor.map,
|
|
323
|
+
force=force)
|
|
299
324
|
|
|
300
325
|
|
|
301
326
|
@cli.command(short_help="Make the path if it doesn't already exist.")
|
megfile/errors.py
CHANGED
|
@@ -2,7 +2,8 @@ import time
|
|
|
2
2
|
from contextlib import contextmanager
|
|
3
3
|
from functools import wraps
|
|
4
4
|
from logging import getLogger
|
|
5
|
-
from
|
|
5
|
+
from shutil import SameFileError
|
|
6
|
+
from typing import Callable, Optional
|
|
6
7
|
|
|
7
8
|
import botocore.exceptions
|
|
8
9
|
import requests.exceptions
|
|
@@ -27,6 +28,7 @@ __all__ = [
|
|
|
27
28
|
'ProtocolExistsError',
|
|
28
29
|
'ProtocolNotFoundError',
|
|
29
30
|
'S3UnknownError',
|
|
31
|
+
'SameFileError',
|
|
30
32
|
'translate_http_error',
|
|
31
33
|
'translate_s3_error',
|
|
32
34
|
'patch_method',
|
|
@@ -305,7 +307,9 @@ def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
|
|
|
305
307
|
elif isinstance(s3_error, ClientError):
|
|
306
308
|
code = client_error_code(s3_error)
|
|
307
309
|
if code in ('NoSuchBucket'):
|
|
308
|
-
return S3BucketNotFoundError(
|
|
310
|
+
return S3BucketNotFoundError(
|
|
311
|
+
'No such bucket: %r' %
|
|
312
|
+
s3_error.response.get('Error', {}).get('BucketName') or s3_url) # pytype: disable=attribute-error
|
|
309
313
|
if code in ('404', 'NoSuchKey'):
|
|
310
314
|
return S3FileNotFoundError('No such file: %r' % s3_url)
|
|
311
315
|
if code in ('401', '403', 'AccessDenied'):
|
megfile/fs.py
CHANGED
|
@@ -345,13 +345,19 @@ def fs_copy(
|
|
|
345
345
|
return FSPath(src_path).copy(dst_path, callback, followlinks)
|
|
346
346
|
|
|
347
347
|
|
|
348
|
-
def fs_sync(
|
|
348
|
+
def fs_sync(
|
|
349
|
+
src_path: PathLike,
|
|
350
|
+
dst_path: PathLike,
|
|
351
|
+
followlinks: bool = False,
|
|
352
|
+
force: bool = False) -> None:
|
|
349
353
|
'''Force write of everything to disk.
|
|
350
354
|
|
|
351
355
|
:param src_path: Given path
|
|
352
356
|
:param dst_path: Target file path
|
|
357
|
+
:param followlinks: False if regard symlink as file, else True
|
|
358
|
+
:param force: Sync file forcely, do not ignore same files
|
|
353
359
|
'''
|
|
354
|
-
return FSPath(src_path).sync(dst_path, followlinks)
|
|
360
|
+
return FSPath(src_path).sync(dst_path, followlinks, force)
|
|
355
361
|
|
|
356
362
|
|
|
357
363
|
def fs_symlink(src_path: PathLike, dst_path: PathLike) -> None:
|
megfile/fs_path.py
CHANGED
|
@@ -765,10 +765,16 @@ class FSPath(URIPath):
|
|
|
765
765
|
else:
|
|
766
766
|
raise
|
|
767
767
|
|
|
768
|
-
def sync(
|
|
768
|
+
def sync(
|
|
769
|
+
self,
|
|
770
|
+
dst_path: PathLike,
|
|
771
|
+
followlinks: bool = False,
|
|
772
|
+
force: bool = False) -> None:
|
|
769
773
|
'''Force write of everything to disk.
|
|
770
774
|
|
|
771
775
|
:param dst_path: Target file path
|
|
776
|
+
:param followlinks: False if regard symlink as file, else True
|
|
777
|
+
:param force: Sync file forcely, do not ignore same files
|
|
772
778
|
'''
|
|
773
779
|
if self.is_dir(followlinks=followlinks):
|
|
774
780
|
|
|
@@ -776,7 +782,7 @@ class FSPath(URIPath):
|
|
|
776
782
|
ignore_files = []
|
|
777
783
|
for name in names:
|
|
778
784
|
dst_obj = self.from_path(dst_path).joinpath(name)
|
|
779
|
-
if dst_obj.exists() and is_same_file(
|
|
785
|
+
if not force and dst_obj.exists() and is_same_file(
|
|
780
786
|
self.joinpath(name).stat(), dst_obj.stat(), 'copy'):
|
|
781
787
|
ignore_files.append(name)
|
|
782
788
|
return ignore_files
|
|
@@ -852,7 +858,7 @@ class FSPath(URIPath):
|
|
|
852
858
|
|
|
853
859
|
def open(
|
|
854
860
|
self,
|
|
855
|
-
mode: str,
|
|
861
|
+
mode: str = 'r',
|
|
856
862
|
buffering=-1,
|
|
857
863
|
encoding=None,
|
|
858
864
|
errors=None,
|
megfile/pathlike.py
CHANGED
|
@@ -317,7 +317,7 @@ class BasePath:
|
|
|
317
317
|
"""Remove (delete) the directory."""
|
|
318
318
|
|
|
319
319
|
@method_not_implemented
|
|
320
|
-
def open(self, mode: str, **kwargs) -> IO[AnyStr]: # type: ignore
|
|
320
|
+
def open(self, mode: str = 'r', **kwargs) -> IO[AnyStr]: # type: ignore
|
|
321
321
|
"""Open the file with mode."""
|
|
322
322
|
|
|
323
323
|
@method_not_implemented
|
megfile/s3.py
CHANGED
|
@@ -64,7 +64,6 @@ def s3_access(
|
|
|
64
64
|
followlinks: bool = False) -> bool:
|
|
65
65
|
'''
|
|
66
66
|
Test if path has access permission described by mode
|
|
67
|
-
Using head_bucket(), now READ/WRITE are same.
|
|
68
67
|
|
|
69
68
|
:param path: Given path
|
|
70
69
|
:param mode: access mode
|
|
@@ -325,15 +324,19 @@ def s3_copy(
|
|
|
325
324
|
|
|
326
325
|
|
|
327
326
|
def s3_sync(
|
|
328
|
-
src_url: PathLike,
|
|
329
|
-
|
|
327
|
+
src_url: PathLike,
|
|
328
|
+
dst_url: PathLike,
|
|
329
|
+
followlinks: bool = False,
|
|
330
|
+
force: bool = False) -> None:
|
|
330
331
|
'''
|
|
331
332
|
Copy file/directory on src_url to dst_url
|
|
332
333
|
|
|
333
334
|
:param src_url: Given path
|
|
334
335
|
:param dst_url: Given destination path
|
|
336
|
+
:param followlinks: False if regard symlink as file, else True
|
|
337
|
+
:param force: Sync file forcely, do not ignore same files
|
|
335
338
|
'''
|
|
336
|
-
return S3Path(src_url).sync(dst_url, followlinks)
|
|
339
|
+
return S3Path(src_url).sync(dst_url, followlinks, force)
|
|
337
340
|
|
|
338
341
|
|
|
339
342
|
def s3_symlink(src_path: PathLike, dst_path: PathLike) -> None:
|
megfile/s3_path.py
CHANGED
|
@@ -12,7 +12,7 @@ import boto3
|
|
|
12
12
|
import botocore
|
|
13
13
|
from botocore.awsrequest import AWSResponse
|
|
14
14
|
|
|
15
|
-
from megfile.errors import S3BucketNotFoundError, S3ConfigError, S3FileExistsError, S3FileNotFoundError, S3IsADirectoryError, S3NameTooLongError, S3NotADirectoryError, S3NotALinkError, S3PermissionError, S3UnknownError, UnsupportedError, _create_missing_ok_generator
|
|
15
|
+
from megfile.errors import S3BucketNotFoundError, S3ConfigError, S3FileExistsError, S3FileNotFoundError, S3IsADirectoryError, S3NameTooLongError, S3NotADirectoryError, S3NotALinkError, S3PermissionError, S3UnknownError, SameFileError, UnsupportedError, _create_missing_ok_generator
|
|
16
16
|
from megfile.errors import _logger as error_logger
|
|
17
17
|
from megfile.errors import patch_method, raise_s3_error, s3_error_code_should_retry, s3_should_retry, translate_fs_error, translate_s3_error
|
|
18
18
|
from megfile.interfaces import Access, ContextIterator, FileCacher, FileEntry, PathLike, StatResult, URIPath
|
|
@@ -189,6 +189,20 @@ def get_s3_client(
|
|
|
189
189
|
|
|
190
190
|
:returns: S3 client
|
|
191
191
|
'''
|
|
192
|
+
addressing_style_env_key = 'AWS_S3_ADDRESSING_STYLE'
|
|
193
|
+
if profile_name:
|
|
194
|
+
addressing_style_env_key = f'{profile_name}__AWS_S3_ADDRESSING_STYLE'.upper(
|
|
195
|
+
)
|
|
196
|
+
addressing_style = os.environ.get(addressing_style_env_key)
|
|
197
|
+
if addressing_style:
|
|
198
|
+
if config:
|
|
199
|
+
config = config.merge(
|
|
200
|
+
botocore.config.Config(
|
|
201
|
+
s3={'addressing_style': addressing_style}))
|
|
202
|
+
else:
|
|
203
|
+
config = botocore.config.Config(
|
|
204
|
+
s3={'addressing_style': addressing_style})
|
|
205
|
+
|
|
192
206
|
if cache_key is not None:
|
|
193
207
|
return thread_local(
|
|
194
208
|
cache_key, get_s3_client, config=config, profile_name=profile_name)
|
|
@@ -518,10 +532,6 @@ def _s3_binary_mode(s3_open_func):
|
|
|
518
532
|
raise S3FileExistsError('File exists: %r' % s3_url)
|
|
519
533
|
mode = mode.replace('x', 'w')
|
|
520
534
|
|
|
521
|
-
if 'w' in mode or 'a' in mode:
|
|
522
|
-
if not S3Path(s3_url).hasbucket():
|
|
523
|
-
raise S3BucketNotFoundError('No such bucket: %r' % s3_url)
|
|
524
|
-
|
|
525
535
|
fileobj = s3_open_func(s3_url, get_binary_mode(mode), **kwargs)
|
|
526
536
|
if 'b' not in mode:
|
|
527
537
|
fileobj = io.TextIOWrapper(
|
|
@@ -1260,7 +1270,6 @@ class S3Path(URIPath):
|
|
|
1260
1270
|
followlinks: bool = False) -> bool:
|
|
1261
1271
|
'''
|
|
1262
1272
|
Test if path has access permission described by mode
|
|
1263
|
-
Using head_bucket(), now READ/WRITE are same.
|
|
1264
1273
|
|
|
1265
1274
|
:param mode: access mode
|
|
1266
1275
|
:returns: bool, if the bucket of s3_url has read/write access.
|
|
@@ -1271,7 +1280,7 @@ class S3Path(URIPath):
|
|
|
1271
1280
|
s3_url = self.readlink().path_with_protocol
|
|
1272
1281
|
except S3NotALinkError:
|
|
1273
1282
|
pass
|
|
1274
|
-
bucket,
|
|
1283
|
+
bucket, key = parse_s3_url(s3_url) # only check bucket accessibility
|
|
1275
1284
|
if not bucket:
|
|
1276
1285
|
raise Exception("No available bucket")
|
|
1277
1286
|
if not isinstance(mode, Access):
|
|
@@ -1280,15 +1289,38 @@ class S3Path(URIPath):
|
|
|
1280
1289
|
.format(mode, ', '.join([str(a) for a in Access])))
|
|
1281
1290
|
if mode not in (Access.READ, Access.WRITE):
|
|
1282
1291
|
raise TypeError('Unsupported mode: {}'.format(mode))
|
|
1292
|
+
|
|
1283
1293
|
try:
|
|
1284
|
-
self.
|
|
1294
|
+
if not self.exists():
|
|
1295
|
+
return False
|
|
1285
1296
|
except Exception as error:
|
|
1286
1297
|
error = translate_s3_error(error, s3_url)
|
|
1287
|
-
if isinstance(error,
|
|
1288
|
-
|
|
1298
|
+
if isinstance(error, S3PermissionError):
|
|
1299
|
+
return False
|
|
1300
|
+
raise error
|
|
1301
|
+
|
|
1302
|
+
if mode == Access.READ:
|
|
1303
|
+
return True
|
|
1304
|
+
try:
|
|
1305
|
+
if not key:
|
|
1306
|
+
key = 'test'
|
|
1307
|
+
elif key.endswith('/'):
|
|
1308
|
+
key = key[:-1]
|
|
1309
|
+
upload_id = self._client.create_multipart_upload(
|
|
1310
|
+
Bucket=bucket,
|
|
1311
|
+
Key=key,
|
|
1312
|
+
)['UploadId']
|
|
1313
|
+
self._client.abort_multipart_upload(
|
|
1314
|
+
Bucket=bucket,
|
|
1315
|
+
Key=key,
|
|
1316
|
+
UploadId=upload_id,
|
|
1317
|
+
)
|
|
1318
|
+
return True
|
|
1319
|
+
except Exception as error:
|
|
1320
|
+
error = translate_s3_error(error, s3_url)
|
|
1321
|
+
if isinstance(error, S3PermissionError):
|
|
1289
1322
|
return False
|
|
1290
1323
|
raise error
|
|
1291
|
-
return True
|
|
1292
1324
|
|
|
1293
1325
|
def exists(self, followlinks: bool = False) -> bool:
|
|
1294
1326
|
'''
|
|
@@ -1302,7 +1334,7 @@ class S3Path(URIPath):
|
|
|
1302
1334
|
if not bucket: # s3:// => True, s3:///key => False
|
|
1303
1335
|
return not key
|
|
1304
1336
|
|
|
1305
|
-
return self.
|
|
1337
|
+
return self.is_file(followlinks) or self.is_dir()
|
|
1306
1338
|
|
|
1307
1339
|
def getmtime(self, follow_symlinks: bool = False) -> float:
|
|
1308
1340
|
'''
|
|
@@ -1531,10 +1563,21 @@ class S3Path(URIPath):
|
|
|
1531
1563
|
self._client.head_bucket(Bucket=bucket)
|
|
1532
1564
|
except Exception as error:
|
|
1533
1565
|
error = translate_s3_error(error, self.path_with_protocol)
|
|
1534
|
-
if isinstance(error,
|
|
1535
|
-
|
|
1566
|
+
if isinstance(error, S3PermissionError):
|
|
1567
|
+
# Aliyun OSS doesn't give bucket api permission when you only have read and write permission
|
|
1568
|
+
try:
|
|
1569
|
+
self._client.list_objects_v2(Bucket=bucket, MaxKeys=1)
|
|
1570
|
+
return True
|
|
1571
|
+
except Exception as error2:
|
|
1572
|
+
error2 = translate_s3_error(error2, self.path_with_protocol)
|
|
1573
|
+
if isinstance(
|
|
1574
|
+
error2,
|
|
1575
|
+
(S3UnknownError, S3ConfigError, S3PermissionError)):
|
|
1576
|
+
raise error2
|
|
1577
|
+
return False
|
|
1578
|
+
elif isinstance(error, (S3UnknownError, S3ConfigError)):
|
|
1536
1579
|
raise error
|
|
1537
|
-
|
|
1580
|
+
elif isinstance(error, S3FileNotFoundError):
|
|
1538
1581
|
return False
|
|
1539
1582
|
|
|
1540
1583
|
return True
|
|
@@ -1978,6 +2021,10 @@ class S3Path(URIPath):
|
|
|
1978
2021
|
src_url = self.path_with_protocol
|
|
1979
2022
|
src_bucket, src_key = parse_s3_url(src_url)
|
|
1980
2023
|
dst_bucket, dst_key = parse_s3_url(dst_url)
|
|
2024
|
+
if dst_bucket == src_bucket and src_key.rstrip('/') == dst_key.rstrip(
|
|
2025
|
+
'/'):
|
|
2026
|
+
raise SameFileError(
|
|
2027
|
+
f"'{src_url}' and '{dst_url}' are the same file")
|
|
1981
2028
|
|
|
1982
2029
|
if not src_bucket:
|
|
1983
2030
|
raise S3BucketNotFoundError('Empty bucket name: %r' % src_url)
|
|
@@ -1996,7 +2043,7 @@ class S3Path(URIPath):
|
|
|
1996
2043
|
except S3NotALinkError:
|
|
1997
2044
|
pass
|
|
1998
2045
|
|
|
1999
|
-
|
|
2046
|
+
with raise_s3_error(f"'{src_url}' or '{dst_url}'"):
|
|
2000
2047
|
self._client.copy(
|
|
2001
2048
|
{
|
|
2002
2049
|
'Bucket': src_bucket,
|
|
@@ -2005,28 +2052,22 @@ class S3Path(URIPath):
|
|
|
2005
2052
|
Bucket=dst_bucket,
|
|
2006
2053
|
Key=dst_key,
|
|
2007
2054
|
Callback=callback)
|
|
2008
|
-
except Exception as error:
|
|
2009
|
-
error = translate_s3_error(error, dst_url)
|
|
2010
|
-
# Error can't help tell which is problematic
|
|
2011
|
-
if isinstance(error, S3BucketNotFoundError):
|
|
2012
|
-
if not self.hasbucket():
|
|
2013
|
-
raise S3BucketNotFoundError('No such bucket: %r' % src_url)
|
|
2014
|
-
elif isinstance(error, S3FileNotFoundError):
|
|
2015
|
-
if not self.is_file():
|
|
2016
|
-
raise S3FileNotFoundError('No such file: %r' % src_url)
|
|
2017
|
-
raise error
|
|
2018
2055
|
|
|
2019
|
-
def sync(
|
|
2056
|
+
def sync(
|
|
2057
|
+
self, dst_url: PathLike, followlinks: bool = False,
|
|
2058
|
+
force: bool = False) -> None:
|
|
2020
2059
|
'''
|
|
2021
2060
|
Copy file/directory on src_url to dst_url
|
|
2022
2061
|
|
|
2023
2062
|
:param dst_url: Given destination path
|
|
2063
|
+
:param followlinks: False if regard symlink as file, else True
|
|
2064
|
+
:param force: Sync file forcely, do not ignore same files
|
|
2024
2065
|
'''
|
|
2025
2066
|
for src_file_path, dst_file_path in _s3_scan_pairs(
|
|
2026
2067
|
self.path_with_protocol, dst_url):
|
|
2027
2068
|
src_file_path = self.from_path(src_file_path)
|
|
2028
2069
|
dst_file_path = self.from_path(dst_file_path)
|
|
2029
|
-
if dst_file_path.exists() and is_same_file(
|
|
2070
|
+
if not force and dst_file_path.exists() and is_same_file(
|
|
2030
2071
|
src_file_path.stat(), dst_file_path.stat(), 'copy'):
|
|
2031
2072
|
continue
|
|
2032
2073
|
src_file_path.copy(dst_file_path, followlinks=followlinks)
|
megfile/sftp.py
CHANGED
|
@@ -395,10 +395,15 @@ def sftp_copy(
|
|
|
395
395
|
|
|
396
396
|
|
|
397
397
|
def sftp_sync(
|
|
398
|
-
src_path: PathLike,
|
|
398
|
+
src_path: PathLike,
|
|
399
|
+
dst_path: PathLike,
|
|
400
|
+
followlinks: bool = False,
|
|
401
|
+
force: bool = False):
|
|
399
402
|
'''Copy file/directory on src_url to dst_url
|
|
400
403
|
|
|
401
404
|
:param src_path: Given path
|
|
402
405
|
:param dst_url: Given destination path
|
|
406
|
+
:param followlinks: False if regard symlink as file, else True
|
|
407
|
+
:param force: Sync file forcely, do not ignore same files
|
|
403
408
|
'''
|
|
404
|
-
return SftpPath(src_path).sync(dst_path, followlinks)
|
|
409
|
+
return SftpPath(src_path).sync(dst_path, followlinks, force)
|
megfile/sftp_path.py
CHANGED
|
@@ -3,6 +3,7 @@ import hashlib
|
|
|
3
3
|
import io
|
|
4
4
|
import os
|
|
5
5
|
import shlex
|
|
6
|
+
import socket
|
|
6
7
|
import subprocess
|
|
7
8
|
from logging import getLogger as get_logger
|
|
8
9
|
from stat import S_ISDIR, S_ISLNK, S_ISREG
|
|
@@ -11,7 +12,7 @@ from urllib.parse import urlsplit, urlunsplit
|
|
|
11
12
|
|
|
12
13
|
import paramiko
|
|
13
14
|
|
|
14
|
-
from megfile.errors import _create_missing_ok_generator, patch_method
|
|
15
|
+
from megfile.errors import SameFileError, _create_missing_ok_generator, patch_method
|
|
15
16
|
from megfile.interfaces import ContextIterator, FileEntry, PathLike, StatResult
|
|
16
17
|
from megfile.lib.compare import is_same_file
|
|
17
18
|
from megfile.lib.glob import FSFunc, iglob
|
|
@@ -45,6 +46,7 @@ SFTP_PRIVATE_KEY_TYPE = "SFTP_PRIVATE_KEY_TYPE"
|
|
|
45
46
|
SFTP_PRIVATE_KEY_PASSWORD = "SFTP_PRIVATE_KEY_PASSWORD"
|
|
46
47
|
MAX_RETRIES = 10
|
|
47
48
|
DEFAULT_SSH_CONNECT_TIMEOUT = 5
|
|
49
|
+
DEFAULT_SSH_KEEPALIVE_INTERVAL = 15
|
|
48
50
|
|
|
49
51
|
|
|
50
52
|
def _make_stat(stat: paramiko.SFTPAttributes) -> StatResult:
|
|
@@ -94,8 +96,11 @@ def provide_connect_info(
|
|
|
94
96
|
def sftp_should_retry(error: Exception) -> bool:
|
|
95
97
|
if type(error) is EOFError:
|
|
96
98
|
return False
|
|
97
|
-
elif isinstance(error,
|
|
98
|
-
|
|
99
|
+
elif isinstance(error, (
|
|
100
|
+
paramiko.ssh_exception.SSHException,
|
|
101
|
+
ConnectionError,
|
|
102
|
+
socket.timeout,
|
|
103
|
+
)):
|
|
99
104
|
return True
|
|
100
105
|
elif isinstance(error, OSError) and str(error) == 'Socket is closed':
|
|
101
106
|
return True
|
|
@@ -148,8 +153,14 @@ def _get_sftp_client(
|
|
|
148
153
|
|
|
149
154
|
:returns: sftp client
|
|
150
155
|
'''
|
|
151
|
-
|
|
152
|
-
|
|
156
|
+
session = get_ssh_session(
|
|
157
|
+
hostname=hostname,
|
|
158
|
+
port=port,
|
|
159
|
+
username=username,
|
|
160
|
+
password=password,
|
|
161
|
+
)
|
|
162
|
+
session.invoke_subsystem("sftp")
|
|
163
|
+
sftp_client = paramiko.SFTPClient(session)
|
|
153
164
|
_patch_sftp_client_request(sftp_client, hostname, port, username, password)
|
|
154
165
|
return sftp_client
|
|
155
166
|
|
|
@@ -189,6 +200,9 @@ def _get_ssh_client(
|
|
|
189
200
|
username=username,
|
|
190
201
|
password=password,
|
|
191
202
|
pkey=private_key,
|
|
203
|
+
timeout=DEFAULT_SSH_CONNECT_TIMEOUT,
|
|
204
|
+
auth_timeout=DEFAULT_SSH_CONNECT_TIMEOUT,
|
|
205
|
+
banner_timeout=DEFAULT_SSH_CONNECT_TIMEOUT,
|
|
192
206
|
)
|
|
193
207
|
atexit.register(ssh_client.close)
|
|
194
208
|
return ssh_client
|
|
@@ -211,10 +225,9 @@ def get_ssh_session(
|
|
|
211
225
|
username: Optional[str] = None,
|
|
212
226
|
password: Optional[str] = None,
|
|
213
227
|
) -> paramiko.Channel:
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
except paramiko.SSHException:
|
|
228
|
+
|
|
229
|
+
def retry_callback(error, *args, **kwargs):
|
|
230
|
+
ssh_client = get_ssh_client(hostname, port, username, password)
|
|
218
231
|
ssh_client.close()
|
|
219
232
|
atexit.unregister(ssh_client.close)
|
|
220
233
|
ssh_key = f'ssh_client:{hostname},{port},{username},{password}'
|
|
@@ -223,20 +236,34 @@ def get_ssh_session(
|
|
|
223
236
|
sftp_key = f'sftp_client:{hostname},{port},{username},{password}'
|
|
224
237
|
if thread_local.get(sftp_key):
|
|
225
238
|
del thread_local[sftp_key]
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
239
|
+
|
|
240
|
+
return patch_method(
|
|
241
|
+
_open_session, # pytype: disable=attribute-error
|
|
242
|
+
max_retries=MAX_RETRIES,
|
|
243
|
+
should_retry=sftp_should_retry,
|
|
244
|
+
retry_callback=retry_callback)(
|
|
245
|
+
hostname,
|
|
246
|
+
port,
|
|
247
|
+
username,
|
|
248
|
+
password,
|
|
249
|
+
)
|
|
233
250
|
|
|
234
251
|
|
|
235
|
-
def _open_session(
|
|
252
|
+
def _open_session(
|
|
253
|
+
hostname: str,
|
|
254
|
+
port: Optional[int] = None,
|
|
255
|
+
username: Optional[str] = None,
|
|
256
|
+
password: Optional[str] = None,
|
|
257
|
+
) -> paramiko.Channel:
|
|
258
|
+
ssh_client = get_ssh_client(hostname, port, username, password)
|
|
236
259
|
transport = ssh_client.get_transport()
|
|
237
260
|
if not transport:
|
|
238
|
-
raise paramiko.SSHException()
|
|
261
|
+
raise paramiko.SSHException('Get transport error')
|
|
262
|
+
transport.set_keepalive(DEFAULT_SSH_KEEPALIVE_INTERVAL)
|
|
239
263
|
session = transport.open_session(timeout=DEFAULT_SSH_CONNECT_TIMEOUT)
|
|
264
|
+
if not session:
|
|
265
|
+
raise paramiko.SSHException('Create session error')
|
|
266
|
+
session.settimeout(DEFAULT_SSH_CONNECT_TIMEOUT)
|
|
240
267
|
return session
|
|
241
268
|
|
|
242
269
|
|
|
@@ -1167,6 +1194,9 @@ class SftpPath(URIPath):
|
|
|
1167
1194
|
self.from_path(os.path.dirname(dst_path)).makedirs(exist_ok=True)
|
|
1168
1195
|
dst_path = self.from_path(dst_path)
|
|
1169
1196
|
if self._is_same_backend(dst_path):
|
|
1197
|
+
if self._real_path == dst_path._real_path:
|
|
1198
|
+
raise SameFileError(
|
|
1199
|
+
f"'{self.path}' and '{dst_path.path}' are the same file")
|
|
1170
1200
|
exec_result = self._exec_command(
|
|
1171
1201
|
["cp", self._real_path, dst_path._real_path])
|
|
1172
1202
|
if exec_result.returncode != 0:
|
|
@@ -1189,10 +1219,16 @@ class SftpPath(URIPath):
|
|
|
1189
1219
|
dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
|
|
1190
1220
|
dst_path._client.chmod(dst_path._real_path, src_stat.st_mode)
|
|
1191
1221
|
|
|
1192
|
-
def sync(
|
|
1222
|
+
def sync(
|
|
1223
|
+
self,
|
|
1224
|
+
dst_path: PathLike,
|
|
1225
|
+
followlinks: bool = False,
|
|
1226
|
+
force: bool = False):
|
|
1193
1227
|
'''Copy file/directory on src_url to dst_url
|
|
1194
1228
|
|
|
1195
1229
|
:param dst_url: Given destination path
|
|
1230
|
+
:param followlinks: False if regard symlink as file, else True
|
|
1231
|
+
:param force: Sync file forcely, do not ignore same files
|
|
1196
1232
|
'''
|
|
1197
1233
|
if not self._is_same_protocol(dst_path):
|
|
1198
1234
|
raise OSError('Not a %s path: %r' % (self.protocol, dst_path))
|
|
@@ -1201,8 +1237,8 @@ class SftpPath(URIPath):
|
|
|
1201
1237
|
self.path_with_protocol, dst_path):
|
|
1202
1238
|
dst_path = self.from_path(dst_file_path)
|
|
1203
1239
|
src_path = self.from_path(src_file_path)
|
|
1204
|
-
if dst_path.exists() and is_same_file(
|
|
1205
|
-
|
|
1240
|
+
if not force and dst_path.exists() and is_same_file(
|
|
1241
|
+
src_path.stat(), dst_path.stat(), 'copy'):
|
|
1206
1242
|
continue
|
|
1207
1243
|
self.from_path(os.path.dirname(dst_file_path)).mkdir(
|
|
1208
1244
|
parents=True, exist_ok=True)
|
megfile/smart.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import IO, Any, AnyStr, BinaryIO, Callable, Iterable, Iterator, List
|
|
|
7
7
|
|
|
8
8
|
from tqdm import tqdm
|
|
9
9
|
|
|
10
|
+
from megfile.errors import SameFileError
|
|
10
11
|
from megfile.fs import fs_copy, is_fs
|
|
11
12
|
from megfile.interfaces import Access, ContextIterator, FileCacher, FileEntry, NullCacher, PathLike, StatResult
|
|
12
13
|
from megfile.lib.combine_reader import CombineReader
|
|
@@ -342,6 +343,7 @@ def _smart_sync_single_file(items: dict):
|
|
|
342
343
|
callback = items['callback']
|
|
343
344
|
followlinks = items['followlinks']
|
|
344
345
|
callback_after_copy_file = items['callback_after_copy_file']
|
|
346
|
+
force = items['force']
|
|
345
347
|
|
|
346
348
|
content_path = os.path.relpath(src_file_path, start=src_root_path)
|
|
347
349
|
if len(content_path) and content_path != '.':
|
|
@@ -355,7 +357,7 @@ def _smart_sync_single_file(items: dict):
|
|
|
355
357
|
dst_protocol, _ = SmartPath._extract_protocol(dst_abs_file_path)
|
|
356
358
|
should_sync = True
|
|
357
359
|
try:
|
|
358
|
-
if smart_exists(dst_abs_file_path) and is_same_file(
|
|
360
|
+
if not force and smart_exists(dst_abs_file_path) and is_same_file(
|
|
359
361
|
smart_stat(src_file_path), smart_stat(dst_abs_file_path),
|
|
360
362
|
get_sync_type(src_protocol, dst_protocol)):
|
|
361
363
|
should_sync = False
|
|
@@ -380,7 +382,8 @@ def smart_sync(
|
|
|
380
382
|
followlinks: bool = False,
|
|
381
383
|
callback_after_copy_file: Optional[Callable[[str, str], None]] = None,
|
|
382
384
|
src_file_stats: Optional[Iterable[FileEntry]] = None,
|
|
383
|
-
map_func: Callable[[Callable, Iterable], Any] = map
|
|
385
|
+
map_func: Callable[[Callable, Iterable], Any] = map,
|
|
386
|
+
force: bool = False) -> None:
|
|
384
387
|
'''
|
|
385
388
|
Sync file or directory on s3 and fs
|
|
386
389
|
|
|
@@ -443,6 +446,7 @@ def smart_sync(
|
|
|
443
446
|
callback=callback,
|
|
444
447
|
followlinks=followlinks,
|
|
445
448
|
callback_after_copy_file=callback_after_copy_file,
|
|
449
|
+
force=force,
|
|
446
450
|
)
|
|
447
451
|
|
|
448
452
|
for _ in map_func(_smart_sync_single_file, create_generator()):
|
|
@@ -454,7 +458,8 @@ def smart_sync_with_progress(
|
|
|
454
458
|
dst_path,
|
|
455
459
|
callback: Optional[Callable[[str, int], None]] = None,
|
|
456
460
|
followlinks: bool = False,
|
|
457
|
-
map_func: Callable[[Callable, Iterable], Iterator] = map
|
|
461
|
+
map_func: Callable[[Callable, Iterable], Iterator] = map,
|
|
462
|
+
force: bool = False):
|
|
458
463
|
src_path, dst_path = get_traditional_path(src_path), get_traditional_path(
|
|
459
464
|
dst_path)
|
|
460
465
|
file_stats = list(
|
|
@@ -478,6 +483,7 @@ def smart_sync_with_progress(
|
|
|
478
483
|
callback_after_copy_file=callback_after_copy_file,
|
|
479
484
|
src_file_stats=file_stats,
|
|
480
485
|
map_func=map_func,
|
|
486
|
+
force=force,
|
|
481
487
|
)
|
|
482
488
|
tbar.close()
|
|
483
489
|
sbar.close()
|
megfile/stdio.py
CHANGED
megfile/stdio_path.py
CHANGED
megfile/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "2.2.
|
|
1
|
+
VERSION = "2.2.3"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: megfile
|
|
3
|
-
Version: 2.2.
|
|
3
|
+
Version: 2.2.3
|
|
4
4
|
Summary: Megvii file operation library
|
|
5
5
|
Home-page: https://github.com/megvii-research/megfile
|
|
6
6
|
Author: megvii
|
|
@@ -69,7 +69,7 @@ Here's an example of writing a file to s3 / sftp / fs, syncing to local, reading
|
|
|
69
69
|
- `/data/test.txt`
|
|
70
70
|
- `test.txt`
|
|
71
71
|
- 1
|
|
72
|
-
-
|
|
72
|
+
- s3
|
|
73
73
|
- `s3[+profile_name]://bucket/key`
|
|
74
74
|
- sftp
|
|
75
75
|
- `sftp://[username[:password]@]hostname[:port]//absolute_file_path`
|
|
@@ -170,7 +170,7 @@ pip3 install -r requirements.txt -r requirements-dev.txt
|
|
|
170
170
|
|
|
171
171
|
## Configuration
|
|
172
172
|
|
|
173
|
-
Before using `megfile` to access files on s3, you need to set up authentication credentials for your s3 account using the [AWS CLI](https://docs.aws.amazon.com/cli/latest/reference/configure/index.html) or editing the file `~/.aws/config` directly, see also: [boto3 configuration](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html) & [boto3 credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html).
|
|
173
|
+
Before using `megfile` to access files on s3, you need to set up authentication credentials for your s3 account using the [AWS CLI](https://docs.aws.amazon.com/cli/latest/reference/configure/index.html) or editing the file `~/.aws/config` directly, see also: [boto3 configuration](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html) & [boto3 credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). Megfile also support environments for s3, like `OSS_ENDPOINT`, `AWS_ACCESS_KEY_ID` , `AWS_SECRET_ACCESS_KEY` and `AWS_S3_ADDRESSING_STYLE`.
|
|
174
174
|
|
|
175
175
|
```
|
|
176
176
|
$ aws configure
|
|
@@ -193,7 +193,7 @@ s3 =
|
|
|
193
193
|
endpoint_url = http://oss-cn-hangzhou.aliyuncs.com
|
|
194
194
|
```
|
|
195
195
|
|
|
196
|
-
You also can operate s3 files with different endpoint urls, access keys and secret keys. You can set config for different profiles by environment(`PROFILE_NAME__AWS_ACCESS_KEY_ID`, `PROFILE_NAME__AWS_SECRET_ACCESS_KEY`, `PROFILE_NAME__OSS_ENDPOINT`) or `~/.aws/config`. Then you can operate files with path `s3+profile_name://bucket/key`.
|
|
196
|
+
You also can operate s3 files with different endpoint urls, access keys and secret keys. You can set config for different profiles by environment(`PROFILE_NAME__AWS_ACCESS_KEY_ID`, `PROFILE_NAME__AWS_SECRET_ACCESS_KEY`, `PROFILE_NAME__OSS_ENDPOINT`, `PROFILE_NAME__AWS_S3_ADDRESSING_STYLE`) or `~/.aws/config`. Then you can operate files with path `s3+profile_name://bucket/key`.
|
|
197
197
|
For example:
|
|
198
198
|
```
|
|
199
199
|
# set config with environment
|
|
@@ -217,6 +217,7 @@ s3 =
|
|
|
217
217
|
aws_secret_access_key = profile2-accesskey
|
|
218
218
|
aws_access_key_id = profile2-secretkey
|
|
219
219
|
s3 =
|
|
220
|
+
addressing_style = virtual
|
|
220
221
|
endpoint_url = https://profile2.s3.custom.com
|
|
221
222
|
|
|
222
223
|
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
megfile/__init__.py,sha256=Qsi3XNP_0XYoSol-1AGutZqo0rfBnzaiZ-HVXll4fB0,5721
|
|
2
|
-
megfile/cli.py,sha256=
|
|
3
|
-
megfile/errors.py,sha256=
|
|
4
|
-
megfile/fs.py,sha256=
|
|
5
|
-
megfile/fs_path.py,sha256=
|
|
2
|
+
megfile/cli.py,sha256=ZRS2Ityuqk51T-BDvU-q1EDAS66Vw8gk817Wg_OWwSk,13625
|
|
3
|
+
megfile/errors.py,sha256=BIZ8XlQLRo0oZsU38Jhv8eooCFEjzsg3RSb1nTuLc5g,11663
|
|
4
|
+
megfile/fs.py,sha256=LtrzQsyZgogTJeoRFz4L52gxx0jByzRBLkpWYpvkp5I,11819
|
|
5
|
+
megfile/fs_path.py,sha256=JkY8qGIIboK5MK2rSagYEvnu5FTzmk9OHXIhTO7BjeY,38767
|
|
6
6
|
megfile/http.py,sha256=a3oAuARSSaIU8VMx86Mui0N5Vh-EI0AoHnwxRU5DSMU,2032
|
|
7
7
|
megfile/http_path.py,sha256=pbIlle-scbKC88XO0uV9y3sIm0HME6E_R061VTBg8w8,8966
|
|
8
8
|
megfile/interfaces.py,sha256=h3tWE8hVt5S-HopaMAX6lunPJ97vzhv6jH_2HubcDNc,6219
|
|
9
|
-
megfile/pathlike.py,sha256=
|
|
10
|
-
megfile/s3.py,sha256=
|
|
11
|
-
megfile/s3_path.py,sha256=
|
|
12
|
-
megfile/sftp.py,sha256=
|
|
13
|
-
megfile/sftp_path.py,sha256=
|
|
14
|
-
megfile/smart.py,sha256=
|
|
9
|
+
megfile/pathlike.py,sha256=WpP8zWSOAcAfYrD65hZS08UEi4_iCoEMs2xvfFMwZvY,29264
|
|
10
|
+
megfile/s3.py,sha256=7XZSWjcSY-hoLhLH9dtfyRpokfYH9raTO_Mf69RjpEs,12560
|
|
11
|
+
megfile/s3_path.py,sha256=fGMrV0_cRWmjqRcbziuhC7vCLXnDvbEEHbGDmW4ZHGQ,87172
|
|
12
|
+
megfile/sftp.py,sha256=CZYv1WKL0d_vuJ5aPgMhm0W8uskzaO5zbYGhGwt_mQs,12771
|
|
13
|
+
megfile/sftp_path.py,sha256=rwEfw93fZkNEf7TjZ1-8bGiaVU0oxwb3H7ub6x9iUsA,49153
|
|
14
|
+
megfile/smart.py,sha256=PAg8i56kFn94a6LGlH1v2qamCojYM9qKb-Ryw6rX3ks,33204
|
|
15
15
|
megfile/smart_path.py,sha256=Rwb1McXsshi9-F6miTRqE6j8FO2j1edjmSxZF32YZ6E,6708
|
|
16
|
-
megfile/stdio.py,sha256=
|
|
17
|
-
megfile/stdio_path.py,sha256=
|
|
18
|
-
megfile/version.py,sha256=
|
|
16
|
+
megfile/stdio.py,sha256=yRhlfUA2DHi3bq-9cXsSlbLCnHvS_zvglO2IYYyPsGc,707
|
|
17
|
+
megfile/stdio_path.py,sha256=eQulTXUwHvUKA-5PKCGfVNiEPkJhG9YtVhtU58OcmoM,2873
|
|
18
|
+
megfile/version.py,sha256=NUfAFE_kB_uAzWs7eyqhwlH9YxsrTSBEHoHyhugYN4Y,19
|
|
19
19
|
megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
megfile/lib/base_prefetch_reader.py,sha256=SjrBffHVgvJnYtr8HNqiOozP9OJRYS37Eu1KQcZu1Z8,13221
|
|
21
21
|
megfile/lib/combine_reader.py,sha256=XFSqEY5A5X5Uf7eQ6AXAzrvNteESSXvKNVPktGjo3KY,4546
|
|
@@ -38,10 +38,10 @@ megfile/lib/stdio_handler.py,sha256=QDWtcZxz-hzi-rqQUiSlR3NrihX1fjK_Rj9T2mdTFEg,
|
|
|
38
38
|
megfile/lib/url.py,sha256=VbQLjo0s4AaV0iSk66BcjI68aUTcN9zBZ5x6-cM4Qvs,103
|
|
39
39
|
megfile/utils/__init__.py,sha256=qdX8FF_dYFKwp1BIWx3JeSGd91s7AKUDSEpDv9tORcM,9162
|
|
40
40
|
megfile/utils/mutex.py,sha256=-2KH3bNovKRd9zvsXq9n3bWM7rQdoG9hO7tUPxVG_Po,2538
|
|
41
|
-
megfile-2.2.
|
|
42
|
-
megfile-2.2.
|
|
43
|
-
megfile-2.2.
|
|
44
|
-
megfile-2.2.
|
|
45
|
-
megfile-2.2.
|
|
46
|
-
megfile-2.2.
|
|
47
|
-
megfile-2.2.
|
|
41
|
+
megfile-2.2.3.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
42
|
+
megfile-2.2.3.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
|
|
43
|
+
megfile-2.2.3.dist-info/METADATA,sha256=VL8zMVQHM7NRnGr4X3A6uCXzRN220PvKQR47pjJC46g,10742
|
|
44
|
+
megfile-2.2.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
45
|
+
megfile-2.2.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
|
|
46
|
+
megfile-2.2.3.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
|
|
47
|
+
megfile-2.2.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|