megfile 2.2.1.post1__py3-none-any.whl → 2.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/cli.py CHANGED
@@ -141,10 +141,18 @@ def cp(
141
141
  2) as executor:
142
142
  if progress_bar:
143
143
  smart_sync_with_progress(
144
- src_path, dst_path, followlinks=True, map_func=executor.map)
144
+ src_path,
145
+ dst_path,
146
+ followlinks=True,
147
+ map_func=executor.map,
148
+ force=True)
145
149
  else:
146
150
  smart_sync(
147
- src_path, dst_path, followlinks=True, map_func=executor.map)
151
+ src_path,
152
+ dst_path,
153
+ followlinks=True,
154
+ map_func=executor.map,
155
+ force=True)
148
156
  else:
149
157
  if progress_bar:
150
158
  file_size = smart_stat(src_path).size
@@ -251,7 +259,14 @@ def rm(path: str, recursive: bool):
251
259
  type=click.INT,
252
260
  default=8,
253
261
  help='Number of concurrent workers.')
254
- def sync(src_path: str, dst_path: str, progress_bar: bool, worker):
262
+ @click.option(
263
+ '-f',
264
+ '--force',
265
+ is_flag=True,
266
+ help='Copy files forcely, ignore same files.')
267
+ def sync(
268
+ src_path: str, dst_path: str, progress_bar: bool, worker: int,
269
+ force: bool):
255
270
  with ThreadPoolExecutor(max_workers=worker) as executor:
256
271
  if has_magic(src_path):
257
272
  root_dir = get_non_glob_dir(src_path)
@@ -278,6 +293,7 @@ def sync(src_path: str, dst_path: str, progress_bar: bool, worker):
278
293
  callback_after_copy_file=callback_after_copy_file,
279
294
  src_file_stats=path_stats,
280
295
  map_func=executor.map,
296
+ force=force,
281
297
  )
282
298
 
283
299
  tbar.close()
@@ -288,14 +304,23 @@ def sync(src_path: str, dst_path: str, progress_bar: bool, worker):
288
304
  dst_path,
289
305
  src_file_stats=path_stats,
290
306
  map_func=executor.map,
307
+ force=force,
291
308
  )
292
309
  else:
293
310
  if progress_bar:
294
311
  smart_sync_with_progress(
295
- src_path, dst_path, followlinks=True, map_func=executor.map)
312
+ src_path,
313
+ dst_path,
314
+ followlinks=True,
315
+ map_func=executor.map,
316
+ force=force)
296
317
  else:
297
318
  smart_sync(
298
- src_path, dst_path, followlinks=True, map_func=executor.map)
319
+ src_path,
320
+ dst_path,
321
+ followlinks=True,
322
+ map_func=executor.map,
323
+ force=force)
299
324
 
300
325
 
301
326
  @cli.command(short_help="Make the path if it doesn't already exist.")
megfile/errors.py CHANGED
@@ -2,7 +2,8 @@ import time
2
2
  from contextlib import contextmanager
3
3
  from functools import wraps
4
4
  from logging import getLogger
5
- from typing import Callable, List, Optional
5
+ from shutil import SameFileError
6
+ from typing import Callable, Optional
6
7
 
7
8
  import botocore.exceptions
8
9
  import requests.exceptions
@@ -27,6 +28,7 @@ __all__ = [
27
28
  'ProtocolExistsError',
28
29
  'ProtocolNotFoundError',
29
30
  'S3UnknownError',
31
+ 'SameFileError',
30
32
  'translate_http_error',
31
33
  'translate_s3_error',
32
34
  'patch_method',
@@ -305,7 +307,9 @@ def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
305
307
  elif isinstance(s3_error, ClientError):
306
308
  code = client_error_code(s3_error)
307
309
  if code in ('NoSuchBucket'):
308
- return S3BucketNotFoundError('No such bucket: %r' % s3_url)
310
+ return S3BucketNotFoundError(
311
+ 'No such bucket: %r' %
312
+ s3_error.response.get('Error', {}).get('BucketName') or s3_url) # pytype: disable=attribute-error
309
313
  if code in ('404', 'NoSuchKey'):
310
314
  return S3FileNotFoundError('No such file: %r' % s3_url)
311
315
  if code in ('401', '403', 'AccessDenied'):
megfile/fs.py CHANGED
@@ -345,13 +345,19 @@ def fs_copy(
345
345
  return FSPath(src_path).copy(dst_path, callback, followlinks)
346
346
 
347
347
 
348
- def fs_sync(src_path: PathLike, dst_path: PathLike, followlinks: bool = False):
348
+ def fs_sync(
349
+ src_path: PathLike,
350
+ dst_path: PathLike,
351
+ followlinks: bool = False,
352
+ force: bool = False) -> None:
349
353
  '''Force write of everything to disk.
350
354
 
351
355
  :param src_path: Given path
352
356
  :param dst_path: Target file path
357
+ :param followlinks: False if regard symlink as file, else True
358
+ :param force: Sync file forcely, do not ignore same files
353
359
  '''
354
- return FSPath(src_path).sync(dst_path, followlinks)
360
+ return FSPath(src_path).sync(dst_path, followlinks, force)
355
361
 
356
362
 
357
363
  def fs_symlink(src_path: PathLike, dst_path: PathLike) -> None:
megfile/fs_path.py CHANGED
@@ -765,10 +765,16 @@ class FSPath(URIPath):
765
765
  else:
766
766
  raise
767
767
 
768
- def sync(self, dst_path: PathLike, followlinks: bool = False):
768
+ def sync(
769
+ self,
770
+ dst_path: PathLike,
771
+ followlinks: bool = False,
772
+ force: bool = False) -> None:
769
773
  '''Force write of everything to disk.
770
774
 
771
775
  :param dst_path: Target file path
776
+ :param followlinks: False if regard symlink as file, else True
777
+ :param force: Sync file forcely, do not ignore same files
772
778
  '''
773
779
  if self.is_dir(followlinks=followlinks):
774
780
 
@@ -776,7 +782,7 @@ class FSPath(URIPath):
776
782
  ignore_files = []
777
783
  for name in names:
778
784
  dst_obj = self.from_path(dst_path).joinpath(name)
779
- if dst_obj.exists() and is_same_file(
785
+ if not force and dst_obj.exists() and is_same_file(
780
786
  self.joinpath(name).stat(), dst_obj.stat(), 'copy'):
781
787
  ignore_files.append(name)
782
788
  return ignore_files
@@ -852,7 +858,7 @@ class FSPath(URIPath):
852
858
 
853
859
  def open(
854
860
  self,
855
- mode: str,
861
+ mode: str = 'r',
856
862
  buffering=-1,
857
863
  encoding=None,
858
864
  errors=None,
megfile/pathlike.py CHANGED
@@ -317,7 +317,7 @@ class BasePath:
317
317
  """Remove (delete) the directory."""
318
318
 
319
319
  @method_not_implemented
320
- def open(self, mode: str, **kwargs) -> IO[AnyStr]: # type: ignore
320
+ def open(self, mode: str = 'r', **kwargs) -> IO[AnyStr]: # type: ignore
321
321
  """Open the file with mode."""
322
322
 
323
323
  @method_not_implemented
megfile/s3.py CHANGED
@@ -64,7 +64,6 @@ def s3_access(
64
64
  followlinks: bool = False) -> bool:
65
65
  '''
66
66
  Test if path has access permission described by mode
67
- Using head_bucket(), now READ/WRITE are same.
68
67
 
69
68
  :param path: Given path
70
69
  :param mode: access mode
@@ -325,15 +324,19 @@ def s3_copy(
325
324
 
326
325
 
327
326
  def s3_sync(
328
- src_url: PathLike, dst_url: PathLike,
329
- followlinks: bool = False) -> None:
327
+ src_url: PathLike,
328
+ dst_url: PathLike,
329
+ followlinks: bool = False,
330
+ force: bool = False) -> None:
330
331
  '''
331
332
  Copy file/directory on src_url to dst_url
332
333
 
333
334
  :param src_url: Given path
334
335
  :param dst_url: Given destination path
336
+ :param followlinks: False if regard symlink as file, else True
337
+ :param force: Sync file forcely, do not ignore same files
335
338
  '''
336
- return S3Path(src_url).sync(dst_url, followlinks)
339
+ return S3Path(src_url).sync(dst_url, followlinks, force)
337
340
 
338
341
 
339
342
  def s3_symlink(src_path: PathLike, dst_path: PathLike) -> None:
megfile/s3_path.py CHANGED
@@ -12,7 +12,7 @@ import boto3
12
12
  import botocore
13
13
  from botocore.awsrequest import AWSResponse
14
14
 
15
- from megfile.errors import S3BucketNotFoundError, S3ConfigError, S3FileExistsError, S3FileNotFoundError, S3IsADirectoryError, S3NameTooLongError, S3NotADirectoryError, S3NotALinkError, S3PermissionError, S3UnknownError, UnsupportedError, _create_missing_ok_generator
15
+ from megfile.errors import S3BucketNotFoundError, S3ConfigError, S3FileExistsError, S3FileNotFoundError, S3IsADirectoryError, S3NameTooLongError, S3NotADirectoryError, S3NotALinkError, S3PermissionError, S3UnknownError, SameFileError, UnsupportedError, _create_missing_ok_generator
16
16
  from megfile.errors import _logger as error_logger
17
17
  from megfile.errors import patch_method, raise_s3_error, s3_error_code_should_retry, s3_should_retry, translate_fs_error, translate_s3_error
18
18
  from megfile.interfaces import Access, ContextIterator, FileCacher, FileEntry, PathLike, StatResult, URIPath
@@ -189,6 +189,20 @@ def get_s3_client(
189
189
 
190
190
  :returns: S3 client
191
191
  '''
192
+ addressing_style_env_key = 'AWS_S3_ADDRESSING_STYLE'
193
+ if profile_name:
194
+ addressing_style_env_key = f'{profile_name}__AWS_S3_ADDRESSING_STYLE'.upper(
195
+ )
196
+ addressing_style = os.environ.get(addressing_style_env_key)
197
+ if addressing_style:
198
+ if config:
199
+ config = config.merge(
200
+ botocore.config.Config(
201
+ s3={'addressing_style': addressing_style}))
202
+ else:
203
+ config = botocore.config.Config(
204
+ s3={'addressing_style': addressing_style})
205
+
192
206
  if cache_key is not None:
193
207
  return thread_local(
194
208
  cache_key, get_s3_client, config=config, profile_name=profile_name)
@@ -518,10 +532,6 @@ def _s3_binary_mode(s3_open_func):
518
532
  raise S3FileExistsError('File exists: %r' % s3_url)
519
533
  mode = mode.replace('x', 'w')
520
534
 
521
- if 'w' in mode or 'a' in mode:
522
- if not S3Path(s3_url).hasbucket():
523
- raise S3BucketNotFoundError('No such bucket: %r' % s3_url)
524
-
525
535
  fileobj = s3_open_func(s3_url, get_binary_mode(mode), **kwargs)
526
536
  if 'b' not in mode:
527
537
  fileobj = io.TextIOWrapper(
@@ -1260,7 +1270,6 @@ class S3Path(URIPath):
1260
1270
  followlinks: bool = False) -> bool:
1261
1271
  '''
1262
1272
  Test if path has access permission described by mode
1263
- Using head_bucket(), now READ/WRITE are same.
1264
1273
 
1265
1274
  :param mode: access mode
1266
1275
  :returns: bool, if the bucket of s3_url has read/write access.
@@ -1271,7 +1280,7 @@ class S3Path(URIPath):
1271
1280
  s3_url = self.readlink().path_with_protocol
1272
1281
  except S3NotALinkError:
1273
1282
  pass
1274
- bucket, _ = parse_s3_url(s3_url) # only check bucket accessibility
1283
+ bucket, key = parse_s3_url(s3_url) # only check bucket accessibility
1275
1284
  if not bucket:
1276
1285
  raise Exception("No available bucket")
1277
1286
  if not isinstance(mode, Access):
@@ -1280,15 +1289,38 @@ class S3Path(URIPath):
1280
1289
  .format(mode, ', '.join([str(a) for a in Access])))
1281
1290
  if mode not in (Access.READ, Access.WRITE):
1282
1291
  raise TypeError('Unsupported mode: {}'.format(mode))
1292
+
1283
1293
  try:
1284
- self._client.head_bucket(Bucket=bucket)
1294
+ if not self.exists():
1295
+ return False
1285
1296
  except Exception as error:
1286
1297
  error = translate_s3_error(error, s3_url)
1287
- if isinstance(error, (S3PermissionError, S3FileNotFoundError,
1288
- S3BucketNotFoundError)):
1298
+ if isinstance(error, S3PermissionError):
1299
+ return False
1300
+ raise error
1301
+
1302
+ if mode == Access.READ:
1303
+ return True
1304
+ try:
1305
+ if not key:
1306
+ key = 'test'
1307
+ elif key.endswith('/'):
1308
+ key = key[:-1]
1309
+ upload_id = self._client.create_multipart_upload(
1310
+ Bucket=bucket,
1311
+ Key=key,
1312
+ )['UploadId']
1313
+ self._client.abort_multipart_upload(
1314
+ Bucket=bucket,
1315
+ Key=key,
1316
+ UploadId=upload_id,
1317
+ )
1318
+ return True
1319
+ except Exception as error:
1320
+ error = translate_s3_error(error, s3_url)
1321
+ if isinstance(error, S3PermissionError):
1289
1322
  return False
1290
1323
  raise error
1291
- return True
1292
1324
 
1293
1325
  def exists(self, followlinks: bool = False) -> bool:
1294
1326
  '''
@@ -1302,7 +1334,7 @@ class S3Path(URIPath):
1302
1334
  if not bucket: # s3:// => True, s3:///key => False
1303
1335
  return not key
1304
1336
 
1305
- return self.is_dir() or self.is_file(followlinks)
1337
+ return self.is_file(followlinks) or self.is_dir()
1306
1338
 
1307
1339
  def getmtime(self, follow_symlinks: bool = False) -> float:
1308
1340
  '''
@@ -1531,10 +1563,21 @@ class S3Path(URIPath):
1531
1563
  self._client.head_bucket(Bucket=bucket)
1532
1564
  except Exception as error:
1533
1565
  error = translate_s3_error(error, self.path_with_protocol)
1534
- if isinstance(error,
1535
- (S3UnknownError, S3ConfigError, S3PermissionError)):
1566
+ if isinstance(error, S3PermissionError):
1567
+ # Aliyun OSS doesn't give bucket api permission when you only have read and write permission
1568
+ try:
1569
+ self._client.list_objects_v2(Bucket=bucket, MaxKeys=1)
1570
+ return True
1571
+ except Exception as error2:
1572
+ error2 = translate_s3_error(error2, self.path_with_protocol)
1573
+ if isinstance(
1574
+ error2,
1575
+ (S3UnknownError, S3ConfigError, S3PermissionError)):
1576
+ raise error2
1577
+ return False
1578
+ elif isinstance(error, (S3UnknownError, S3ConfigError)):
1536
1579
  raise error
1537
- if isinstance(error, S3FileNotFoundError):
1580
+ elif isinstance(error, S3FileNotFoundError):
1538
1581
  return False
1539
1582
 
1540
1583
  return True
@@ -1978,6 +2021,10 @@ class S3Path(URIPath):
1978
2021
  src_url = self.path_with_protocol
1979
2022
  src_bucket, src_key = parse_s3_url(src_url)
1980
2023
  dst_bucket, dst_key = parse_s3_url(dst_url)
2024
+ if dst_bucket == src_bucket and src_key.rstrip('/') == dst_key.rstrip(
2025
+ '/'):
2026
+ raise SameFileError(
2027
+ f"'{src_url}' and '{dst_url}' are the same file")
1981
2028
 
1982
2029
  if not src_bucket:
1983
2030
  raise S3BucketNotFoundError('Empty bucket name: %r' % src_url)
@@ -1996,7 +2043,7 @@ class S3Path(URIPath):
1996
2043
  except S3NotALinkError:
1997
2044
  pass
1998
2045
 
1999
- try:
2046
+ with raise_s3_error(f"'{src_url}' or '{dst_url}'"):
2000
2047
  self._client.copy(
2001
2048
  {
2002
2049
  'Bucket': src_bucket,
@@ -2005,28 +2052,22 @@ class S3Path(URIPath):
2005
2052
  Bucket=dst_bucket,
2006
2053
  Key=dst_key,
2007
2054
  Callback=callback)
2008
- except Exception as error:
2009
- error = translate_s3_error(error, dst_url)
2010
- # Error can't help tell which is problematic
2011
- if isinstance(error, S3BucketNotFoundError):
2012
- if not self.hasbucket():
2013
- raise S3BucketNotFoundError('No such bucket: %r' % src_url)
2014
- elif isinstance(error, S3FileNotFoundError):
2015
- if not self.is_file():
2016
- raise S3FileNotFoundError('No such file: %r' % src_url)
2017
- raise error
2018
2055
 
2019
- def sync(self, dst_url: PathLike, followlinks: bool = False) -> None:
2056
+ def sync(
2057
+ self, dst_url: PathLike, followlinks: bool = False,
2058
+ force: bool = False) -> None:
2020
2059
  '''
2021
2060
  Copy file/directory on src_url to dst_url
2022
2061
 
2023
2062
  :param dst_url: Given destination path
2063
+ :param followlinks: False if regard symlink as file, else True
2064
+ :param force: Sync file forcely, do not ignore same files
2024
2065
  '''
2025
2066
  for src_file_path, dst_file_path in _s3_scan_pairs(
2026
2067
  self.path_with_protocol, dst_url):
2027
2068
  src_file_path = self.from_path(src_file_path)
2028
2069
  dst_file_path = self.from_path(dst_file_path)
2029
- if dst_file_path.exists() and is_same_file(
2070
+ if not force and dst_file_path.exists() and is_same_file(
2030
2071
  src_file_path.stat(), dst_file_path.stat(), 'copy'):
2031
2072
  continue
2032
2073
  src_file_path.copy(dst_file_path, followlinks=followlinks)
megfile/sftp.py CHANGED
@@ -395,10 +395,15 @@ def sftp_copy(
395
395
 
396
396
 
397
397
  def sftp_sync(
398
- src_path: PathLike, dst_path: PathLike, followlinks: bool = False):
398
+ src_path: PathLike,
399
+ dst_path: PathLike,
400
+ followlinks: bool = False,
401
+ force: bool = False):
399
402
  '''Copy file/directory on src_url to dst_url
400
403
 
401
404
  :param src_path: Given path
402
405
  :param dst_url: Given destination path
406
+ :param followlinks: False if regard symlink as file, else True
407
+ :param force: Sync file forcely, do not ignore same files
403
408
  '''
404
- return SftpPath(src_path).sync(dst_path, followlinks)
409
+ return SftpPath(src_path).sync(dst_path, followlinks, force)
megfile/sftp_path.py CHANGED
@@ -3,6 +3,7 @@ import hashlib
3
3
  import io
4
4
  import os
5
5
  import shlex
6
+ import socket
6
7
  import subprocess
7
8
  from logging import getLogger as get_logger
8
9
  from stat import S_ISDIR, S_ISLNK, S_ISREG
@@ -11,7 +12,7 @@ from urllib.parse import urlsplit, urlunsplit
11
12
 
12
13
  import paramiko
13
14
 
14
- from megfile.errors import _create_missing_ok_generator, patch_method
15
+ from megfile.errors import SameFileError, _create_missing_ok_generator, patch_method
15
16
  from megfile.interfaces import ContextIterator, FileEntry, PathLike, StatResult
16
17
  from megfile.lib.compare import is_same_file
17
18
  from megfile.lib.glob import FSFunc, iglob
@@ -45,6 +46,7 @@ SFTP_PRIVATE_KEY_TYPE = "SFTP_PRIVATE_KEY_TYPE"
45
46
  SFTP_PRIVATE_KEY_PASSWORD = "SFTP_PRIVATE_KEY_PASSWORD"
46
47
  MAX_RETRIES = 10
47
48
  DEFAULT_SSH_CONNECT_TIMEOUT = 5
49
+ DEFAULT_SSH_KEEPALIVE_INTERVAL = 15
48
50
 
49
51
 
50
52
  def _make_stat(stat: paramiko.SFTPAttributes) -> StatResult:
@@ -94,8 +96,11 @@ def provide_connect_info(
94
96
  def sftp_should_retry(error: Exception) -> bool:
95
97
  if type(error) is EOFError:
96
98
  return False
97
- elif isinstance(error,
98
- (paramiko.ssh_exception.SSHException, ConnectionError)):
99
+ elif isinstance(error, (
100
+ paramiko.ssh_exception.SSHException,
101
+ ConnectionError,
102
+ socket.timeout,
103
+ )):
99
104
  return True
100
105
  elif isinstance(error, OSError) and str(error) == 'Socket is closed':
101
106
  return True
@@ -148,8 +153,14 @@ def _get_sftp_client(
148
153
 
149
154
  :returns: sftp client
150
155
  '''
151
- ssh_client = get_ssh_client(hostname, port, username, password)
152
- sftp_client = ssh_client.open_sftp()
156
+ session = get_ssh_session(
157
+ hostname=hostname,
158
+ port=port,
159
+ username=username,
160
+ password=password,
161
+ )
162
+ session.invoke_subsystem("sftp")
163
+ sftp_client = paramiko.SFTPClient(session)
153
164
  _patch_sftp_client_request(sftp_client, hostname, port, username, password)
154
165
  return sftp_client
155
166
 
@@ -189,6 +200,9 @@ def _get_ssh_client(
189
200
  username=username,
190
201
  password=password,
191
202
  pkey=private_key,
203
+ timeout=DEFAULT_SSH_CONNECT_TIMEOUT,
204
+ auth_timeout=DEFAULT_SSH_CONNECT_TIMEOUT,
205
+ banner_timeout=DEFAULT_SSH_CONNECT_TIMEOUT,
192
206
  )
193
207
  atexit.register(ssh_client.close)
194
208
  return ssh_client
@@ -211,10 +225,9 @@ def get_ssh_session(
211
225
  username: Optional[str] = None,
212
226
  password: Optional[str] = None,
213
227
  ) -> paramiko.Channel:
214
- ssh_client = get_ssh_client(hostname, port, username, password)
215
- try:
216
- return _open_session(ssh_client)
217
- except paramiko.SSHException:
228
+
229
+ def retry_callback(error, *args, **kwargs):
230
+ ssh_client = get_ssh_client(hostname, port, username, password)
218
231
  ssh_client.close()
219
232
  atexit.unregister(ssh_client.close)
220
233
  ssh_key = f'ssh_client:{hostname},{port},{username},{password}'
@@ -223,20 +236,34 @@ def get_ssh_session(
223
236
  sftp_key = f'sftp_client:{hostname},{port},{username},{password}'
224
237
  if thread_local.get(sftp_key):
225
238
  del thread_local[sftp_key]
226
- return _open_session(
227
- get_ssh_client(
228
- hostname=hostname,
229
- port=port,
230
- username=username,
231
- password=password,
232
- ))
239
+
240
+ return patch_method(
241
+ _open_session, # pytype: disable=attribute-error
242
+ max_retries=MAX_RETRIES,
243
+ should_retry=sftp_should_retry,
244
+ retry_callback=retry_callback)(
245
+ hostname,
246
+ port,
247
+ username,
248
+ password,
249
+ )
233
250
 
234
251
 
235
- def _open_session(ssh_client: paramiko.SSHClient) -> paramiko.Channel:
252
+ def _open_session(
253
+ hostname: str,
254
+ port: Optional[int] = None,
255
+ username: Optional[str] = None,
256
+ password: Optional[str] = None,
257
+ ) -> paramiko.Channel:
258
+ ssh_client = get_ssh_client(hostname, port, username, password)
236
259
  transport = ssh_client.get_transport()
237
260
  if not transport:
238
- raise paramiko.SSHException()
261
+ raise paramiko.SSHException('Get transport error')
262
+ transport.set_keepalive(DEFAULT_SSH_KEEPALIVE_INTERVAL)
239
263
  session = transport.open_session(timeout=DEFAULT_SSH_CONNECT_TIMEOUT)
264
+ if not session:
265
+ raise paramiko.SSHException('Create session error')
266
+ session.settimeout(DEFAULT_SSH_CONNECT_TIMEOUT)
240
267
  return session
241
268
 
242
269
 
@@ -1167,6 +1194,9 @@ class SftpPath(URIPath):
1167
1194
  self.from_path(os.path.dirname(dst_path)).makedirs(exist_ok=True)
1168
1195
  dst_path = self.from_path(dst_path)
1169
1196
  if self._is_same_backend(dst_path):
1197
+ if self._real_path == dst_path._real_path:
1198
+ raise SameFileError(
1199
+ f"'{self.path}' and '{dst_path.path}' are the same file")
1170
1200
  exec_result = self._exec_command(
1171
1201
  ["cp", self._real_path, dst_path._real_path])
1172
1202
  if exec_result.returncode != 0:
@@ -1189,10 +1219,16 @@ class SftpPath(URIPath):
1189
1219
  dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
1190
1220
  dst_path._client.chmod(dst_path._real_path, src_stat.st_mode)
1191
1221
 
1192
- def sync(self, dst_path: PathLike, followlinks: bool = False):
1222
+ def sync(
1223
+ self,
1224
+ dst_path: PathLike,
1225
+ followlinks: bool = False,
1226
+ force: bool = False):
1193
1227
  '''Copy file/directory on src_url to dst_url
1194
1228
 
1195
1229
  :param dst_url: Given destination path
1230
+ :param followlinks: False if regard symlink as file, else True
1231
+ :param force: Sync file forcely, do not ignore same files
1196
1232
  '''
1197
1233
  if not self._is_same_protocol(dst_path):
1198
1234
  raise OSError('Not a %s path: %r' % (self.protocol, dst_path))
@@ -1201,8 +1237,8 @@ class SftpPath(URIPath):
1201
1237
  self.path_with_protocol, dst_path):
1202
1238
  dst_path = self.from_path(dst_file_path)
1203
1239
  src_path = self.from_path(src_file_path)
1204
- if dst_path.exists() and is_same_file(src_path.stat(),
1205
- dst_path.stat(), 'copy'):
1240
+ if not force and dst_path.exists() and is_same_file(
1241
+ src_path.stat(), dst_path.stat(), 'copy'):
1206
1242
  continue
1207
1243
  self.from_path(os.path.dirname(dst_file_path)).mkdir(
1208
1244
  parents=True, exist_ok=True)
megfile/smart.py CHANGED
@@ -7,6 +7,7 @@ from typing import IO, Any, AnyStr, BinaryIO, Callable, Iterable, Iterator, List
7
7
 
8
8
  from tqdm import tqdm
9
9
 
10
+ from megfile.errors import SameFileError
10
11
  from megfile.fs import fs_copy, is_fs
11
12
  from megfile.interfaces import Access, ContextIterator, FileCacher, FileEntry, NullCacher, PathLike, StatResult
12
13
  from megfile.lib.combine_reader import CombineReader
@@ -342,6 +343,7 @@ def _smart_sync_single_file(items: dict):
342
343
  callback = items['callback']
343
344
  followlinks = items['followlinks']
344
345
  callback_after_copy_file = items['callback_after_copy_file']
346
+ force = items['force']
345
347
 
346
348
  content_path = os.path.relpath(src_file_path, start=src_root_path)
347
349
  if len(content_path) and content_path != '.':
@@ -355,7 +357,7 @@ def _smart_sync_single_file(items: dict):
355
357
  dst_protocol, _ = SmartPath._extract_protocol(dst_abs_file_path)
356
358
  should_sync = True
357
359
  try:
358
- if smart_exists(dst_abs_file_path) and is_same_file(
360
+ if not force and smart_exists(dst_abs_file_path) and is_same_file(
359
361
  smart_stat(src_file_path), smart_stat(dst_abs_file_path),
360
362
  get_sync_type(src_protocol, dst_protocol)):
361
363
  should_sync = False
@@ -380,7 +382,8 @@ def smart_sync(
380
382
  followlinks: bool = False,
381
383
  callback_after_copy_file: Optional[Callable[[str, str], None]] = None,
382
384
  src_file_stats: Optional[Iterable[FileEntry]] = None,
383
- map_func: Callable[[Callable, Iterable], Any] = map) -> None:
385
+ map_func: Callable[[Callable, Iterable], Any] = map,
386
+ force: bool = False) -> None:
384
387
  '''
385
388
  Sync file or directory on s3 and fs
386
389
 
@@ -443,6 +446,7 @@ def smart_sync(
443
446
  callback=callback,
444
447
  followlinks=followlinks,
445
448
  callback_after_copy_file=callback_after_copy_file,
449
+ force=force,
446
450
  )
447
451
 
448
452
  for _ in map_func(_smart_sync_single_file, create_generator()):
@@ -454,7 +458,8 @@ def smart_sync_with_progress(
454
458
  dst_path,
455
459
  callback: Optional[Callable[[str, int], None]] = None,
456
460
  followlinks: bool = False,
457
- map_func: Callable[[Callable, Iterable], Iterator] = map):
461
+ map_func: Callable[[Callable, Iterable], Iterator] = map,
462
+ force: bool = False):
458
463
  src_path, dst_path = get_traditional_path(src_path), get_traditional_path(
459
464
  dst_path)
460
465
  file_stats = list(
@@ -478,6 +483,7 @@ def smart_sync_with_progress(
478
483
  callback_after_copy_file=callback_after_copy_file,
479
484
  src_file_stats=file_stats,
480
485
  map_func=map_func,
486
+ force=force,
481
487
  )
482
488
  tbar.close()
483
489
  sbar.close()
megfile/stdio.py CHANGED
@@ -11,7 +11,7 @@ __all__ = [
11
11
 
12
12
  def stdio_open(
13
13
  path: PathLike,
14
- mode: str,
14
+ mode: str = 'rb',
15
15
  encoding: Optional[str] = None,
16
16
  errors: Optional[str] = None,
17
17
  **kwargs) -> IO[AnyStr]: # pytype: disable=signature-mismatch
megfile/stdio_path.py CHANGED
@@ -74,7 +74,7 @@ class StdioPath(BaseURIPath):
74
74
 
75
75
  def open(
76
76
  self,
77
- mode: str,
77
+ mode: str = 'rb',
78
78
  encoding: Optional[str] = None,
79
79
  errors: Optional[str] = None,
80
80
  **kwargs) -> IO[AnyStr]: # pytype: disable=signature-mismatch
megfile/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "2.2.1.post1"
1
+ VERSION = "2.2.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: megfile
3
- Version: 2.2.1.post1
3
+ Version: 2.2.3
4
4
  Summary: Megvii file operation library
5
5
  Home-page: https://github.com/megvii-research/megfile
6
6
  Author: megvii
@@ -69,7 +69,7 @@ Here's an example of writing a file to s3 / sftp / fs, syncing to local, reading
69
69
  - `/data/test.txt`
70
70
  - `test.txt`
71
71
  - 1
72
- - oss
72
+ - s3
73
73
  - `s3[+profile_name]://bucket/key`
74
74
  - sftp
75
75
  - `sftp://[username[:password]@]hostname[:port]//absolute_file_path`
@@ -170,7 +170,7 @@ pip3 install -r requirements.txt -r requirements-dev.txt
170
170
 
171
171
  ## Configuration
172
172
 
173
- Before using `megfile` to access files on s3, you need to set up authentication credentials for your s3 account using the [AWS CLI](https://docs.aws.amazon.com/cli/latest/reference/configure/index.html) or editing the file `~/.aws/config` directly, see also: [boto3 configuration](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html) & [boto3 credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). You can set your endpoint url with environments `OSS_ENDPOINT`.
173
+ Before using `megfile` to access files on s3, you need to set up authentication credentials for your s3 account using the [AWS CLI](https://docs.aws.amazon.com/cli/latest/reference/configure/index.html) or editing the file `~/.aws/config` directly, see also: [boto3 configuration](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html) & [boto3 credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). Megfile also support environments for s3, like `OSS_ENDPOINT`, `AWS_ACCESS_KEY_ID` , `AWS_SECRET_ACCESS_KEY` and `AWS_S3_ADDRESSING_STYLE`.
174
174
 
175
175
  ```
176
176
  $ aws configure
@@ -193,7 +193,7 @@ s3 =
193
193
  endpoint_url = http://oss-cn-hangzhou.aliyuncs.com
194
194
  ```
195
195
 
196
- You also can operate s3 files with different endpoint urls, access keys and secret keys. You can set config for different profiles by environment(`PROFILE_NAME__AWS_ACCESS_KEY_ID`, `PROFILE_NAME__AWS_SECRET_ACCESS_KEY`, `PROFILE_NAME__OSS_ENDPOINT`) or `~/.aws/config`. Then you can operate files with path `s3+profile_name://bucket/key`.
196
+ You also can operate s3 files with different endpoint urls, access keys and secret keys. You can set config for different profiles by environment(`PROFILE_NAME__AWS_ACCESS_KEY_ID`, `PROFILE_NAME__AWS_SECRET_ACCESS_KEY`, `PROFILE_NAME__OSS_ENDPOINT`, `PROFILE_NAME__AWS_S3_ADDRESSING_STYLE`) or `~/.aws/config`. Then you can operate files with path `s3+profile_name://bucket/key`.
197
197
  For example:
198
198
  ```
199
199
  # set config with environment
@@ -217,6 +217,7 @@ s3 =
217
217
  aws_secret_access_key = profile2-accesskey
218
218
  aws_access_key_id = profile2-secretkey
219
219
  s3 =
220
+ addressing_style = virtual
220
221
  endpoint_url = https://profile2.s3.custom.com
221
222
 
222
223
 
@@ -1,21 +1,21 @@
1
1
  megfile/__init__.py,sha256=Qsi3XNP_0XYoSol-1AGutZqo0rfBnzaiZ-HVXll4fB0,5721
2
- megfile/cli.py,sha256=MMhdYDiArEF5WkOX6B59NH5URPt5inz4WbO8Jb1kB2o,13045
3
- megfile/errors.py,sha256=y0PJ4lzkm-uqgVD1EqakmN0IHBMt1CCzDbdbq84sVb4,11491
4
- megfile/fs.py,sha256=sjkm_LsvNCw8hj9Ee-1HSNzvg7bRHTPem8KTDDBQlCw,11621
5
- megfile/fs_path.py,sha256=Lz-u8XL2RMViHl_39yapAwX5XSE_ZpQsuhAvVve6rk8,38532
2
+ megfile/cli.py,sha256=ZRS2Ityuqk51T-BDvU-q1EDAS66Vw8gk817Wg_OWwSk,13625
3
+ megfile/errors.py,sha256=BIZ8XlQLRo0oZsU38Jhv8eooCFEjzsg3RSb1nTuLc5g,11663
4
+ megfile/fs.py,sha256=LtrzQsyZgogTJeoRFz4L52gxx0jByzRBLkpWYpvkp5I,11819
5
+ megfile/fs_path.py,sha256=JkY8qGIIboK5MK2rSagYEvnu5FTzmk9OHXIhTO7BjeY,38767
6
6
  megfile/http.py,sha256=a3oAuARSSaIU8VMx86Mui0N5Vh-EI0AoHnwxRU5DSMU,2032
7
7
  megfile/http_path.py,sha256=pbIlle-scbKC88XO0uV9y3sIm0HME6E_R061VTBg8w8,8966
8
8
  megfile/interfaces.py,sha256=h3tWE8hVt5S-HopaMAX6lunPJ97vzhv6jH_2HubcDNc,6219
9
- megfile/pathlike.py,sha256=aU6j-Z3Sr6XH_REuJ37_vNv6XucEW6qpASWQyzGbL9g,29258
10
- megfile/s3.py,sha256=6d9bBVkVdXIvmJLfpOgJkK4nVAsLR3lirrB6x1-P2y8,12437
11
- megfile/s3_path.py,sha256=c6lzX-dinvJw1jRCpqctRy5JdjVzGhREnyY3_8MAafI,85697
12
- megfile/sftp.py,sha256=h2cy_XnQQOlCtHskW7rSQKG1S7Uoq7uq7X_Pyt2V16M,12590
13
- megfile/sftp_path.py,sha256=ELxJVsbZoOhkZJVbk9gxcM_BReiDsYC5NTdhSpEe2rk,47982
14
- megfile/smart.py,sha256=SFrHDMbglOTo-g03OXh8sz5zdhtPq1VS7_cFxVZYPk8,33010
9
+ megfile/pathlike.py,sha256=WpP8zWSOAcAfYrD65hZS08UEi4_iCoEMs2xvfFMwZvY,29264
10
+ megfile/s3.py,sha256=7XZSWjcSY-hoLhLH9dtfyRpokfYH9raTO_Mf69RjpEs,12560
11
+ megfile/s3_path.py,sha256=fGMrV0_cRWmjqRcbziuhC7vCLXnDvbEEHbGDmW4ZHGQ,87172
12
+ megfile/sftp.py,sha256=CZYv1WKL0d_vuJ5aPgMhm0W8uskzaO5zbYGhGwt_mQs,12771
13
+ megfile/sftp_path.py,sha256=rwEfw93fZkNEf7TjZ1-8bGiaVU0oxwb3H7ub6x9iUsA,49153
14
+ megfile/smart.py,sha256=PAg8i56kFn94a6LGlH1v2qamCojYM9qKb-Ryw6rX3ks,33204
15
15
  megfile/smart_path.py,sha256=Rwb1McXsshi9-F6miTRqE6j8FO2j1edjmSxZF32YZ6E,6708
16
- megfile/stdio.py,sha256=28AAQ7YLBZ51nrQDtK2th5lWhDJO3sZFKdQYNXQ_eNM,700
17
- megfile/stdio_path.py,sha256=QqZ5XJw4VFdP2s14MP7ooe9TgYIZtdNtu5vny5cUW5Y,2866
18
- megfile/version.py,sha256=J75j1o3zUHTJ6fEnNz_Qg0RLb_Uul_oUSapamVffVZU,25
16
+ megfile/stdio.py,sha256=yRhlfUA2DHi3bq-9cXsSlbLCnHvS_zvglO2IYYyPsGc,707
17
+ megfile/stdio_path.py,sha256=eQulTXUwHvUKA-5PKCGfVNiEPkJhG9YtVhtU58OcmoM,2873
18
+ megfile/version.py,sha256=NUfAFE_kB_uAzWs7eyqhwlH9YxsrTSBEHoHyhugYN4Y,19
19
19
  megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  megfile/lib/base_prefetch_reader.py,sha256=SjrBffHVgvJnYtr8HNqiOozP9OJRYS37Eu1KQcZu1Z8,13221
21
21
  megfile/lib/combine_reader.py,sha256=XFSqEY5A5X5Uf7eQ6AXAzrvNteESSXvKNVPktGjo3KY,4546
@@ -38,10 +38,10 @@ megfile/lib/stdio_handler.py,sha256=QDWtcZxz-hzi-rqQUiSlR3NrihX1fjK_Rj9T2mdTFEg,
38
38
  megfile/lib/url.py,sha256=VbQLjo0s4AaV0iSk66BcjI68aUTcN9zBZ5x6-cM4Qvs,103
39
39
  megfile/utils/__init__.py,sha256=qdX8FF_dYFKwp1BIWx3JeSGd91s7AKUDSEpDv9tORcM,9162
40
40
  megfile/utils/mutex.py,sha256=-2KH3bNovKRd9zvsXq9n3bWM7rQdoG9hO7tUPxVG_Po,2538
41
- megfile-2.2.1.post1.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
42
- megfile-2.2.1.post1.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
43
- megfile-2.2.1.post1.dist-info/METADATA,sha256=EVFMWdWi2j42YXbfnD4kELPbMh-B1gBsTxHvakEiOPA,10601
44
- megfile-2.2.1.post1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
45
- megfile-2.2.1.post1.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
46
- megfile-2.2.1.post1.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
47
- megfile-2.2.1.post1.dist-info/RECORD,,
41
+ megfile-2.2.3.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
42
+ megfile-2.2.3.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
43
+ megfile-2.2.3.dist-info/METADATA,sha256=VL8zMVQHM7NRnGr4X3A6uCXzRN220PvKQR47pjJC46g,10742
44
+ megfile-2.2.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
45
+ megfile-2.2.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
46
+ megfile-2.2.3.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
47
+ megfile-2.2.3.dist-info/RECORD,,