megfile 4.2.5__py3-none-any.whl → 5.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/s3_path.py CHANGED
@@ -93,7 +93,6 @@ __all__ = [
93
93
  "get_endpoint_url",
94
94
  "get_s3_session",
95
95
  "get_s3_client",
96
- "s3_path_join",
97
96
  "is_s3",
98
97
  "s3_buffered_open",
99
98
  "s3_cached_open",
@@ -104,6 +103,7 @@ __all__ = [
104
103
  "s3_open",
105
104
  "S3Cacher",
106
105
  "s3_upload",
106
+ "s3_copy",
107
107
  "s3_download",
108
108
  "s3_load_content",
109
109
  "s3_concat",
@@ -351,7 +351,7 @@ def get_s3_client_with_cache(
351
351
  )
352
352
 
353
353
 
354
- def s3_path_join(path: PathLike, *other_paths: PathLike) -> str:
354
+ def _s3_path_join(path: PathLike, *other_paths: PathLike) -> str:
355
355
  """
356
356
  Concat 2 or more path to a complete path
357
357
 
@@ -366,7 +366,7 @@ def s3_path_join(path: PathLike, *other_paths: PathLike) -> str:
366
366
  and will directly concat.
367
367
 
368
368
  e.g. os.path.join('/path', 'to', '/file') => '/file',
369
- but s3_path_join('/path', 'to', '/file') => '/path/to/file'
369
+ but _s3_path_join('/path', 'to', '/file') => '/path/to/file'
370
370
  """
371
371
  return uri_join(fspath(path), *map(fspath, other_paths))
372
372
 
@@ -603,7 +603,7 @@ def _s3_glob_stat_single_path(
603
603
  with raise_s3_error(_s3_pathname, S3BucketNotFoundError):
604
604
  for resp in _list_objects_recursive(client, bucket, prefix, delimiter):
605
605
  for content in resp.get("Contents", []):
606
- path = s3_path_join(f"{protocol}://", bucket, content["Key"])
606
+ path = _s3_path_join(f"{protocol}://", bucket, content["Key"])
607
607
  if not search_dir and pattern.match(path):
608
608
  if path.endswith("/"):
609
609
  continue
@@ -619,7 +619,7 @@ def _s3_glob_stat_single_path(
619
619
  )
620
620
  dirname = os.path.dirname(dirname)
621
621
  for common_prefix in resp.get("CommonPrefixes", []):
622
- path = s3_path_join(
622
+ path = _s3_path_join(
623
623
  f"{protocol}://", bucket, common_prefix["Prefix"]
624
624
  )
625
625
  dirname = os.path.dirname(path)
@@ -640,7 +640,7 @@ def _s3_scan_pairs(
640
640
  for src_file_path in S3Path(src_url).scan():
641
641
  content_path = src_file_path[len(fspath(src_url)) :]
642
642
  if len(content_path) > 0:
643
- dst_file_path = s3_path_join(dst_url, content_path)
643
+ dst_file_path = _s3_path_join(dst_url, content_path)
644
644
  else:
645
645
  dst_file_path = dst_url
646
646
  yield src_file_path, dst_file_path
@@ -1103,6 +1103,27 @@ def s3_memory_open(
1103
1103
  s3_open = s3_buffered_open
1104
1104
 
1105
1105
 
1106
+ def s3_copy(
1107
+ src_url: PathLike,
1108
+ dst_url: PathLike,
1109
+ callback: Optional[Callable[[int], None]] = None,
1110
+ followlinks: bool = False,
1111
+ overwrite: bool = True,
1112
+ ) -> None:
1113
+ """File copy on S3
1114
+ Copy content of file on `src_path` to `dst_path`.
1115
+ It's caller's responsibility to ensure the s3_isfile(src_url) is True
1116
+
1117
+ :param src_url: Given path
1118
+ :param dst_path: Target file path
1119
+ :param callback: Called periodically during copy, and the input parameter is
1120
+ the data size (in bytes) of copy since the last call
1121
+ :param followlinks: False if regard symlink as file, else True
1122
+ :param overwrite: whether or not overwrite file when exists, default is True
1123
+ """
1124
+ return S3Path(src_url).copy(dst_url, callback, followlinks, overwrite)
1125
+
1126
+
1106
1127
  def s3_download(
1107
1128
  src_url: PathLike,
1108
1129
  dst_url: PathLike,
@@ -1120,8 +1141,7 @@ def s3_download(
1120
1141
  :param followlinks: False if regard symlink as file, else True
1121
1142
  :param overwrite: whether or not overwrite file when exists, default is True
1122
1143
  """
1123
- from megfile.fs import is_fs
1124
- from megfile.fs_path import FSPath
1144
+ from megfile.fs_path import FSPath, is_fs
1125
1145
 
1126
1146
  dst_url = fspath(dst_url)
1127
1147
  if not is_fs(dst_url):
@@ -1201,8 +1221,7 @@ def s3_upload(
1201
1221
  :param followlinks: False if regard symlink as file, else True
1202
1222
  :param overwrite: whether or not overwrite file when exists, default is True
1203
1223
  """
1204
- from megfile.fs import is_fs
1205
- from megfile.fs_path import FSPath
1224
+ from megfile.fs_path import FSPath, is_fs
1206
1225
 
1207
1226
  if not is_fs(src_url):
1208
1227
  raise OSError(f"src_url is not fs path: {src_url}")
@@ -1399,11 +1418,9 @@ class S3Path(URIPath):
1399
1418
  @cached_property
1400
1419
  def path_with_protocol(self) -> str:
1401
1420
  """Return path with protocol, like file:///root, s3://bucket/key"""
1402
- path = self.path
1403
- protocol_prefix = self._protocol_with_profile + "://"
1404
- if path.startswith(protocol_prefix):
1405
- return path
1406
- return protocol_prefix + path.lstrip("/")
1421
+ if self.path.startswith(self.root):
1422
+ return self.path
1423
+ return self.root + self.path.lstrip("/")
1407
1424
 
1408
1425
  @cached_property
1409
1426
  def path_without_protocol(self) -> str:
@@ -1411,21 +1428,14 @@ class S3Path(URIPath):
1411
1428
  Return path without protocol, example: if path is s3://bucket/key,
1412
1429
  return bucket/key
1413
1430
  """
1414
- path = self.path
1415
- protocol_prefix = self._protocol_with_profile + "://"
1416
- if path.startswith(protocol_prefix):
1417
- path = path[len(protocol_prefix) :]
1418
- return path
1431
+ if self.path.startswith(self.root):
1432
+ return self.path[len(self.root) :]
1433
+ return self.path
1419
1434
 
1420
1435
  @cached_property
1421
- def parts(self) -> Tuple[str, ...]:
1422
- """A tuple giving access to the path’s various components"""
1423
- parts = [f"{self._protocol_with_profile}://"]
1424
- path = self.path_without_protocol
1425
- path = path.lstrip("/")
1426
- if path != "":
1427
- parts.extend(path.split("/"))
1428
- return tuple(parts)
1436
+ def root(self) -> str:
1437
+ """Return root of the path, like s3://"""
1438
+ return f"{self._protocol_with_profile}://"
1429
1439
 
1430
1440
  @cached_property
1431
1441
  def _client(self):
@@ -1994,7 +2004,7 @@ class S3Path(URIPath):
1994
2004
  for content in resp.get("Contents", []):
1995
2005
  if content["Key"].endswith("/"):
1996
2006
  continue
1997
- path = s3_path_join(f"{protocol}://", bucket, content["Key"])
2007
+ path = _s3_path_join(f"{protocol}://", bucket, content["Key"])
1998
2008
 
1999
2009
  if followlinks:
2000
2010
  try:
@@ -2241,9 +2251,7 @@ class S3Path(URIPath):
2241
2251
  dirs = sorted(dirs)
2242
2252
  stack.extend(reversed(dirs))
2243
2253
 
2244
- root = s3_path_join(
2245
- f"{self._protocol_with_profile}://", bucket, current
2246
- )[:-1]
2254
+ root = _s3_path_join(self.root, bucket, current)[:-1]
2247
2255
  dirs = [path[len(current) :] for path in dirs]
2248
2256
  files = sorted(path[len(current) :] for path in files)
2249
2257
  if files or dirs or not current:
megfile/sftp2_path.py CHANGED
@@ -33,12 +33,12 @@ __all__ = [
33
33
  "is_sftp2",
34
34
  ]
35
35
 
36
- SFTP2_USERNAME = "SFTP2_USERNAME"
37
- SFTP2_PASSWORD = "SFTP2_PASSWORD"
38
- SFTP2_PRIVATE_KEY_PATH = "SFTP2_PRIVATE_KEY_PATH"
39
- SFTP2_PRIVATE_KEY_TYPE = "SFTP2_PRIVATE_KEY_TYPE"
40
- SFTP2_PRIVATE_KEY_PASSWORD = "SFTP2_PRIVATE_KEY_PASSWORD"
41
- SFTP2_MAX_UNAUTH_CONN = "SFTP2_MAX_UNAUTH_CONN"
36
+ SFTP_USERNAME = "SFTP_USERNAME"
37
+ SFTP_PASSWORD = "SFTP_PASSWORD"
38
+ SFTP_PRIVATE_KEY_PATH = "SFTP_PRIVATE_KEY_PATH"
39
+ SFTP_PRIVATE_KEY_TYPE = "SFTP_PRIVATE_KEY_TYPE"
40
+ SFTP_PRIVATE_KEY_PASSWORD = "SFTP_PRIVATE_KEY_PASSWORD"
41
+ SFTP_MAX_UNAUTH_CONN = "SFTP_MAX_UNAUTH_CONN"
42
42
  MAX_RETRIES = SFTP_MAX_RETRY_TIMES
43
43
  DEFAULT_SSH_CONNECT_TIMEOUT = 5
44
44
  DEFAULT_SSH_KEEPALIVE_INTERVAL = 15
@@ -66,11 +66,11 @@ def _make_stat(stat) -> StatResult:
66
66
 
67
67
  def get_private_key():
68
68
  """Get private key for SSH authentication"""
69
- private_key_path = os.getenv(SFTP2_PRIVATE_KEY_PATH)
69
+ private_key_path = os.getenv(SFTP_PRIVATE_KEY_PATH)
70
70
  if private_key_path:
71
71
  if not os.path.exists(private_key_path):
72
72
  raise FileNotFoundError(f"Private key file not exist: '{private_key_path}'")
73
- private_key_password = os.getenv(SFTP2_PRIVATE_KEY_PASSWORD)
73
+ private_key_password = os.getenv(SFTP_PRIVATE_KEY_PASSWORD)
74
74
  if private_key_password:
75
75
  return private_key_path, private_key_password
76
76
  return private_key_path, ""
@@ -87,12 +87,12 @@ def provide_connect_info(
87
87
  if not port:
88
88
  port = 22
89
89
  if not username:
90
- username = os.getenv(SFTP2_USERNAME)
90
+ username = os.getenv(SFTP_USERNAME)
91
91
  if not username:
92
92
  # 如果没有指定用户名,使用当前系统用户名
93
93
  username = getpass.getuser()
94
94
  if not password:
95
- password = os.getenv(SFTP2_PASSWORD)
95
+ password = os.getenv(SFTP_PASSWORD)
96
96
  private_key = get_private_key()
97
97
  return hostname, port, username, password, private_key
98
98
 
@@ -412,10 +412,7 @@ class Sftp2Path(URIPath):
412
412
  """sftp2 protocol
413
413
 
414
414
  uri format:
415
- - absolute path
416
- - sftp2://[username[:password]@]hostname[:port]//file_path
417
- - relative path
418
- - sftp2://[username[:password]@]hostname[:port]/file_path
415
+ - sftp2://[username[:password]@]hostname[:port]/file_path
419
416
  """
420
417
 
421
418
  protocol = "sftp2"
@@ -424,27 +421,12 @@ class Sftp2Path(URIPath):
424
421
  super().__init__(path, *other_paths)
425
422
  parts = urlsplit(self.path)
426
423
  self._urlsplit_parts = parts
427
- self._real_path = parts.path
428
- if parts.path.startswith("//"):
429
- self._root_dir = "/"
430
- else:
431
- self._root_dir = "/" # Default to absolute path for ssh2
432
- self._real_path = (
433
- parts.path.lstrip("/")
434
- if not parts.path.startswith("//")
435
- else parts.path[2:]
436
- )
437
- if not self._real_path.startswith("/"):
438
- self._real_path = f"/{self._real_path}"
424
+ self._remote_path = parts.path or "/"
439
425
 
440
426
  @cached_property
441
427
  def parts(self) -> Tuple[str, ...]:
442
428
  """A tuple giving access to the path's various components"""
443
- if self._urlsplit_parts.path.startswith("//"):
444
- new_parts = self._urlsplit_parts._replace(path="//")
445
- else:
446
- new_parts = self._urlsplit_parts._replace(path="/")
447
- parts = [urlunsplit(new_parts)]
429
+ parts = [urlunsplit(self._urlsplit_parts._replace(path=""))]
448
430
  path = self._urlsplit_parts.path.lstrip("/")
449
431
  if path != "":
450
432
  parts.extend(path.split("/"))
@@ -512,12 +494,6 @@ class Sftp2Path(URIPath):
512
494
  )
513
495
 
514
496
  def _generate_path_object(self, sftp_local_path: str, resolve: bool = False):
515
- if resolve or self._root_dir == "/":
516
- sftp_local_path = f"//{sftp_local_path.lstrip('/')}"
517
- else:
518
- sftp_local_path = os.path.relpath(sftp_local_path, start=self._root_dir)
519
- if sftp_local_path == ".":
520
- sftp_local_path = "/"
521
497
  new_parts = self._urlsplit_parts._replace(path=sftp_local_path)
522
498
  return self.from_path(urlunsplit(new_parts))
523
499
 
@@ -581,12 +557,12 @@ class Sftp2Path(URIPath):
581
557
  return self.from_path(path).is_dir(followlinks=followlinks)
582
558
 
583
559
  fs = FSFunc(_exist, _is_dir, _scandir)
584
- for real_path in _create_missing_ok_generator(
560
+ for remote_path in _create_missing_ok_generator(
585
561
  iglob(fspath(glob_path), recursive=recursive, fs=fs),
586
562
  missing_ok,
587
563
  FileNotFoundError(f"No match any file: {glob_path!r}"),
588
564
  ):
589
- yield self.from_path(real_path)
565
+ yield self.from_path(remote_path)
590
566
 
591
567
  def is_dir(self, followlinks: bool = False) -> bool:
592
568
  """Test if a path is directory"""
@@ -640,7 +616,7 @@ class Sftp2Path(URIPath):
640
616
  for parent_path_object in parent_path_objects[::-1]:
641
617
  parent_path_object.mkdir(mode=mode, parents=False, exist_ok=True)
642
618
  try:
643
- self._client.mkdir(self._real_path, mode)
619
+ self._client.mkdir(self._remote_path, mode)
644
620
  except OSError:
645
621
  if not self.exists():
646
622
  raise
@@ -671,7 +647,7 @@ class Sftp2Path(URIPath):
671
647
  if self._is_same_backend(dst_path):
672
648
  if overwrite:
673
649
  dst_path.remove(missing_ok=True)
674
- self._client.rename(self._real_path, dst_path._real_path)
650
+ self._client.rename(self._remote_path, dst_path._remote_path)
675
651
  else:
676
652
  self.sync(dst_path, overwrite=overwrite)
677
653
  self.remove(missing_ok=True)
@@ -681,7 +657,7 @@ class Sftp2Path(URIPath):
681
657
  self.from_path(file_entry.path).rename(
682
658
  dst_path.joinpath(file_entry.name)
683
659
  )
684
- self._client.rmdir(self._real_path)
660
+ self._client.rmdir(self._remote_path)
685
661
  else:
686
662
  if overwrite or not dst_path.exists():
687
663
  with self.open("rb") as fsrc:
@@ -704,9 +680,9 @@ class Sftp2Path(URIPath):
704
680
  if self.is_dir():
705
681
  for file_entry in self.scandir():
706
682
  self.from_path(file_entry.path).remove(missing_ok=missing_ok)
707
- self._client.rmdir(self._real_path)
683
+ self._client.rmdir(self._remote_path)
708
684
  else:
709
- self._client.unlink(self._real_path)
685
+ self._client.unlink(self._remote_path)
710
686
 
711
687
  def scan(self, missing_ok: bool = True, followlinks: bool = False) -> Iterator[str]:
712
688
  """Iteratively traverse only files in given directory"""
@@ -753,7 +729,7 @@ class Sftp2Path(URIPath):
753
729
 
754
730
  def scandir(self) -> ContextIterator:
755
731
  """Get all content of given file path"""
756
- real_path = self._real_path
732
+ remote_path = self._remote_path
757
733
  stat_result = None
758
734
  try:
759
735
  stat_result = self.stat(follow_symlinks=False)
@@ -761,13 +737,13 @@ class Sftp2Path(URIPath):
761
737
  raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")
762
738
 
763
739
  if stat_result.is_symlink():
764
- real_path = self.readlink()._real_path
740
+ remote_path = self.readlink()._remote_path
765
741
  elif not stat_result.is_dir():
766
742
  raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")
767
743
 
768
744
  def create_generator():
769
745
  # Use opendir and readdir from ssh2-python
770
- dir_handle = self._client.opendir(real_path)
746
+ dir_handle = self._client.opendir(remote_path)
771
747
  try:
772
748
  # ssh2-python's readdir returns a generator
773
749
  # First call returns all entries, subsequent calls return empty
@@ -795,9 +771,9 @@ class Sftp2Path(URIPath):
795
771
  """Get StatResult of file on sftp2"""
796
772
  try:
797
773
  if follow_symlinks:
798
- stat = self._client.stat(self._real_path)
774
+ stat = self._client.stat(self._remote_path)
799
775
  else:
800
- stat = self._client.lstat(self._real_path)
776
+ stat = self._client.lstat(self._remote_path)
801
777
  return _make_stat(stat)
802
778
  except SFTPProtocolError as e: # pytype: disable=mro-error
803
779
  raise FileNotFoundError(
@@ -812,7 +788,7 @@ class Sftp2Path(URIPath):
812
788
  """Remove the file on sftp2"""
813
789
  if missing_ok and not self.exists():
814
790
  return
815
- self._client.unlink(self._real_path)
791
+ self._client.unlink(self._remote_path)
816
792
 
817
793
  def walk(
818
794
  self, followlinks: bool = False
@@ -824,7 +800,7 @@ class Sftp2Path(URIPath):
824
800
  if self.is_file(followlinks=followlinks):
825
801
  return
826
802
 
827
- stack = [self._real_path]
803
+ stack = [self._remote_path]
828
804
  while stack:
829
805
  root = stack.pop()
830
806
  dirs, files = [], []
@@ -849,8 +825,8 @@ class Sftp2Path(URIPath):
849
825
 
850
826
  def resolve(self, strict=False) -> "Sftp2Path":
851
827
  """Return the canonical path"""
852
- path = self._client.realpath(self._real_path)
853
- return self._generate_path_object(path, resolve=True)
828
+ path = self._client.realpath(self._remote_path)
829
+ return self._generate_path_object(path)
854
830
 
855
831
  def md5(self, recalculate: bool = False, followlinks: bool = False):
856
832
  """Calculate the md5 value of the file"""
@@ -873,7 +849,7 @@ class Sftp2Path(URIPath):
873
849
  dst_path = self.from_path(dst_path)
874
850
  if dst_path.exists(followlinks=False):
875
851
  raise FileExistsError(f"File exists: '{dst_path.path_with_protocol}'")
876
- return self._client.symlink(self._real_path, dst_path._real_path)
852
+ return self._client.symlink(self._remote_path, dst_path._remote_path)
877
853
 
878
854
  def readlink(self) -> "Sftp2Path":
879
855
  """Return a Sftp2Path instance representing the path to which the
@@ -885,7 +861,7 @@ class Sftp2Path(URIPath):
885
861
  if not self.is_symlink():
886
862
  raise OSError(f"Not a symlink: {self.path_with_protocol!r}")
887
863
  try:
888
- path = self._client.realpath(self._real_path)
864
+ path = self._client.realpath(self._remote_path)
889
865
  if not path:
890
866
  raise OSError(f"Not a symlink: {self.path_with_protocol!r}")
891
867
  if not path.startswith("/"):
@@ -949,7 +925,7 @@ class Sftp2Path(URIPath):
949
925
  | ssh2.sftp.LIBSSH2_FXF_APPEND
950
926
  )
951
927
 
952
- sftp_handle = self._client.open(self._real_path, ssh2_mode, 0o644)
928
+ sftp_handle = self._client.open(self._remote_path, ssh2_mode, 0o644)
953
929
 
954
930
  # Create raw file wrapper
955
931
  raw_file = Sftp2RawFile(sftp_handle, self.path, mode)
@@ -979,7 +955,7 @@ class Sftp2Path(URIPath):
979
955
  """Change the file mode and permissions"""
980
956
  stat = SFTPAttributes()
981
957
  stat.permissions = int(mode)
982
- return self._client.setstat(self._real_path, stat)
958
+ return self._client.setstat(self._remote_path, stat)
983
959
 
984
960
  def absolute(self) -> "Sftp2Path":
985
961
  """Make the path absolute"""
@@ -989,7 +965,7 @@ class Sftp2Path(URIPath):
989
965
  """Remove this directory. The directory must be empty"""
990
966
  if len(self.listdir()) > 0:
991
967
  raise OSError(f"Directory not empty: '{self.path_with_protocol}'")
992
- return self._client.rmdir(self._real_path)
968
+ return self._client.rmdir(self._remote_path)
993
969
 
994
970
  def copy(
995
971
  self,
@@ -1017,7 +993,7 @@ class Sftp2Path(URIPath):
1017
993
  dst_path = self.from_path(dst_path)
1018
994
 
1019
995
  if self._is_same_backend(dst_path):
1020
- if self._real_path == dst_path._real_path:
996
+ if self._remote_path == dst_path._remote_path:
1021
997
  raise SameFileError(
1022
998
  f"'{self.path}' and '{dst_path.path}' are the same file"
1023
999
  )
@@ -1025,8 +1001,8 @@ class Sftp2Path(URIPath):
1025
1001
  exec_result = self._exec_command(
1026
1002
  [
1027
1003
  "cp",
1028
- self._real_path,
1029
- dst_path._real_path,
1004
+ self._remote_path,
1005
+ dst_path._remote_path,
1030
1006
  ]
1031
1007
  )
1032
1008
 
@@ -1087,4 +1063,4 @@ class Sftp2Path(URIPath):
1087
1063
  stat = SFTPAttributes()
1088
1064
  stat.atime = int(atime)
1089
1065
  stat.mtime = int(mtime)
1090
- self._client.setstat(self._real_path, stat)
1066
+ self._client.setstat(self._remote_path, stat)