megfile 3.1.6.post1__py3-none-any.whl → 4.0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. megfile/cli.py +12 -7
  2. megfile/config.py +27 -39
  3. megfile/fs.py +169 -12
  4. megfile/fs_path.py +183 -260
  5. megfile/hdfs.py +106 -5
  6. megfile/hdfs_path.py +34 -90
  7. megfile/http.py +50 -1
  8. megfile/http_path.py +27 -65
  9. megfile/interfaces.py +1 -8
  10. megfile/lib/base_prefetch_reader.py +62 -78
  11. megfile/lib/combine_reader.py +5 -0
  12. megfile/lib/glob.py +3 -6
  13. megfile/lib/hdfs_prefetch_reader.py +7 -7
  14. megfile/lib/http_prefetch_reader.py +6 -6
  15. megfile/lib/s3_buffered_writer.py +71 -65
  16. megfile/lib/s3_cached_handler.py +1 -2
  17. megfile/lib/s3_limited_seekable_writer.py +3 -7
  18. megfile/lib/s3_memory_handler.py +1 -2
  19. megfile/lib/s3_pipe_handler.py +1 -2
  20. megfile/lib/s3_prefetch_reader.py +10 -19
  21. megfile/lib/s3_share_cache_reader.py +8 -5
  22. megfile/pathlike.py +397 -401
  23. megfile/s3.py +118 -17
  24. megfile/s3_path.py +126 -209
  25. megfile/sftp.py +300 -10
  26. megfile/sftp_path.py +46 -322
  27. megfile/smart.py +33 -27
  28. megfile/smart_path.py +9 -14
  29. megfile/stdio.py +1 -1
  30. megfile/stdio_path.py +2 -2
  31. megfile/utils/__init__.py +3 -4
  32. megfile/version.py +1 -1
  33. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/METADATA +7 -7
  34. megfile-4.0.0.post1.dist-info/RECORD +52 -0
  35. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/WHEEL +1 -1
  36. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/top_level.txt +0 -2
  37. docs/conf.py +0 -65
  38. megfile-3.1.6.post1.dist-info/RECORD +0 -55
  39. scripts/convert_results_to_sarif.py +0 -91
  40. scripts/generate_file.py +0 -344
  41. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/LICENSE +0 -0
  42. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/LICENSE.pyre +0 -0
  43. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/entry_points.txt +0 -0
megfile/s3_path.py CHANGED
@@ -11,16 +11,19 @@ from urllib.parse import urlparse
 
 import boto3
 import botocore
+from boto3.s3.transfer import TransferConfig
 from botocore.awsrequest import AWSPreparedRequest, AWSResponse
 
 from megfile.config import (
-    DEFAULT_BLOCK_SIZE,
-    DEFAULT_MAX_BLOCK_SIZE,
-    DEFAULT_MIN_BLOCK_SIZE,
     GLOBAL_MAX_WORKERS,
     HTTP_AUTH_HEADERS,
+    READER_BLOCK_SIZE,
+    READER_MAX_BUFFER_SIZE,
     S3_CLIENT_CACHE_MODE,
     S3_MAX_RETRY_TIMES,
+    WRITER_BLOCK_SIZE,
+    WRITER_MAX_BUFFER_SIZE,
+    to_boolean,
 )
 from megfile.errors import (
     S3BucketNotFoundError,
@@ -61,7 +64,6 @@ from megfile.lib.fnmatch import translate
 from megfile.lib.glob import has_magic, has_magic_ignore_brace, ungloblize
 from megfile.lib.joinpath import uri_join
 from megfile.lib.s3_buffered_writer import (
-    DEFAULT_MAX_BUFFER_SIZE,
     S3BufferedWriter,
 )
 from megfile.lib.s3_cached_handler import S3CachedHandler
@@ -101,26 +103,14 @@ __all__ = [
     "s3_share_cache_open",
     "s3_open",
     "S3Cacher",
-    "S3BufferedWriter",
-    "S3LimitedSeekableWriter",
-    "S3PrefetchReader",
-    "S3ShareCacheReader",
     "s3_upload",
     "s3_download",
     "s3_load_content",
-    "s3_readlink",
-    "s3_glob",
-    "s3_glob_stat",
-    "s3_iglob",
-    "s3_rename",
-    "s3_makedirs",
     "s3_concat",
-    "s3_lstat",
 ]
 _logger = get_logger(__name__)
 content_md5_header = "megfile-content-md5"
 endpoint_url = "https://s3.amazonaws.com"
-max_pool_connections = GLOBAL_MAX_WORKERS  # for compatibility
 max_retries = S3_MAX_RETRY_TIMES
 max_keys = 1000
 
@@ -266,7 +256,7 @@ def get_env_var(env_name: str, profile_name=None):
 def parse_boolean(value: Optional[str], default: bool = False) -> bool:
     if value is None:
         return default
-    return value.lower() in ("true", "yes", "1")
+    return to_boolean(value)
 
 
 def get_access_token(profile_name=None):
@@ -638,7 +628,7 @@ def _s3_scan_pairs(
     src_url: PathLike, dst_url: PathLike
 ) -> Iterator[Tuple[PathLike, PathLike]]:
     for src_file_path in S3Path(src_url).scan():
-        content_path = src_file_path[len(src_url) :]
+        content_path = src_file_path[len(fspath(src_url)) :]
         if len(content_path) > 0:
             dst_file_path = s3_path_join(dst_url, content_path)
         else:
@@ -698,8 +688,8 @@ def s3_prefetch_open(
     mode: str = "rb",
     followlinks: bool = False,
     *,
-    max_concurrency: Optional[int] = None,
-    max_block_size: int = DEFAULT_BLOCK_SIZE,
+    max_workers: Optional[int] = None,
+    block_size: int = READER_BLOCK_SIZE,
 ) -> S3PrefetchReader:
     """Open a asynchronous prefetch reader, to support fast sequential
     read and random read
@@ -710,11 +700,18 @@ def s3_prefetch_open(
 
     Supports context manager
 
-    Some parameter setting may perform well: max_concurrency=10 or 20,
-    max_block_size=8 or 16 MB, default value None means using global thread pool
-
-    :param max_concurrency: Max download thread number, None by default
-    :param max_block_size: Max data size downloaded by each thread, in bytes,
+    Some parameter setting may perform well: max_workers=10 or 20,
+    block_size=8 or 16 MB, default value None means using global thread pool
+
+    :param s3_url: s3 path
+    :param mode: only support "r" or "rb"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
+    :param max_workers: Max download thread number, `None` by default
+    :param block_size: Max data size downloaded by each thread, in bytes,
         8MB by default
     :returns: An opened S3PrefetchReader object
     :raises: S3FileNotFoundError
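Given the parameter names documented above, a minimal call sketch against the renamed keywords; the bucket and key are made up:

    from megfile.s3_path import s3_prefetch_open

    # max_workers / block_size replace the old max_concurrency / max_block_size
    with s3_prefetch_open("s3://my-bucket/data.bin",
                          max_workers=10, block_size=8 * 2**20) as f:
        header = f.read(1024)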
@@ -730,15 +727,15 @@ def s3_prefetch_open(
         pass
 
     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=max_pool_connections)
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3PrefetchReader(
         bucket,
         key,
         s3_client=client,
         max_retries=max_retries,
-        max_workers=max_concurrency,
-        block_size=max_block_size,
+        max_workers=max_workers,
+        block_size=block_size,
         profile_name=s3_url._profile_name,
     )
 
@@ -750,8 +747,8 @@ def s3_share_cache_open(
     followlinks: bool = False,
     *,
     cache_key: str = "lru",
-    max_concurrency: Optional[int] = None,
-    max_block_size: int = DEFAULT_BLOCK_SIZE,
+    max_workers: Optional[int] = None,
+    block_size: int = READER_BLOCK_SIZE,
 ) -> S3ShareCacheReader:
     """Open a asynchronous prefetch reader, to support fast sequential read and
     random read
@@ -762,11 +759,18 @@ def s3_share_cache_open(
 
     Supports context manager
 
-    Some parameter setting may perform well: max_concurrency=10 or 20,
-    max_block_size=8 or 16 MB, default value None means using global thread pool
-
-    :param max_concurrency: Max download thread number, None by default
-    :param max_block_size: Max data size downloaded by each thread, in bytes,
+    Some parameter setting may perform well: max_workers=10 or 20,
+    block_size=8 or 16 MB, default value None means using global thread pool
+
+    :param s3_url: s3 path
+    :param mode: only support "r" or "rb"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
+    :param max_workers: Max download thread number, None by default
+    :param block_size: Max data size downloaded by each thread, in bytes,
        8MB by default
     :returns: An opened S3ShareCacheReader object
     :raises: S3FileNotFoundError
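A similar sketch for the share-cache reader; readers opened with the same cache_key can share their downloaded blocks (paths below are hypothetical):

    from megfile.s3_path import s3_share_cache_open

    with s3_share_cache_open("s3://my-bucket/a.bin", cache_key="dataset", max_workers=10) as fa:
        chunk_a = fa.read(4096)
    with s3_share_cache_open("s3://my-bucket/b.bin", cache_key="dataset", max_workers=10) as fb:
        chunk_b = fb.read(4096)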
@@ -783,7 +787,7 @@ def s3_share_cache_open(
         pass
 
     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=max_pool_connections)
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3ShareCacheReader(
         bucket,
@@ -791,8 +795,8 @@ def s3_share_cache_open(
         cache_key=cache_key,
         s3_client=client,
         max_retries=max_retries,
-        max_workers=max_concurrency,
-        block_size=max_block_size,
+        max_workers=max_workers,
+        block_size=block_size,
         profile_name=s3_url._profile_name,
     )
 
@@ -819,7 +823,13 @@ def s3_pipe_open(
     But asynchronous behavior can guarantee the file are successfully written,
     and frequent execution may cause thread and file handle exhaustion
 
-    :param mode: Mode to open file, either "rb" or "wb"
+    :param s3_url: s3 path
+    :param mode: Mode to open file, either "r", "rb", "w" or "wb"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
     :param join_thread: If wait after function execution until s3 finishes writing
     :returns: An opened BufferedReader / BufferedWriter object
     """
@@ -838,7 +848,7 @@ def s3_pipe_open(
         pass
 
     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=max_pool_connections)
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3PipeHandler(
         bucket,
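A write-side sketch of the expanded text modes and the join_thread flag documented above (bucket and key are hypothetical):

    from megfile.s3_path import s3_pipe_open

    # join_thread=True waits until the background upload thread finishes
    with s3_pipe_open("s3://my-bucket/log.txt", "w", join_thread=True) as f:
        f.write("hello\n")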
@@ -869,7 +879,14 @@ def s3_cached_open(
     cache_path can specify the path of cache file. Performance could be better
     if cache file path is on ssd or tmpfs
 
-    :param mode: Mode to open file, could be one of "rb", "wb" or "ab"
+    :param s3_url: s3 path
+    :param mode: Mode to open file, could be one of "rb", "wb", "ab", "rb+", "wb+"
+        or "ab+"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
     :param cache_path: cache file path
     :returns: An opened BufferedReader / BufferedWriter object
     """
@@ -884,7 +901,7 @@ def s3_cached_open(
         pass
 
     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=max_pool_connections)
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3CachedHandler(
         bucket,
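A sketch of the cached handler with cache_path on fast local storage; the modes listed above now also include the read-write variants (paths are hypothetical):

    from megfile.s3_path import s3_cached_open

    # the object is staged in /dev/shm while the handle is open
    with s3_cached_open("s3://my-bucket/data.bin", "rb", cache_path="/dev/shm/data.cache") as f:
        first_bytes = f.read(16)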
@@ -902,16 +919,14 @@ def s3_buffered_open(
     mode: str,
     followlinks: bool = False,
     *,
-    max_concurrency: Optional[int] = None,
-    max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
-    forward_ratio: Optional[float] = None,
+    max_workers: Optional[int] = None,
+    max_buffer_size: Optional[int] = None,
+    block_forward: Optional[int] = None,
     block_size: Optional[int] = None,
     limited_seekable: bool = False,
     buffered: bool = False,
     share_cache_key: Optional[str] = None,
     cache_path: Optional[str] = None,
-    min_block_size: Optional[int] = None,
-    max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
 ) -> IO:
     """Open an asynchronous prefetch reader, to support fast sequential read
 
@@ -921,22 +936,30 @@ def s3_buffered_open(
 
     Supports context manager
 
-    Some parameter setting may perform well: max_concurrency=10 or 20,
-    max_block_size=8 or 16 MB, default value None means using global thread pool
-
-    :param max_concurrency: Max download thread number, None by default
-    :param max_buffer_size: Max cached buffer size in memory, 128MB by default
-    :param min_block_size: Min size of single block, default is same as block_size.
-        Each block will be downloaded by single thread.
-    :param max_block_size: Max size of single block, 128MB by default.
-        Each block will be downloaded by single thread.
-    :param block_size: Size of single block, 8MB by default.
+    Some parameter setting may perform well: max_workers=10 or 20,
+    default value None means using global thread pool
+
+    :param s3_url: s3 path
+    :param mode: Mode to open file, could be one of "rb", "wb", "ab", "rb+", "wb+"
+        or "ab+"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
+    :param max_workers: Max download / upload thread number, `None` by default,
+        will use global thread pool with 8 threads.
+    :param max_buffer_size: Max cached buffer size in memory, 128MB by default.
+        Set to `0` will disable cache.
+    :param block_forward: How many blocks of data cached from offset position, only for
+        read mode.
+    :param block_size: Size of single block.
         Each block will be uploaded by single thread.
     :param limited_seekable: If write-handle supports limited seek
         (both file head part and tail part can seek block_size).
         Notes: This parameter are valid only for write-handle.
         Read-handle support arbitrary seek
-    :returns: An opened S3PrefetchReader object
+    :returns: An opened File object
     :raises: S3FileNotFoundError
     """
     if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
@@ -948,11 +971,8 @@ def s3_buffered_open(
         s3_url = s3_url.readlink()
     except S3NotALinkError:
         pass
-    min_block_size = min_block_size or block_size or DEFAULT_MIN_BLOCK_SIZE
-    block_size = block_size or DEFAULT_BLOCK_SIZE
-
     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=max_pool_connections)
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
 
     if "a" in mode or "+" in mode:
@@ -970,13 +990,6 @@ def s3_buffered_open(
         )
 
     if mode == "rb":
-        # A rough conversion algorithm to align 2 types of Reader / Writer parameters
-        # TODO: Optimize the conversion algorithm
-        block_capacity = max_buffer_size // block_size
-        if forward_ratio is None:
-            block_forward = None
-        else:
-            block_forward = max(int(block_capacity * forward_ratio), 1)
         if share_cache_key is not None:
             reader = S3ShareCacheReader(
                 bucket,
@@ -984,8 +997,8 @@ def s3_buffered_open(
                 cache_key=share_cache_key,
                 s3_client=client,
                 max_retries=max_retries,
-                max_workers=max_concurrency,
-                block_size=block_size,
+                max_workers=max_workers,
+                block_size=block_size or READER_BLOCK_SIZE,
                 block_forward=block_forward,
                 profile_name=s3_url._profile_name,
             )
@@ -995,10 +1008,10 @@ def s3_buffered_open(
                 key,
                 s3_client=client,
                 max_retries=max_retries,
-                max_workers=max_concurrency,
-                block_capacity=block_capacity,
+                max_workers=max_workers,
+                max_buffer_size=max_buffer_size or READER_MAX_BUFFER_SIZE,
                 block_forward=block_forward,
-                block_size=block_size,
+                block_size=block_size or READER_BLOCK_SIZE,
                 profile_name=s3_url._profile_name,
             )
         if buffered or _is_pickle(reader):
@@ -1010,10 +1023,9 @@ def s3_buffered_open(
             bucket,
             key,
             s3_client=client,
-            max_workers=max_concurrency,
-            block_size=min_block_size,
-            max_block_size=max_block_size,
-            max_buffer_size=max_buffer_size,
+            max_workers=max_workers,
+            block_size=block_size or WRITER_BLOCK_SIZE,
+            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
             profile_name=s3_url._profile_name,
         )
     else:
@@ -1021,10 +1033,9 @@ def s3_buffered_open(
             bucket,
             key,
             s3_client=client,
-            max_workers=max_concurrency,
-            block_size=min_block_size,
-            max_block_size=max_block_size,
-            max_buffer_size=max_buffer_size,
+            max_workers=max_workers,
+            block_size=block_size or WRITER_BLOCK_SIZE,
+            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
             profile_name=s3_url._profile_name,
         )
     if buffered or _is_pickle(writer):
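A sketch of the reworked knobs: block_forward replaces forward_ratio on the read side, and unset sizes fall back to the READER_*/WRITER_* config values (paths are hypothetical):

    from megfile.s3_path import s3_buffered_open

    # read: cache up to 4 blocks ahead of the current offset
    with s3_buffered_open("s3://my-bucket/in.bin", "rb", max_workers=16, block_forward=4) as reader:
        payload = reader.read()

    # write: block_size defaults to WRITER_BLOCK_SIZE when left as None
    with s3_buffered_open("s3://my-bucket/out.bin", "wb", block_size=16 * 2**20) as writer:
        writer.write(payload)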
@@ -1059,7 +1070,7 @@ def s3_memory_open(
         pass
 
     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=max_pool_connections)
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3MemoryHandler(
         bucket, key, mode, s3_client=client, profile_name=s3_url._profile_name
@@ -1118,7 +1129,7 @@ def s3_download(
     if not src_url.is_file():
         raise S3IsADirectoryError("Is a directory: %r" % src_url.path_with_protocol)
 
-    dst_directory = os.path.dirname(dst_path.path_without_protocol)
+    dst_directory = os.path.dirname(dst_path.path_without_protocol)  # pyre-ignore[6]
     if dst_directory != "":
         os.makedirs(dst_directory, exist_ok=True)
 
@@ -1126,9 +1137,21 @@ def s3_download(
     download_file = patch_method(
         client.download_file, max_retries=max_retries, should_retry=s3_should_retry
     )
+
+    transfer_config = TransferConfig(
+        multipart_threshold=READER_BLOCK_SIZE,
+        max_concurrency=GLOBAL_MAX_WORKERS,
+        multipart_chunksize=READER_BLOCK_SIZE,
+        num_download_attempts=S3_MAX_RETRY_TIMES,
+        max_io_queue=max(READER_MAX_BUFFER_SIZE // READER_BLOCK_SIZE, 1),
+    )
     try:
         download_file(
-            src_bucket, src_key, dst_path.path_without_protocol, Callback=callback
+            src_bucket,
+            src_key,
+            dst_path.path_without_protocol,
+            Callback=callback,
+            Config=transfer_config,
         )
     except Exception as error:
         error = translate_fs_error(error, dst_url)
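For a sense of scale: if READER_BLOCK_SIZE keeps the 8 MB default and READER_MAX_BUFFER_SIZE the 128 MB default mentioned in the docstrings above, the queue bound works out as follows (values are illustrative, not read from the config module):

    max_io_queue = max((128 * 2**20) // (8 * 2**20), 1)  # -> 16 in-flight download chunks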
@@ -1179,8 +1202,19 @@ def s3_upload(
         client.upload_fileobj, max_retries=max_retries, should_retry=s3_should_retry
     )
 
+    transfer_config = TransferConfig(
+        multipart_threshold=WRITER_BLOCK_SIZE,
+        max_concurrency=GLOBAL_MAX_WORKERS,
+        multipart_chunksize=WRITER_BLOCK_SIZE,
+    )
     with open(src_path.path_without_protocol, "rb") as src, raise_s3_error(dst_url):
-        upload_fileobj(src, Bucket=dst_bucket, Key=dst_key, Callback=callback)
+        upload_fileobj(
+            src,
+            Bucket=dst_bucket,
+            Key=dst_key,
+            Callback=callback,
+            Config=transfer_config,
+        )
 
 
 def s3_load_content(
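Both transfers now route through boto3's managed transfer with an explicit TransferConfig; callers keep the same two-argument form (local path and bucket below are hypothetical):

    from megfile.s3_path import s3_download, s3_upload

    s3_download("s3://my-bucket/model.ckpt", "/tmp/model.ckpt")    # ranged GETs sized by READER_BLOCK_SIZE
    s3_upload("/tmp/model.ckpt", "s3://my-bucket/model.ckpt.bak")  # multipart upload sized by WRITER_BLOCK_SIZE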
@@ -1226,27 +1260,6 @@ def s3_load_content(
     )(client, bucket, key, range_str)
 
 
-def s3_readlink(path) -> str:
-    """
-    Return a string representing the path to which the symbolic link points.
-
-    :returns: Return a string representing the path to which the symbolic link points.
-    :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError,
-        S3NotALinkError
-    """
-    return S3Path(path).readlink().path_with_protocol
-
-
-def s3_rename(src_url: PathLike, dst_url: PathLike, overwrite: bool = True) -> None:
-    """
-    Move s3 file path from src_url to dst_url
-
-    :param dst_url: Given destination path
-    :param overwrite: whether or not overwrite file when exists
-    """
-    S3Path(src_url).rename(dst_url, overwrite)
-
-
 class S3Cacher(FileCacher):
     cache_path = None
 
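The removed module-level wrappers were thin delegations, so their old bodies double as the migration path (paths below are hypothetical):

    from megfile.s3_path import S3Path

    target = S3Path("s3://my-bucket/link").readlink().path_with_protocol  # was s3_readlink(...)
    S3Path("s3://my-bucket/a.txt").rename("s3://my-bucket/b.txt", True)   # was s3_rename(..., overwrite=True)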
@@ -1268,97 +1281,8 @@ class S3Cacher(FileCacher):
         os.unlink(self.cache_path)
 
 
-def s3_glob(
-    path: PathLike,
-    recursive: bool = True,
-    missing_ok: bool = True,
-    followlinks: bool = False,
-) -> List[str]:
-    """Return s3 path list in ascending alphabetical order,
-    in which path matches glob pattern
-
-    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
-    raise UnsupportedError
-
-    :param recursive: If False, `**` will not search directory recursively
-    :param missing_ok: If False and target path doesn't match any file,
-        raise FileNotFoundError
-    :raises: UnsupportedError, when bucket part contains wildcard characters
-    :returns: A list contains paths match `s3_pathname`
-    """
-    return list(
-        s3_iglob(
-            path=path,
-            recursive=recursive,
-            missing_ok=missing_ok,
-            followlinks=followlinks,
-        )
-    )
-
-
-def s3_glob_stat(
-    path: PathLike,
-    recursive: bool = True,
-    missing_ok: bool = True,
-    followlinks: bool = False,
-) -> Iterator[FileEntry]:
-    """Return a generator contains tuples of path and file stat,
-    in ascending alphabetical order, in which path matches glob pattern
-
-    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
-    raise UnsupportedError
-
-    :param recursive: If False, `**` will not search directory recursively
-    :param missing_ok: If False and target path doesn't match any file,
-        raise FileNotFoundError
-    :raises: UnsupportedError, when bucket part contains wildcard characters
-    :returns: A generator contains tuples of path and file stat,
-        in which paths match `s3_pathname`
-    """
-    return S3Path(path).glob_stat(
-        pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
-    )
-
-
-def s3_iglob(
-    path: PathLike,
-    recursive: bool = True,
-    missing_ok: bool = True,
-    followlinks: bool = False,
-) -> Iterator[str]:
-    """Return s3 path iterator in ascending alphabetical order,
-    in which path matches glob pattern
-
-    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
-    raise UnsupportedError
-
-    :param recursive: If False, `**` will not search directory recursively
-    :param missing_ok: If False and target path doesn't match any file,
-        raise FileNotFoundError
-    :raises: UnsupportedError, when bucket part contains wildcard characters
-    :returns: An iterator contains paths match `s3_pathname`
-    """
-    for path_obj in S3Path(path).iglob(
-        pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
-    ):
-        yield path_obj.path_with_protocol
-
-
-def s3_makedirs(path: PathLike, exist_ok: bool = False):
-    """
-    Create an s3 directory.
-    Purely creating directory is invalid because it's unavailable on OSS.
-    This function is to test the target bucket have WRITE access.
-
-    :param path: Given path
-    :param exist_ok: If False and target directory exists, raise S3FileExistsError
-    :raises: S3BucketNotFoundError, S3FileExistsError
-    """
-    return S3Path(path).mkdir(parents=True, exist_ok=exist_ok)
-
-
 def _group_src_paths_by_block(
-    src_paths: List[PathLike], block_size: int = DEFAULT_BLOCK_SIZE
+    src_paths: List[PathLike], block_size: int = READER_BLOCK_SIZE
 ) -> List[List[Tuple[PathLike, Optional[str]]]]:
     groups = []
     current_group, current_group_size = [], 0
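Likewise for the removed glob and makedirs wrappers; the S3Path calls below mirror their deleted bodies (pattern and bucket are hypothetical):

    from megfile.s3_path import S3Path

    paths = [p.path_with_protocol for p in S3Path("s3://my-bucket/*.json").iglob(pattern="")]  # s3_glob / s3_iglob
    entries = S3Path("s3://my-bucket/*.json").glob_stat(pattern="")              # s3_glob_stat
    S3Path("s3://my-bucket/new-prefix/").mkdir(parents=True, exist_ok=True)      # s3_makedirs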
@@ -1404,7 +1328,7 @@ def _group_src_paths_by_block(
 def s3_concat(
     src_paths: List[PathLike],
     dst_path: PathLike,
-    block_size: int = DEFAULT_BLOCK_SIZE,
+    block_size: int = READER_BLOCK_SIZE,
     max_workers: int = GLOBAL_MAX_WORKERS,
 ) -> None:
     """Concatenate s3 files to one file.
@@ -1419,9 +1343,10 @@ def s3_concat(
     else:
         groups = _group_src_paths_by_block(src_paths, block_size=block_size)
 
-    with MultiPartWriter(client, dst_path) as writer, ThreadPoolExecutor(
-        max_workers=max_workers
-    ) as executor:
+    with (
+        MultiPartWriter(client, dst_path) as writer,
+        ThreadPoolExecutor(max_workers=max_workers) as executor,
+    ):
         for index, group in enumerate(groups, start=1):
             if len(group) == 1:
                 executor.submit(
@@ -1431,14 +1356,6 @@ def s3_concat(
                 executor.submit(writer.upload_part_by_paths, index, group)
 
 
-def s3_lstat(path: PathLike) -> StatResult:
-    """
-    Like Path.stat() but, if the path points to a symbolic link,
-    return the symbolic link's information rather than its target's.
-    """
-    return S3Path(path).lstat()
-
-
 @SmartPath.register
 class S3Path(URIPath):
     protocol = "s3"
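And the last removed wrapper, s3_lstat, maps directly onto the method it called (path is hypothetical):

    from megfile.s3_path import S3Path

    link_stat = S3Path("s3://my-bucket/link").lstat()  # symlink's own StatResult, was s3_lstat(...)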