megfile 3.1.1-py3-none-any.whl → 3.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +84 -65
  14. megfile/lib/combine_reader.py +12 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +46 -54
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +58 -51
  25. megfile/lib/s3_cached_handler.py +13 -14
  26. megfile/lib/s3_limited_seekable_writer.py +37 -28
  27. megfile/lib/s3_memory_handler.py +34 -30
  28. megfile/lib/s3_pipe_handler.py +24 -25
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +7 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +73 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.1.dist-info/RECORD +0 -55
  52. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
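
Most of the megfile/s3_path.py diff shown below is a mechanical formatting migration (single quotes to double quotes, Black-style line wrapping, parenthesized multi-line imports), plus dropping the duplicate GLOBAL_MAX_WORKERS import from megfile.lib.s3_buffered_writer (the name already comes from megfile.config). The clearest behavioral change visible in this excerpt is in _patch_make_request: the patched after_callback now raises a typed client exception for any response with HTTP status >= 400, where 3.1.1 only did so for >= 500. A minimal illustration of that threshold change (not code from the package; the helper below is hypothetical):

# Hypothetical helper illustrating the new threshold in after_callback.
def raises_client_exception(status_code: int, version: str = "3.1.2") -> bool:
    threshold = 400 if version == "3.1.2" else 500  # 3.1.1 checked >= 500
    return status_code >= threshold

assert raises_client_exception(404, version="3.1.1") is False  # 4xx passed through this callback before
assert raises_client_exception(404) is True   # 4xx (e.g. NoSuchKey, AccessDenied) now raises here
assert raises_client_exception(503) is True   # 5xx raised in both versions
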
megfile/s3_path.py CHANGED
@@ -12,17 +12,56 @@ import boto3
12
12
  import botocore
13
13
  from botocore.awsrequest import AWSResponse
14
14
 
15
- from megfile.config import DEFAULT_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE, DEFAULT_MIN_BLOCK_SIZE, GLOBAL_MAX_WORKERS, S3_CLIENT_CACHE_MODE, S3_MAX_RETRY_TIMES
16
- from megfile.errors import S3BucketNotFoundError, S3ConfigError, S3FileExistsError, S3FileNotFoundError, S3IsADirectoryError, S3NameTooLongError, S3NotADirectoryError, S3NotALinkError, S3PermissionError, S3UnknownError, SameFileError, UnsupportedError, _create_missing_ok_generator
17
- from megfile.errors import _logger as error_logger
18
- from megfile.errors import patch_method, raise_s3_error, s3_error_code_should_retry, s3_should_retry, translate_fs_error, translate_s3_error
19
- from megfile.interfaces import Access, ContextIterator, FileCacher, FileEntry, PathLike, StatResult, URIPath
15
+ from megfile.config import (
16
+ DEFAULT_BLOCK_SIZE,
17
+ DEFAULT_MAX_BLOCK_SIZE,
18
+ DEFAULT_MIN_BLOCK_SIZE,
19
+ GLOBAL_MAX_WORKERS,
20
+ S3_CLIENT_CACHE_MODE,
21
+ S3_MAX_RETRY_TIMES,
22
+ )
23
+ from megfile.errors import (
24
+ S3BucketNotFoundError,
25
+ S3ConfigError,
26
+ S3FileExistsError,
27
+ S3FileNotFoundError,
28
+ S3IsADirectoryError,
29
+ S3NameTooLongError,
30
+ S3NotADirectoryError,
31
+ S3NotALinkError,
32
+ S3PermissionError,
33
+ S3UnknownError,
34
+ SameFileError,
35
+ UnsupportedError,
36
+ _create_missing_ok_generator,
37
+ patch_method,
38
+ raise_s3_error,
39
+ s3_error_code_should_retry,
40
+ s3_should_retry,
41
+ translate_fs_error,
42
+ translate_s3_error,
43
+ )
44
+ from megfile.errors import (
45
+ _logger as error_logger,
46
+ )
47
+ from megfile.interfaces import (
48
+ Access,
49
+ ContextIterator,
50
+ FileCacher,
51
+ FileEntry,
52
+ PathLike,
53
+ StatResult,
54
+ URIPath,
55
+ )
20
56
  from megfile.lib.compare import is_same_file
21
57
  from megfile.lib.compat import fspath
22
58
  from megfile.lib.fnmatch import translate
23
59
  from megfile.lib.glob import has_magic, has_magic_ignore_brace, ungloblize
24
60
  from megfile.lib.joinpath import uri_join
25
- from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE, GLOBAL_MAX_WORKERS, S3BufferedWriter
61
+ from megfile.lib.s3_buffered_writer import (
62
+ DEFAULT_MAX_BUFFER_SIZE,
63
+ S3BufferedWriter,
64
+ )
26
65
  from megfile.lib.s3_cached_handler import S3CachedHandler
27
66
  from megfile.lib.s3_limited_seekable_writer import S3LimitedSeekableWriter
28
67
  from megfile.lib.s3_memory_handler import S3MemoryHandler
@@ -31,72 +70,88 @@ from megfile.lib.s3_prefetch_reader import S3PrefetchReader
31
70
  from megfile.lib.s3_share_cache_reader import S3ShareCacheReader
32
71
  from megfile.lib.url import get_url_scheme
33
72
  from megfile.smart_path import SmartPath
34
- from megfile.utils import _is_pickle, calculate_md5, generate_cache_path, get_binary_mode, get_content_offset, is_readable, necessary_params, process_local, thread_local
73
+ from megfile.utils import (
74
+ _is_pickle,
75
+ calculate_md5,
76
+ generate_cache_path,
77
+ get_binary_mode,
78
+ get_content_offset,
79
+ is_readable,
80
+ necessary_params,
81
+ process_local,
82
+ thread_local,
83
+ )
35
84
 
36
85
  __all__ = [
37
- 'S3Path',
38
- 'parse_s3_url',
39
- 'get_endpoint_url',
40
- 'get_s3_session',
41
- 'get_s3_client',
42
- 's3_path_join',
43
- 'is_s3',
44
- 's3_buffered_open',
45
- 's3_cached_open',
46
- 's3_memory_open',
47
- 's3_pipe_open',
48
- 's3_prefetch_open',
49
- 's3_share_cache_open',
50
- 's3_open',
51
- 'S3Cacher',
52
- 'S3BufferedWriter',
53
- 'S3LimitedSeekableWriter',
54
- 'S3PrefetchReader',
55
- 'S3ShareCacheReader',
56
- 's3_upload',
57
- 's3_download',
58
- 's3_load_content',
59
- 's3_readlink',
60
- 's3_glob',
61
- 's3_glob_stat',
62
- 's3_iglob',
63
- 's3_rename',
64
- 's3_makedirs',
65
- 's3_concat',
66
- 's3_lstat',
86
+ "S3Path",
87
+ "parse_s3_url",
88
+ "get_endpoint_url",
89
+ "get_s3_session",
90
+ "get_s3_client",
91
+ "s3_path_join",
92
+ "is_s3",
93
+ "s3_buffered_open",
94
+ "s3_cached_open",
95
+ "s3_memory_open",
96
+ "s3_pipe_open",
97
+ "s3_prefetch_open",
98
+ "s3_share_cache_open",
99
+ "s3_open",
100
+ "S3Cacher",
101
+ "S3BufferedWriter",
102
+ "S3LimitedSeekableWriter",
103
+ "S3PrefetchReader",
104
+ "S3ShareCacheReader",
105
+ "s3_upload",
106
+ "s3_download",
107
+ "s3_load_content",
108
+ "s3_readlink",
109
+ "s3_glob",
110
+ "s3_glob_stat",
111
+ "s3_iglob",
112
+ "s3_rename",
113
+ "s3_makedirs",
114
+ "s3_concat",
115
+ "s3_lstat",
67
116
  ]
68
117
  _logger = get_logger(__name__)
69
- content_md5_header = 'megfile-content-md5'
70
- endpoint_url = 'https://s3.amazonaws.com'
118
+ content_md5_header = "megfile-content-md5"
119
+ endpoint_url = "https://s3.amazonaws.com"
71
120
  max_pool_connections = GLOBAL_MAX_WORKERS # for compatibility
72
121
  max_retries = S3_MAX_RETRY_TIMES
73
122
  max_keys = 1000
74
123
 
75
124
 
76
125
  def _patch_make_request(client: botocore.client.BaseClient):
77
-
78
126
  def after_callback(result: Tuple[AWSResponse, dict], *args, **kwargs):
79
- if not isinstance(result, tuple) or len(result) != 2 \
80
- or not isinstance(result[0], AWSResponse) or not isinstance(result[1], dict):
127
+ if (
128
+ not isinstance(result, tuple)
129
+ or len(result) != 2
130
+ or not isinstance(result[0], AWSResponse)
131
+ or not isinstance(result[1], dict)
132
+ ):
81
133
  return result
82
134
  http, parsed_response = result
83
- if http.status_code >= 500:
135
+ if http.status_code >= 400:
84
136
  error_code = parsed_response.get("Error", {}).get("Code")
85
- operation_model = kwargs.get('operation_model') or (
86
- args[0] if args else None)
87
- operation_name = operation_model.name if operation_model else 'ProxyMethod'
137
+ operation_model = kwargs.get("operation_model") or (
138
+ args[0] if args else None
139
+ )
140
+ operation_name = operation_model.name if operation_model else "ProxyMethod"
88
141
  error_class = client.exceptions.from_code(error_code)
89
142
  raise error_class(parsed_response, operation_name)
90
143
  return result
91
144
 
92
145
  def retry_callback(error, operation_model, request_dict, request_context):
93
- if is_readable(request_dict['body']):
94
- request_dict['body'].seek(0)
146
+ if is_readable(request_dict["body"]):
147
+ request_dict["body"].seek(0)
95
148
 
96
149
  def before_callback(operation_model, request_dict, request_context):
97
150
  _logger.debug(
98
- 'send s3 request: %r, with parameters: %s', operation_model.name,
99
- request_dict)
151
+ "send s3 request: %r, with parameters: %s",
152
+ operation_model.name,
153
+ request_dict,
154
+ )
100
155
 
101
156
  client._make_request = patch_method(
102
157
  client._make_request,
@@ -104,28 +159,28 @@ def _patch_make_request(client: botocore.client.BaseClient):
104
159
  should_retry=s3_should_retry,
105
160
  after_callback=after_callback,
106
161
  before_callback=before_callback,
107
- retry_callback=retry_callback)
162
+ retry_callback=retry_callback,
163
+ )
108
164
  return client
109
165
 
110
166
 
111
167
  def parse_s3_url(s3_url: PathLike) -> Tuple[str, str]:
112
168
  s3_url = fspath(s3_url)
113
169
  if not is_s3(s3_url):
114
- raise ValueError('Not a s3 url: %r' % s3_url)
115
- right_part = s3_url.split('://', maxsplit=1)[1]
116
- bucket_pattern = re.match('(.*?)/', right_part)
170
+ raise ValueError("Not a s3 url: %r" % s3_url)
171
+ right_part = s3_url.split("://", maxsplit=1)[1]
172
+ bucket_pattern = re.match("(.*?)/", right_part)
117
173
  if bucket_pattern is None:
118
174
  bucket = right_part
119
- path = ''
175
+ path = ""
120
176
  else:
121
177
  bucket = bucket_pattern.group(1)
122
- path = right_part[len(bucket) + 1:]
178
+ path = right_part[len(bucket) + 1 :]
123
179
  return bucket, path
124
180
 
125
181
 
126
182
  def get_scoped_config(profile_name: Optional[str] = None) -> Dict:
127
- return get_s3_session(
128
- profile_name=profile_name)._session.get_scoped_config()
183
+ return get_s3_session(profile_name=profile_name)._session.get_scoped_config()
129
184
 
130
185
 
131
186
  @lru_cache()
@@ -134,15 +189,14 @@ def warning_endpoint_url(key: str, endpoint_url: str):
134
189
 
135
190
 
136
191
  def get_endpoint_url(profile_name: Optional[str] = None) -> str:
137
- '''Get the endpoint url of S3
192
+ """Get the endpoint url of S3
138
193
 
139
194
  :returns: S3 endpoint url
140
- '''
195
+ """
141
196
  if profile_name:
142
- environ_keys = (f'{profile_name}__OSS_ENDPOINT'.upper(),)
197
+ environ_keys = (f"{profile_name}__OSS_ENDPOINT".upper(),)
143
198
  else:
144
- environ_keys = (
145
- 'OSS_ENDPOINT', 'AWS_ENDPOINT_URL_S3', 'AWS_ENDPOINT_URL')
199
+ environ_keys = ("OSS_ENDPOINT", "AWS_ENDPOINT_URL_S3", "AWS_ENDPOINT_URL")
146
200
  for environ_key in environ_keys:
147
201
  environ_endpoint_url = os.environ.get(environ_key)
148
202
  if environ_endpoint_url:
@@ -150,10 +204,10 @@ def get_endpoint_url(profile_name: Optional[str] = None) -> str:
150
204
  return environ_endpoint_url
151
205
  try:
152
206
  config = get_scoped_config(profile_name=profile_name)
153
- config_endpoint_url = config.get('s3', {}).get('endpoint_url')
154
- config_endpoint_url = config_endpoint_url or config.get('endpoint_url')
207
+ config_endpoint_url = config.get("s3", {}).get("endpoint_url")
208
+ config_endpoint_url = config_endpoint_url or config.get("endpoint_url")
155
209
  if config_endpoint_url:
156
- warning_endpoint_url('~/.aws/config', config_endpoint_url)
210
+ warning_endpoint_url("~/.aws/config", config_endpoint_url)
157
211
  return config_endpoint_url
158
212
  except botocore.exceptions.ProfileNotFound:
159
213
  pass
@@ -161,27 +215,33 @@ def get_endpoint_url(profile_name: Optional[str] = None) -> str:
161
215
 
162
216
 
163
217
  def get_s3_session(profile_name=None) -> boto3.Session:
164
- '''Get S3 session
218
+ """Get S3 session
165
219
 
166
220
  :returns: S3 session
167
- '''
221
+ """
168
222
  return thread_local(
169
- f's3_session:{profile_name}', boto3.Session, profile_name=profile_name)
223
+ f"s3_session:{profile_name}", boto3.Session, profile_name=profile_name
224
+ )
170
225
 
171
226
 
172
227
  def get_access_token(profile_name=None):
173
- access_key_env_name = f"{profile_name}__AWS_ACCESS_KEY_ID".upper(
174
- ) if profile_name else "AWS_ACCESS_KEY_ID"
175
- secret_key_env_name = f"{profile_name}__AWS_SECRET_ACCESS_KEY".upper(
176
- ) if profile_name else "AWS_SECRET_ACCESS_KEY"
228
+ access_key_env_name = (
229
+ f"{profile_name}__AWS_ACCESS_KEY_ID".upper()
230
+ if profile_name
231
+ else "AWS_ACCESS_KEY_ID"
232
+ )
233
+ secret_key_env_name = (
234
+ f"{profile_name}__AWS_SECRET_ACCESS_KEY".upper()
235
+ if profile_name
236
+ else "AWS_SECRET_ACCESS_KEY"
237
+ )
177
238
  access_key = os.getenv(access_key_env_name)
178
239
  secret_key = os.getenv(secret_key_env_name)
179
240
  if access_key and secret_key:
180
241
  return access_key, secret_key
181
242
 
182
243
  try:
183
- credentials = get_s3_session(
184
- profile_name=profile_name).get_credentials()
244
+ credentials = get_s3_session(profile_name=profile_name).get_credentials()
185
245
  except botocore.exceptions.ProfileNotFound:
186
246
  credentials = None
187
247
  if credentials:
@@ -193,39 +253,42 @@ def get_access_token(profile_name=None):
193
253
 
194
254
 
195
255
  def get_s3_client(
196
- config: Optional[botocore.config.Config] = None,
197
- cache_key: Optional[str] = None,
198
- profile_name: Optional[str] = None):
199
- '''Get S3 client
256
+ config: Optional[botocore.config.Config] = None,
257
+ cache_key: Optional[str] = None,
258
+ profile_name: Optional[str] = None,
259
+ ):
260
+ """Get S3 client
200
261
 
201
262
  :returns: S3 client
202
- '''
263
+ """
203
264
  if cache_key is not None:
204
265
  local_storage = thread_local
205
- if S3_CLIENT_CACHE_MODE == 'process_local':
266
+ if S3_CLIENT_CACHE_MODE == "process_local":
206
267
  local_storage = process_local
207
268
  return local_storage(
208
269
  f"{cache_key}:{profile_name}",
209
270
  get_s3_client,
210
271
  config=config,
211
- profile_name=profile_name)
272
+ profile_name=profile_name,
273
+ )
212
274
 
213
275
  if config:
214
276
  config = botocore.config.Config(
215
- connect_timeout=5,
216
- max_pool_connections=GLOBAL_MAX_WORKERS).merge(config)
277
+ connect_timeout=5, max_pool_connections=GLOBAL_MAX_WORKERS
278
+ ).merge(config)
217
279
  else:
218
280
  config = botocore.config.Config(
219
- connect_timeout=5, max_pool_connections=GLOBAL_MAX_WORKERS)
281
+ connect_timeout=5, max_pool_connections=GLOBAL_MAX_WORKERS
282
+ )
220
283
 
221
- addressing_style_env_key = 'AWS_S3_ADDRESSING_STYLE'
284
+ addressing_style_env_key = "AWS_S3_ADDRESSING_STYLE"
222
285
  if profile_name:
223
- addressing_style_env_key = f'{profile_name}__AWS_S3_ADDRESSING_STYLE'.upper(
224
- )
286
+ addressing_style_env_key = f"{profile_name}__AWS_S3_ADDRESSING_STYLE".upper()
225
287
  addressing_style = os.environ.get(addressing_style_env_key)
226
288
  if addressing_style:
227
289
  config = config.merge(
228
- botocore.config.Config(s3={'addressing_style': addressing_style}))
290
+ botocore.config.Config(s3={"addressing_style": addressing_style})
291
+ )
229
292
 
230
293
  access_key, secret_key = get_access_token(profile_name)
231
294
  try:
@@ -233,7 +296,7 @@ def get_s3_client(
233
296
  except botocore.exceptions.ProfileNotFound:
234
297
  session = get_s3_session()
235
298
  client = session.client(
236
- 's3',
299
+ "s3",
237
300
  endpoint_url=get_endpoint_url(profile_name=profile_name),
238
301
  config=config,
239
302
  aws_access_key_id=access_key,
@@ -244,16 +307,15 @@ def get_s3_client(
244
307
 
245
308
 
246
309
  def get_s3_client_with_cache(
247
- config: Optional[botocore.config.Config] = None,
248
- profile_name: Optional[str] = None):
310
+ config: Optional[botocore.config.Config] = None, profile_name: Optional[str] = None
311
+ ):
249
312
  return get_s3_client(
250
- config=config,
251
- cache_key='s3_filelike_client',
252
- profile_name=profile_name)
313
+ config=config, cache_key="s3_filelike_client", profile_name=profile_name
314
+ )
253
315
 
254
316
 
255
317
  def s3_path_join(path: PathLike, *other_paths: PathLike) -> str:
256
- '''
318
+ """
257
319
  Concat 2 or more path to a complete path
258
320
 
259
321
  :param path: Given path
@@ -262,27 +324,31 @@ def s3_path_join(path: PathLike, *other_paths: PathLike) -> str:
262
324
 
263
325
  .. note ::
264
326
 
265
- The difference between this function and ``os.path.join`` is that this function ignores left side slash (which indicates absolute path) in ``other_paths`` and will directly concat.
266
- e.g. os.path.join('/path', 'to', '/file') => '/file', but s3_path_join('/path', 'to', '/file') => '/path/to/file'
267
- '''
327
+ The difference between this function and ``os.path.join`` is that this function
328
+ ignores left side slash (which indicates absolute path) in ``other_paths``
329
+ and will directly concat.
330
+
331
+ e.g. os.path.join('/path', 'to', '/file') => '/file',
332
+ but s3_path_join('/path', 'to', '/file') => '/path/to/file'
333
+ """
268
334
  return uri_join(fspath(path), *map(fspath, other_paths))
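
As a side note on the reflowed docstring above, its os.path.join comparison can be checked directly; the assertions below mirror the docstring's own example rather than adding new behavior:

import os.path
from megfile.s3_path import s3_path_join

assert os.path.join("/path", "to", "/file") == "/file"          # absolute component resets the join
assert s3_path_join("/path", "to", "/file") == "/path/to/file"  # leading slash is ignored and concatenated
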
269
335
 
270
336
 
271
337
  def _list_all_buckets(profile_name: Optional[str] = None) -> List[str]:
272
338
  client = get_s3_client_with_cache(profile_name=profile_name)
273
339
  response = client.list_buckets()
274
- return [content['Name'] for content in response['Buckets']]
340
+ return [content["Name"] for content in response["Buckets"]]
275
341
 
276
342
 
277
343
  def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
278
344
  s3_url = fspath(s3_url)
279
345
  s3_scheme, right_part = s3_url[:5], s3_url[5:]
280
- if s3_scheme != 's3://':
281
- raise ValueError('Not a s3 url: %r' % s3_url)
346
+ if s3_scheme != "s3://":
347
+ raise ValueError("Not a s3 url: %r" % s3_url)
282
348
  left_brace = False
283
349
  for current_index, current_character in enumerate(right_part):
284
350
  if current_character == "/" and left_brace is False:
285
- return right_part[:current_index], right_part[current_index + 1:]
351
+ return right_part[:current_index], right_part[current_index + 1 :]
286
352
  elif current_character == "{":
287
353
  left_brace = True
288
354
  elif current_character == "}":
@@ -291,12 +357,13 @@ def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
291
357
 
292
358
 
293
359
  def _group_s3path_by_bucket(
294
- s3_pathname: str, profile_name: Optional[str] = None) -> List[str]:
360
+ s3_pathname: str, profile_name: Optional[str] = None
361
+ ) -> List[str]:
295
362
  bucket, key = _parse_s3_url_ignore_brace(s3_pathname)
296
363
  if not bucket:
297
364
  if not key:
298
- raise UnsupportedError('Glob whole s3', s3_pathname)
299
- raise S3BucketNotFoundError('Empty bucket name: %r' % s3_pathname)
365
+ raise UnsupportedError("Glob whole s3", s3_pathname)
366
+ raise S3BucketNotFoundError("Empty bucket name: %r" % s3_pathname)
300
367
 
301
368
  grouped_path = []
302
369
 
@@ -312,7 +379,7 @@ def _group_s3path_by_bucket(
312
379
  path_part = None
313
380
  if len(split_bucket_name) == 2:
314
381
  bucket_name, path_part = split_bucket_name
315
- pattern = re.compile(translate(re.sub(r'\*{2,}', '*', bucket_name)))
382
+ pattern = re.compile(translate(re.sub(r"\*{2,}", "*", bucket_name)))
316
383
 
317
384
  for bucket in all_bucket(profile_name):
318
385
  if pattern.fullmatch(bucket) is not None:
@@ -347,8 +414,8 @@ def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
347
414
  if current_character == "/" and left_brace is False:
348
415
  if has_magic_ignore_brace(s3_pathname[left_index:current_index]):
349
416
  magic_parts.append(s3_pathname[left_index:current_index])
350
- if s3_pathname[current_index + 1:]:
351
- magic_parts.append(s3_pathname[current_index + 1:])
417
+ if s3_pathname[current_index + 1 :]:
418
+ magic_parts.append(s3_pathname[current_index + 1 :])
352
419
  left_index = len(s3_pathname)
353
420
  break
354
421
  normal_parts.append(s3_pathname[left_index:current_index])
@@ -377,7 +444,6 @@ def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
377
444
 
378
445
 
379
446
  def _group_s3path_by_prefix(s3_pathname: str) -> List[str]:
380
-
381
447
  _, key = parse_s3_url(s3_pathname)
382
448
  if not key:
383
449
  return ungloblize(s3_pathname)
@@ -394,15 +460,15 @@ def _group_s3path_by_prefix(s3_pathname: str) -> List[str]:
394
460
 
395
461
 
396
462
  def _become_prefix(prefix: str) -> str:
397
- if prefix != '' and not prefix.endswith('/'):
398
- prefix += '/'
463
+ if prefix != "" and not prefix.endswith("/"):
464
+ prefix += "/"
399
465
  return prefix
400
466
 
401
467
 
402
468
  def _s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
403
469
  if not has_magic(s3_pathname):
404
- return s3_pathname, ''
405
- delimiter = '/'
470
+ return s3_pathname, ""
471
+ delimiter = "/"
406
472
  normal_parts = []
407
473
  magic_parts = []
408
474
  all_parts = s3_pathname.split(delimiter)
@@ -415,53 +481,54 @@ def _s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
415
481
  return delimiter.join(normal_parts), delimiter.join(magic_parts)
416
482
 
417
483
 
418
- def _list_objects_recursive(
419
- s3_client, bucket: str, prefix: str, delimiter: str = ''):
420
-
484
+ def _list_objects_recursive(s3_client, bucket: str, prefix: str, delimiter: str = ""):
421
485
  resp = s3_client.list_objects_v2(
422
- Bucket=bucket, Prefix=prefix, Delimiter=delimiter, MaxKeys=max_keys)
486
+ Bucket=bucket, Prefix=prefix, Delimiter=delimiter, MaxKeys=max_keys
487
+ )
423
488
 
424
489
  while True:
425
490
  yield resp
426
491
 
427
- if not resp['IsTruncated']:
492
+ if not resp["IsTruncated"]:
428
493
  break
429
494
 
430
495
  resp = s3_client.list_objects_v2(
431
496
  Bucket=bucket,
432
497
  Prefix=prefix,
433
498
  Delimiter=delimiter,
434
- ContinuationToken=resp['NextContinuationToken'],
435
- MaxKeys=max_keys)
499
+ ContinuationToken=resp["NextContinuationToken"],
500
+ MaxKeys=max_keys,
501
+ )
436
502
 
437
503
 
438
504
  def _make_stat(content: Dict[str, Any]):
439
505
  return StatResult(
440
- islnk=content.get('islnk', False),
441
- size=content['Size'],
442
- mtime=content['LastModified'].timestamp(),
506
+ islnk=content.get("islnk", False),
507
+ size=content["Size"],
508
+ mtime=content["LastModified"].timestamp(),
443
509
  extra=content,
444
510
  )
445
511
 
446
512
 
447
513
  def _s3_glob_stat_single_path(
448
- s3_pathname: PathLike,
449
- recursive: bool = True,
450
- missing_ok: bool = True,
451
- followlinks: bool = False,
452
- profile_name: Optional[str] = None) -> Iterator[FileEntry]:
514
+ s3_pathname: PathLike,
515
+ recursive: bool = True,
516
+ missing_ok: bool = True,
517
+ followlinks: bool = False,
518
+ profile_name: Optional[str] = None,
519
+ ) -> Iterator[FileEntry]:
453
520
  s3_pathname = fspath(s3_pathname)
454
521
  if not recursive:
455
522
  # If not recursive, replace ** with *
456
- s3_pathname = re.sub(r'\*{2,}', '*', s3_pathname)
523
+ s3_pathname = re.sub(r"\*{2,}", "*", s3_pathname)
457
524
  top_dir, wildcard_part = _s3_split_magic(s3_pathname)
458
- search_dir = wildcard_part.endswith('/')
525
+ search_dir = wildcard_part.endswith("/")
459
526
 
460
527
  def should_recursive(wildcard_part: str) -> bool:
461
- if '**' in wildcard_part:
528
+ if "**" in wildcard_part:
462
529
  return True
463
530
  for expanded_path in ungloblize(wildcard_part):
464
- parts_length = len(expanded_path.split('/'))
531
+ parts_length = len(expanded_path.split("/"))
465
532
  if parts_length + search_dir >= 2:
466
533
  return True
467
534
  return False
@@ -469,24 +536,23 @@ def _s3_glob_stat_single_path(
469
536
  def create_generator(_s3_pathname) -> Iterator[FileEntry]:
470
537
  top_dir_with_profile = top_dir
471
538
  if profile_name:
472
- top_dir_with_profile = f's3+{profile_name}://{top_dir[5:]}'
539
+ top_dir_with_profile = f"s3+{profile_name}://{top_dir[5:]}"
473
540
  if not S3Path(top_dir_with_profile).exists():
474
541
  return
475
542
  if not has_magic(_s3_pathname):
476
543
  _s3_pathname_obj = S3Path(_s3_pathname)
477
544
  if _s3_pathname_obj.is_file():
478
545
  stat = S3Path(_s3_pathname).stat(follow_symlinks=followlinks)
479
- yield FileEntry(
480
- _s3_pathname_obj.name, _s3_pathname_obj.path, stat)
546
+ yield FileEntry(_s3_pathname_obj.name, _s3_pathname_obj.path, stat)
481
547
  if _s3_pathname_obj.is_dir():
482
548
  yield FileEntry(
483
- _s3_pathname_obj.name, _s3_pathname_obj.path,
484
- StatResult(isdir=True))
549
+ _s3_pathname_obj.name, _s3_pathname_obj.path, StatResult(isdir=True)
550
+ )
485
551
  return
486
552
 
487
- delimiter = ''
553
+ delimiter = ""
488
554
  if not should_recursive(wildcard_part):
489
- delimiter = '/'
555
+ delimiter = "/"
490
556
 
491
557
  dirnames = set()
492
558
  pattern = re.compile(translate(_s3_pathname))
@@ -494,39 +560,39 @@ def _s3_glob_stat_single_path(
494
560
  prefix = _become_prefix(key)
495
561
  client = get_s3_client_with_cache(profile_name=profile_name)
496
562
  with raise_s3_error(_s3_pathname):
497
- for resp in _list_objects_recursive(client, bucket, prefix,
498
- delimiter):
499
- for content in resp.get('Contents', []):
500
- path = s3_path_join('s3://', bucket, content['Key'])
563
+ for resp in _list_objects_recursive(client, bucket, prefix, delimiter):
564
+ for content in resp.get("Contents", []):
565
+ path = s3_path_join("s3://", bucket, content["Key"])
501
566
  if not search_dir and pattern.match(path):
502
- yield FileEntry(
503
- S3Path(path).name, path, _make_stat(content))
567
+ yield FileEntry(S3Path(path).name, path, _make_stat(content))
504
568
  dirname = os.path.dirname(path)
505
569
  while dirname not in dirnames and dirname != top_dir:
506
570
  dirnames.add(dirname)
507
- path = dirname + '/' if search_dir else dirname
571
+ path = dirname + "/" if search_dir else dirname
508
572
  if pattern.match(path):
509
573
  yield FileEntry(
510
- S3Path(path).name, path, StatResult(isdir=True))
574
+ S3Path(path).name, path, StatResult(isdir=True)
575
+ )
511
576
  dirname = os.path.dirname(dirname)
512
- for common_prefix in resp.get('CommonPrefixes', []):
513
- path = s3_path_join(
514
- 's3://', bucket, common_prefix['Prefix'])
577
+ for common_prefix in resp.get("CommonPrefixes", []):
578
+ path = s3_path_join("s3://", bucket, common_prefix["Prefix"])
515
579
  dirname = os.path.dirname(path)
516
580
  if dirname not in dirnames and dirname != top_dir:
517
581
  dirnames.add(dirname)
518
- path = dirname + '/' if search_dir else dirname
582
+ path = dirname + "/" if search_dir else dirname
519
583
  if pattern.match(path):
520
584
  yield FileEntry(
521
- S3Path(path).name, path, StatResult(isdir=True))
585
+ S3Path(path).name, path, StatResult(isdir=True)
586
+ )
522
587
 
523
588
  return create_generator(s3_pathname)
524
589
 
525
590
 
526
- def _s3_scan_pairs(src_url: PathLike,
527
- dst_url: PathLike) -> Iterator[Tuple[PathLike, PathLike]]:
591
+ def _s3_scan_pairs(
592
+ src_url: PathLike, dst_url: PathLike
593
+ ) -> Iterator[Tuple[PathLike, PathLike]]:
528
594
  for src_file_path in S3Path(src_url).scan():
529
- content_path = src_file_path[len(src_url):]
595
+ content_path = src_file_path[len(src_url) :]
530
596
  if len(content_path) > 0:
531
597
  dst_file_path = s3_path_join(dst_url, content_path)
532
598
  else:
@@ -535,44 +601,45 @@ def _s3_scan_pairs(src_url: PathLike,
535
601
 
536
602
 
537
603
  def is_s3(path: PathLike) -> bool:
538
- '''
539
- 1. According to `aws-cli <https://docs.aws.amazon.com/cli/latest/reference/s3/index.html>`_ , test if a path is s3 path.
604
+ """
605
+ 1. According to
606
+ `aws-cli <https://docs.aws.amazon.com/cli/latest/reference/s3/index.html>`_ ,
607
+ test if a path is s3 path.
540
608
  2. megfile also support the path like `s3[+profile_name]://bucket/key`
541
609
 
542
610
  :param path: Path to be tested
543
611
  :returns: True if path is s3 path, else False
544
- '''
612
+ """
545
613
  path = fspath(path)
546
- if re.match(r'^s3(\+\w+)?:\/\/', path):
614
+ if re.match(r"^s3(\+\w+)?:\/\/", path):
547
615
  return True
548
616
  return False
549
617
 
550
618
 
551
619
  def _s3_binary_mode(s3_open_func):
552
-
553
620
  @wraps(s3_open_func)
554
621
  def wrapper(
555
- s3_url,
556
- mode: str = 'rb',
557
- encoding: Optional[str] = None,
558
- errors: Optional[str] = None,
559
- **kwargs):
622
+ s3_url,
623
+ mode: str = "rb",
624
+ encoding: Optional[str] = None,
625
+ errors: Optional[str] = None,
626
+ **kwargs,
627
+ ):
560
628
  bucket, key = parse_s3_url(s3_url)
561
629
  if not bucket:
562
- raise S3BucketNotFoundError('Empty bucket name: %r' % s3_url)
630
+ raise S3BucketNotFoundError("Empty bucket name: %r" % s3_url)
563
631
 
564
- if not key or key.endswith('/'):
565
- raise S3IsADirectoryError('Is a directory: %r' % s3_url)
632
+ if not key or key.endswith("/"):
633
+ raise S3IsADirectoryError("Is a directory: %r" % s3_url)
566
634
 
567
- if 'x' in mode:
635
+ if "x" in mode:
568
636
  if S3Path(s3_url).is_file():
569
- raise S3FileExistsError('File exists: %r' % s3_url)
570
- mode = mode.replace('x', 'w')
637
+ raise S3FileExistsError("File exists: %r" % s3_url)
638
+ mode = mode.replace("x", "w")
571
639
 
572
640
  fileobj = s3_open_func(s3_url, get_binary_mode(mode), **kwargs)
573
- if 'b' not in mode:
574
- fileobj = io.TextIOWrapper(
575
- fileobj, encoding=encoding, errors=errors) # type: ignore
641
+ if "b" not in mode:
642
+ fileobj = io.TextIOWrapper(fileobj, encoding=encoding, errors=errors) # type: ignore
576
643
  fileobj.mode = mode # pyre-ignore[41]
577
644
  return fileobj
578
645
 
@@ -581,13 +648,15 @@ def _s3_binary_mode(s3_open_func):
581
648
 
582
649
  @_s3_binary_mode
583
650
  def s3_prefetch_open(
584
- s3_url: PathLike,
585
- mode: str = 'rb',
586
- followlinks: bool = False,
587
- *,
588
- max_concurrency: Optional[int] = None,
589
- max_block_size: int = DEFAULT_BLOCK_SIZE) -> S3PrefetchReader:
590
- '''Open a asynchronous prefetch reader, to support fast sequential read and random read
651
+ s3_url: PathLike,
652
+ mode: str = "rb",
653
+ followlinks: bool = False,
654
+ *,
655
+ max_concurrency: Optional[int] = None,
656
+ max_block_size: int = DEFAULT_BLOCK_SIZE,
657
+ ) -> S3PrefetchReader:
658
+ """Open a asynchronous prefetch reader, to support fast sequential
659
+ read and random read
591
660
 
592
661
  .. note ::
593
662
 
@@ -595,15 +664,17 @@ def s3_prefetch_open(
595
664
 
596
665
  Supports context manager
597
666
 
598
- Some parameter setting may perform well: max_concurrency=10 or 20, max_block_size=8 or 16 MB, default value None means using global thread pool
667
+ Some parameter setting may perform well: max_concurrency=10 or 20,
668
+ max_block_size=8 or 16 MB, default value None means using global thread pool
599
669
 
600
670
  :param max_concurrency: Max download thread number, None by default
601
- :param max_block_size: Max data size downloaded by each thread, in bytes, 8MB by default
671
+ :param max_block_size: Max data size downloaded by each thread, in bytes,
672
+ 8MB by default
602
673
  :returns: An opened S3PrefetchReader object
603
674
  :raises: S3FileNotFoundError
604
- '''
605
- if mode != 'rb':
606
- raise ValueError('unacceptable mode: %r' % mode)
675
+ """
676
+ if mode != "rb":
677
+ raise ValueError("unacceptable mode: %r" % mode)
607
678
  if not isinstance(s3_url, S3Path):
608
679
  s3_url = S3Path(s3_url)
609
680
  if followlinks:
@@ -614,8 +685,7 @@ def s3_prefetch_open(
614
685
 
615
686
  bucket, key = parse_s3_url(s3_url.path_with_protocol)
616
687
  config = botocore.config.Config(max_pool_connections=max_pool_connections)
617
- client = get_s3_client_with_cache(
618
- config=config, profile_name=s3_url._profile_name)
688
+ client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
619
689
  return S3PrefetchReader(
620
690
  bucket,
621
691
  key,
@@ -623,19 +693,22 @@ def s3_prefetch_open(
623
693
  max_retries=max_retries,
624
694
  max_workers=max_concurrency,
625
695
  block_size=max_block_size,
626
- profile_name=s3_url._profile_name)
696
+ profile_name=s3_url._profile_name,
697
+ )
627
698
 
628
699
 
629
700
  @_s3_binary_mode
630
701
  def s3_share_cache_open(
631
- s3_url: PathLike,
632
- mode: str = 'rb',
633
- followlinks: bool = False,
634
- *,
635
- cache_key: str = 'lru',
636
- max_concurrency: Optional[int] = None,
637
- max_block_size: int = DEFAULT_BLOCK_SIZE) -> S3ShareCacheReader:
638
- '''Open a asynchronous prefetch reader, to support fast sequential read and random read
702
+ s3_url: PathLike,
703
+ mode: str = "rb",
704
+ followlinks: bool = False,
705
+ *,
706
+ cache_key: str = "lru",
707
+ max_concurrency: Optional[int] = None,
708
+ max_block_size: int = DEFAULT_BLOCK_SIZE,
709
+ ) -> S3ShareCacheReader:
710
+ """Open a asynchronous prefetch reader, to support fast sequential read and
711
+ random read
639
712
 
640
713
  .. note ::
641
714
 
@@ -643,15 +716,17 @@ def s3_share_cache_open(
643
716
 
644
717
  Supports context manager
645
718
 
646
- Some parameter setting may perform well: max_concurrency=10 or 20, max_block_size=8 or 16 MB, default value None means using global thread pool
719
+ Some parameter setting may perform well: max_concurrency=10 or 20,
720
+ max_block_size=8 or 16 MB, default value None means using global thread pool
647
721
 
648
722
  :param max_concurrency: Max download thread number, None by default
649
- :param max_block_size: Max data size downloaded by each thread, in bytes, 8MB by default
723
+ :param max_block_size: Max data size downloaded by each thread, in bytes,
724
+ 8MB by default
650
725
  :returns: An opened S3ShareCacheReader object
651
726
  :raises: S3FileNotFoundError
652
- '''
653
- if mode != 'rb':
654
- raise ValueError('unacceptable mode: %r' % mode)
727
+ """
728
+ if mode != "rb":
729
+ raise ValueError("unacceptable mode: %r" % mode)
655
730
 
656
731
  if not isinstance(s3_url, S3Path):
657
732
  s3_url = S3Path(s3_url)
@@ -663,8 +738,7 @@ def s3_share_cache_open(
663
738
 
664
739
  bucket, key = parse_s3_url(s3_url.path_with_protocol)
665
740
  config = botocore.config.Config(max_pool_connections=max_pool_connections)
666
- client = get_s3_client_with_cache(
667
- config=config, profile_name=s3_url._profile_name)
741
+ client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
668
742
  return S3ShareCacheReader(
669
743
  bucket,
670
744
  key,
@@ -673,17 +747,16 @@ def s3_share_cache_open(
673
747
  max_retries=max_retries,
674
748
  max_workers=max_concurrency,
675
749
  block_size=max_block_size,
676
- profile_name=s3_url._profile_name)
750
+ profile_name=s3_url._profile_name,
751
+ )
677
752
 
678
753
 
679
754
  @_s3_binary_mode
680
755
  def s3_pipe_open(
681
- s3_url: PathLike,
682
- mode: str,
683
- followlinks: bool = False,
684
- *,
685
- join_thread: bool = True) -> S3PipeHandler:
686
- '''Open a asynchronous read-write reader / writer, to support fast sequential read / write
756
+ s3_url: PathLike, mode: str, followlinks: bool = False, *, join_thread: bool = True
757
+ ) -> S3PipeHandler:
758
+ """Open a asynchronous read-write reader / writer, to support fast sequential
759
+ read / write
687
760
 
688
761
  .. note ::
689
762
 
@@ -691,19 +764,24 @@ def s3_pipe_open(
691
764
 
692
765
  Supports context manager
693
766
 
694
- When join_thread is False, while the file handle are closing, this function will not wait until the asynchronous writing finishes;
695
- False doesn't affect read-handle, but this can speed up write-handle because file will be written asynchronously.
696
- But asynchronous behavior can guarantee the file are successfully written, and frequent execution may cause thread and file handle exhaustion
767
+ When join_thread is False, while the file handle are closing,
768
+ this function will not wait until the asynchronous writing finishes;
769
+
770
+ False doesn't affect read-handle, but this can speed up write-handle because
771
+ file will be written asynchronously.
772
+
773
+ But asynchronous behavior can guarantee the file are successfully written,
774
+ and frequent execution may cause thread and file handle exhaustion
697
775
 
698
776
  :param mode: Mode to open file, either "rb" or "wb"
699
777
  :param join_thread: If wait after function execution until s3 finishes writing
700
778
  :returns: An opened BufferedReader / BufferedWriter object
701
- '''
702
- if mode not in ('rb', 'wb'):
703
- raise ValueError('unacceptable mode: %r' % mode)
779
+ """
780
+ if mode not in ("rb", "wb"):
781
+ raise ValueError("unacceptable mode: %r" % mode)
704
782
 
705
- if mode[0] == 'r' and not S3Path(s3_url).is_file():
706
- raise S3FileNotFoundError('No such file: %r' % s3_url)
783
+ if mode[0] == "r" and not S3Path(s3_url).is_file():
784
+ raise S3FileNotFoundError("No such file: %r" % s3_url)
707
785
 
708
786
  if not isinstance(s3_url, S3Path):
709
787
  s3_url = S3Path(s3_url)
@@ -715,25 +793,26 @@ def s3_pipe_open(
715
793
 
716
794
  bucket, key = parse_s3_url(s3_url.path_with_protocol)
717
795
  config = botocore.config.Config(max_pool_connections=max_pool_connections)
718
- client = get_s3_client_with_cache(
719
- config=config, profile_name=s3_url._profile_name)
796
+ client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
720
797
  return S3PipeHandler(
721
798
  bucket,
722
799
  key,
723
800
  mode,
724
801
  s3_client=client,
725
802
  join_thread=join_thread,
726
- profile_name=s3_url._profile_name)
803
+ profile_name=s3_url._profile_name,
804
+ )
727
805
 
728
806
 
729
807
  @_s3_binary_mode
730
808
  def s3_cached_open(
731
- s3_url: PathLike,
732
- mode: str,
733
- followlinks: bool = False,
734
- *,
735
- cache_path: Optional[str] = None) -> S3CachedHandler:
736
- '''Open a local-cache file reader / writer, for frequent random read / write
809
+ s3_url: PathLike,
810
+ mode: str,
811
+ followlinks: bool = False,
812
+ *,
813
+ cache_path: Optional[str] = None,
814
+ ) -> S3CachedHandler:
815
+ """Open a local-cache file reader / writer, for frequent random read / write
737
816
 
738
817
  .. note ::
739
818
 
@@ -741,14 +820,15 @@ def s3_cached_open(
741
820
 
742
821
  Supports context manager
743
822
 
744
- cache_path can specify the path of cache file. Performance could be better if cache file path is on ssd or tmpfs
823
+ cache_path can specify the path of cache file. Performance could be better
824
+ if cache file path is on ssd or tmpfs
745
825
 
746
826
  :param mode: Mode to open file, could be one of "rb", "wb" or "ab"
747
827
  :param cache_path: cache file path
748
828
  :returns: An opened BufferedReader / BufferedWriter object
749
- '''
750
- if mode not in ('rb', 'wb', 'ab', 'rb+', 'wb+', 'ab+'):
751
- raise ValueError('unacceptable mode: %r' % mode)
829
+ """
830
+ if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
831
+ raise ValueError("unacceptable mode: %r" % mode)
752
832
  if not isinstance(s3_url, S3Path):
753
833
  s3_url = S3Path(s3_url)
754
834
  if followlinks:
@@ -759,34 +839,35 @@ def s3_cached_open(
759
839
 
760
840
  bucket, key = parse_s3_url(s3_url.path_with_protocol)
761
841
  config = botocore.config.Config(max_pool_connections=max_pool_connections)
762
- client = get_s3_client_with_cache(
763
- config=config, profile_name=s3_url._profile_name)
842
+ client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
764
843
  return S3CachedHandler(
765
844
  bucket,
766
845
  key,
767
846
  mode,
768
847
  s3_client=client,
769
848
  cache_path=cache_path,
770
- profile_name=s3_url._profile_name)
849
+ profile_name=s3_url._profile_name,
850
+ )
771
851
 
772
852
 
773
853
  @_s3_binary_mode
774
854
  def s3_buffered_open(
775
- s3_url: PathLike,
776
- mode: str,
777
- followlinks: bool = False,
778
- *,
779
- max_concurrency: Optional[int] = None,
780
- max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
781
- forward_ratio: Optional[float] = None,
782
- block_size: Optional[int] = None,
783
- limited_seekable: bool = False,
784
- buffered: bool = False,
785
- share_cache_key: Optional[str] = None,
786
- cache_path: Optional[str] = None,
787
- min_block_size: Optional[int] = None,
788
- max_block_size: int = DEFAULT_MAX_BLOCK_SIZE) -> IO:
789
- '''Open an asynchronous prefetch reader, to support fast sequential read
855
+ s3_url: PathLike,
856
+ mode: str,
857
+ followlinks: bool = False,
858
+ *,
859
+ max_concurrency: Optional[int] = None,
860
+ max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
861
+ forward_ratio: Optional[float] = None,
862
+ block_size: Optional[int] = None,
863
+ limited_seekable: bool = False,
864
+ buffered: bool = False,
865
+ share_cache_key: Optional[str] = None,
866
+ cache_path: Optional[str] = None,
867
+ min_block_size: Optional[int] = None,
868
+ max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
869
+ ) -> IO:
870
+ """Open an asynchronous prefetch reader, to support fast sequential read
790
871
 
791
872
  .. note ::
792
873
 
@@ -794,19 +875,26 @@ def s3_buffered_open(
794
875
 
795
876
  Supports context manager
796
877
 
797
- Some parameter setting may perform well: max_concurrency=10 or 20, max_block_size=8 or 16 MB, default value None means using global thread pool
878
+ Some parameter setting may perform well: max_concurrency=10 or 20,
879
+ max_block_size=8 or 16 MB, default value None means using global thread pool
798
880
 
799
881
  :param max_concurrency: Max download thread number, None by default
800
882
  :param max_buffer_size: Max cached buffer size in memory, 128MB by default
801
- :param min_block_size: Min size of single block, default is same as block_size. Each block will be downloaded by single thread.
802
- :param max_block_size: Max size of single block, 128MB by default. Each block will be downloaded by single thread.
803
- :param block_size: Size of single block, 8MB by default. Each block will be uploaded by single thread.
804
- :param limited_seekable: If write-handle supports limited seek (both file head part and tail part can seek block_size). Notes: This parameter are valid only for write-handle. Read-handle support arbitrary seek
883
+ :param min_block_size: Min size of single block, default is same as block_size.
884
+ Each block will be downloaded by single thread.
885
+ :param max_block_size: Max size of single block, 128MB by default.
886
+ Each block will be downloaded by single thread.
887
+ :param block_size: Size of single block, 8MB by default.
888
+ Each block will be uploaded by single thread.
889
+ :param limited_seekable: If write-handle supports limited seek
890
+ (both file head part and tail part can seek block_size).
891
+ Notes: This parameter are valid only for write-handle.
892
+ Read-handle support arbitrary seek
805
893
  :returns: An opened S3PrefetchReader object
806
894
  :raises: S3FileNotFoundError
807
- '''
808
- if mode not in ('rb', 'wb', 'ab', 'rb+', 'wb+', 'ab+'):
809
- raise ValueError('unacceptable mode: %r' % mode)
895
+ """
896
+ if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
897
+ raise ValueError("unacceptable mode: %r" % mode)
810
898
  if not isinstance(s3_url, S3Path):
811
899
  s3_url = S3Path(s3_url)
812
900
  if followlinks:
@@ -819,26 +907,23 @@ def s3_buffered_open(
819
907
 
820
908
  bucket, key = parse_s3_url(s3_url.path_with_protocol)
821
909
  config = botocore.config.Config(max_pool_connections=max_pool_connections)
822
- client = get_s3_client_with_cache(
823
- config=config, profile_name=s3_url._profile_name)
910
+ client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
824
911
 
825
- if 'a' in mode or '+' in mode:
912
+ if "a" in mode or "+" in mode:
826
913
  if cache_path is None:
827
914
  return S3MemoryHandler(
828
- bucket,
829
- key,
830
- mode,
831
- s3_client=client,
832
- profile_name=s3_url._profile_name)
915
+ bucket, key, mode, s3_client=client, profile_name=s3_url._profile_name
916
+ )
833
917
  return S3CachedHandler(
834
918
  bucket,
835
919
  key,
836
920
  mode,
837
921
  s3_client=client,
838
922
  cache_path=cache_path,
839
- profile_name=s3_url._profile_name)
923
+ profile_name=s3_url._profile_name,
924
+ )
840
925
 
841
- if mode == 'rb':
926
+ if mode == "rb":
842
927
  # A rough conversion algorithm to align 2 types of Reader / Writer parameters
843
928
  # TODO: Optimize the conversion algorithm
844
929
  block_capacity = max_buffer_size // block_size
@@ -856,7 +941,8 @@ def s3_buffered_open(
856
941
  max_workers=max_concurrency,
857
942
  block_size=block_size,
858
943
  block_forward=block_forward,
859
- profile_name=s3_url._profile_name)
944
+ profile_name=s3_url._profile_name,
945
+ )
860
946
  else:
861
947
  reader = S3PrefetchReader(
862
948
  bucket,
@@ -867,7 +953,8 @@ def s3_buffered_open(
867
953
  block_capacity=block_capacity,
868
954
  block_forward=block_forward,
869
955
  block_size=block_size,
870
- profile_name=s3_url._profile_name)
956
+ profile_name=s3_url._profile_name,
957
+ )
871
958
  if buffered or _is_pickle(reader):
872
959
  reader = io.BufferedReader(reader) # type: ignore
873
960
  return reader
@@ -881,7 +968,8 @@ def s3_buffered_open(
881
968
  block_size=min_block_size,
882
969
  max_block_size=max_block_size,
883
970
  max_buffer_size=max_buffer_size,
884
- profile_name=s3_url._profile_name)
971
+ profile_name=s3_url._profile_name,
972
+ )
885
973
  else:
886
974
  writer = S3BufferedWriter(
887
975
  bucket,
@@ -891,7 +979,8 @@ def s3_buffered_open(
891
979
  block_size=min_block_size,
892
980
  max_block_size=max_block_size,
893
981
  max_buffer_size=max_buffer_size,
894
- profile_name=s3_url._profile_name)
982
+ profile_name=s3_url._profile_name,
983
+ )
895
984
  if buffered or _is_pickle(writer):
896
985
  writer = io.BufferedWriter(writer) # type: ignore
897
986
  return writer
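
A hedged usage sketch of s3_buffered_open, based on the signature and docstring in the hunks above; the bucket and keys are illustrative, and credentials/endpoint are assumed to be configured through the environment variables handled earlier in this module:

from megfile.s3_path import s3_buffered_open

# Read path: asynchronous prefetch reader; max_concurrency follows the
# "max_concurrency=10 or 20" hint in the docstring.
with s3_buffered_open("s3://my-bucket/data.bin", "rb", max_concurrency=10) as reader:
    header = reader.read(1024)

# Write path: buffered multipart writer (S3BufferedWriter under the hood).
with s3_buffered_open("s3://my-bucket/copy.bin", "wb") as writer:
    writer.write(header)
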
@@ -899,10 +988,9 @@ def s3_buffered_open(
899
988
 
900
989
  @_s3_binary_mode
901
990
  def s3_memory_open(
902
- s3_url: PathLike,
903
- mode: str,
904
- followlinks: bool = False) -> S3MemoryHandler:
905
- '''Open a memory-cache file reader / writer, for frequent random read / write
991
+ s3_url: PathLike, mode: str, followlinks: bool = False
992
+ ) -> S3MemoryHandler:
993
+ """Open a memory-cache file reader / writer, for frequent random read / write
906
994
 
907
995
  .. note ::
908
996
 
@@ -910,11 +998,12 @@ def s3_memory_open(
910
998
 
911
999
  Supports context manager
912
1000
 
913
- :param mode: Mode to open file, could be one of "rb", "wb", "ab", "rb+", "wb+" or "ab+"
1001
+ :param mode: Mode to open file, could be one of "rb", "wb", "ab", "rb+",
1002
+ "wb+" or "ab+"
914
1003
  :returns: An opened BufferedReader / BufferedWriter object
915
- '''
916
- if mode not in ('rb', 'wb', 'ab', 'rb+', 'wb+', 'ab+'):
917
- raise ValueError('unacceptable mode: %r' % mode)
1004
+ """
1005
+ if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
1006
+ raise ValueError("unacceptable mode: %r" % mode)
918
1007
  if not isinstance(s3_url, S3Path):
919
1008
  s3_url = S3Path(s3_url)
920
1009
  if followlinks:
@@ -925,37 +1014,40 @@ def s3_memory_open(
925
1014
 
926
1015
  bucket, key = parse_s3_url(s3_url.path_with_protocol)
927
1016
  config = botocore.config.Config(max_pool_connections=max_pool_connections)
928
- client = get_s3_client_with_cache(
929
- config=config, profile_name=s3_url._profile_name)
1017
+ client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
930
1018
  return S3MemoryHandler(
931
- bucket, key, mode, s3_client=client, profile_name=s3_url._profile_name)
1019
+ bucket, key, mode, s3_client=client, profile_name=s3_url._profile_name
1020
+ )
932
1021
 
933
1022
 
934
1023
  s3_open = s3_buffered_open
935
1024
 
936
1025
 
937
1026
  def s3_download(
938
- src_url: PathLike,
939
- dst_url: PathLike,
940
- callback: Optional[Callable[[int], None]] = None,
941
- followlinks: bool = False,
942
- overwrite: bool = True) -> None:
943
- '''
1027
+ src_url: PathLike,
1028
+ dst_url: PathLike,
1029
+ callback: Optional[Callable[[int], None]] = None,
1030
+ followlinks: bool = False,
1031
+ overwrite: bool = True,
1032
+ ) -> None:
1033
+ """
944
1034
  Downloads a file from s3 to local filesystem.
1035
+
945
1036
  :param src_url: source s3 path
946
1037
  :param dst_url: target fs path
947
- :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
1038
+ :param callback: Called periodically during copy, and the input parameter is
1039
+ the data size (in bytes) of copy since the last call
948
1040
  :param followlinks: False if regard symlink as file, else True
949
1041
  :param overwrite: whether or not overwrite file when exists, default is True
950
- '''
1042
+ """
951
1043
  from megfile.fs import is_fs
952
1044
  from megfile.fs_path import FSPath
953
1045
 
954
1046
  dst_url = fspath(dst_url)
955
1047
  if not is_fs(dst_url):
956
- raise OSError(f'dst_url is not fs path: {dst_url}')
957
- if not dst_url or dst_url.endswith('/'):
958
- raise S3IsADirectoryError('Is a directory: %r' % dst_url)
1048
+ raise OSError(f"dst_url is not fs path: {dst_url}")
1049
+ if not dst_url or dst_url.endswith("/"):
1050
+ raise S3IsADirectoryError("Is a directory: %r" % dst_url)
959
1051
 
960
1052
  dst_path = FSPath(dst_url)
961
1053
  if not overwrite and dst_path.exists():
@@ -971,104 +1063,97 @@ def s3_download(
971
1063
  src_bucket, src_key = parse_s3_url(src_url.path_with_protocol)
972
1064
  if not src_bucket:
973
1065
  raise S3BucketNotFoundError(
974
- 'Empty bucket name: %r' % src_url.path_with_protocol)
1066
+ "Empty bucket name: %r" % src_url.path_with_protocol
1067
+ )
975
1068
 
976
1069
  if not src_url.exists():
977
- raise S3FileNotFoundError(
978
- 'File not found: %r' % src_url.path_with_protocol)
1070
+ raise S3FileNotFoundError("File not found: %r" % src_url.path_with_protocol)
979
1071
 
980
1072
  if not src_url.is_file():
981
- raise S3IsADirectoryError(
982
- 'Is a directory: %r' % src_url.path_with_protocol)
1073
+ raise S3IsADirectoryError("Is a directory: %r" % src_url.path_with_protocol)
983
1074
 
984
1075
  dst_directory = os.path.dirname(dst_path.path_without_protocol)
985
- if dst_directory != '':
1076
+ if dst_directory != "":
986
1077
  os.makedirs(dst_directory, exist_ok=True)
987
1078
 
988
1079
  client = get_s3_client_with_cache(profile_name=src_url._profile_name)
989
1080
  download_file = patch_method(
990
- client.download_file,
991
- max_retries=max_retries,
992
- should_retry=s3_should_retry,
1081
+ client.download_file, max_retries=max_retries, should_retry=s3_should_retry
993
1082
  )
994
1083
  try:
995
1084
  download_file(
996
- src_bucket,
997
- src_key,
998
- dst_path.path_without_protocol,
999
- Callback=callback)
1085
+ src_bucket, src_key, dst_path.path_without_protocol, Callback=callback
1086
+ )
1000
1087
  except Exception as error:
1001
1088
  error = translate_fs_error(error, dst_url)
1002
1089
  error = translate_s3_error(error, src_url.path_with_protocol)
1003
1090
  raise error
1004
1091
 
1005
1092
  src_stat = src_url.stat()
1006
- os.utime(
1007
- dst_path.path_without_protocol, (src_stat.st_mtime, src_stat.st_mtime))
1093
+ os.utime(dst_path.path_without_protocol, (src_stat.st_mtime, src_stat.st_mtime))
1008
1094
 
1009
1095
 
1010
1096
  def s3_upload(
1011
- src_url: PathLike,
1012
- dst_url: PathLike,
1013
- callback: Optional[Callable[[int], None]] = None,
1014
- followlinks: bool = False,
1015
- overwrite: bool = True) -> None:
1016
- '''
1097
+ src_url: PathLike,
1098
+ dst_url: PathLike,
1099
+ callback: Optional[Callable[[int], None]] = None,
1100
+ followlinks: bool = False,
1101
+ overwrite: bool = True,
1102
+ ) -> None:
1103
+ """
1017
1104
  Uploads a file from local filesystem to s3.
1105
+
1018
1106
  :param src_url: source fs path
1019
1107
  :param dst_url: target s3 path
1020
- :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
1108
+ :param callback: Called periodically during copy, and the input parameter is
1109
+ the data size (in bytes) of copy since the last call
1021
1110
  :param followlinks: False if regard symlink as file, else True
1022
1111
  :param overwrite: whether or not overwrite file when exists, default is True
1023
- '''
1112
+ """
1024
1113
  from megfile.fs import is_fs
1025
1114
  from megfile.fs_path import FSPath
1026
1115
 
1027
1116
  if not is_fs(src_url):
1028
- raise OSError(f'src_url is not fs path: {src_url}')
1117
+ raise OSError(f"src_url is not fs path: {src_url}")
1029
1118
  src_path = FSPath(src_url)
1030
1119
  if followlinks and src_path.is_symlink():
1031
1120
  src_path = src_path.readlink()
1032
1121
 
1033
1122
  dst_bucket, dst_key = parse_s3_url(dst_url)
1034
1123
  if not dst_bucket:
1035
- raise S3BucketNotFoundError('Empty bucket name: %r' % dst_url)
1036
- if not dst_key or dst_key.endswith('/'):
1037
- raise S3IsADirectoryError('Is a directory: %r' % dst_url)
1124
+ raise S3BucketNotFoundError("Empty bucket name: %r" % dst_url)
1125
+ if not dst_key or dst_key.endswith("/"):
1126
+ raise S3IsADirectoryError("Is a directory: %r" % dst_url)
1038
1127
 
1039
1128
  if not overwrite and S3Path(dst_url).is_file():
1040
1129
  return
1041
1130
 
1042
- client = get_s3_client_with_cache(
1043
- profile_name=S3Path(dst_url)._profile_name)
1131
+ client = get_s3_client_with_cache(profile_name=S3Path(dst_url)._profile_name)
1044
1132
  upload_fileobj = patch_method(
1045
- client.upload_fileobj,
1046
- max_retries=max_retries,
1047
- should_retry=s3_should_retry,
1133
+ client.upload_fileobj, max_retries=max_retries, should_retry=s3_should_retry
1048
1134
  )
1049
1135
 
1050
- with open(src_path.path_without_protocol,
1051
- 'rb') as src, raise_s3_error(dst_url):
1136
+ with open(src_path.path_without_protocol, "rb") as src, raise_s3_error(dst_url):
1052
1137
  upload_fileobj(src, Bucket=dst_bucket, Key=dst_key, Callback=callback)
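
A hedged usage sketch of s3_download and s3_upload as documented above; the paths are illustrative, and per the docstrings the callback receives the number of bytes copied since its previous call:

from megfile.s3_path import s3_download, s3_upload

copied = 0

def progress(nbytes: int) -> None:
    # called periodically with the byte count copied since the last call
    global copied
    copied += nbytes

s3_download("s3://my-bucket/model.ckpt", "/tmp/model.ckpt", callback=progress)
# overwrite=False skips the copy when the destination already exists
s3_upload("/tmp/model.ckpt", "s3://my-bucket/backup/model.ckpt", overwrite=False)
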
1053
1138
 
1054
1139
 
1055
1140
  def s3_load_content(
1056
- s3_url,
1057
- start: Optional[int] = None,
1058
- stop: Optional[int] = None,
1059
- followlinks: bool = False) -> bytes:
1060
- '''
1141
+ s3_url,
1142
+ start: Optional[int] = None,
1143
+ stop: Optional[int] = None,
1144
+ followlinks: bool = False,
1145
+ ) -> bytes:
1146
+ """
1061
1147
  Get specified file from [start, stop) in bytes
1062
1148
 
1063
1149
  :param s3_url: Specified path
1064
1150
  :param start: start index
1065
1151
  :param stop: stop index
1066
1152
  :returns: bytes content in range [start, stop)
1067
- '''
1153
+ """
1068
1154
 
1069
1155
  def _get_object(client, bucket, key, range_str):
1070
- return client.get_object(
1071
- Bucket=bucket, Key=key, Range=range_str)['Body'].read()
1156
+ return client.get_object(Bucket=bucket, Key=key, Range=range_str)["Body"].read()
1072
1157
 
1073
1158
  s3_url = S3Path(s3_url)
1074
1159
  if followlinks:
@@ -1079,65 +1164,60 @@ def s3_load_content(
1079
1164
 
1080
1165
  bucket, key = parse_s3_url(s3_url.path_with_protocol)
1081
1166
  if not bucket:
1082
- raise S3BucketNotFoundError('Empty bucket name: %r' % s3_url)
1083
- if not key or key.endswith('/'):
1084
- raise S3IsADirectoryError('Is a directory: %r' % s3_url)
1167
+ raise S3BucketNotFoundError("Empty bucket name: %r" % s3_url)
1168
+ if not key or key.endswith("/"):
1169
+ raise S3IsADirectoryError("Is a directory: %r" % s3_url)
1085
1170
 
1086
- start, stop = get_content_offset(
1087
- start, stop, s3_url.getsize(follow_symlinks=False))
1171
+ start, stop = get_content_offset(start, stop, s3_url.getsize(follow_symlinks=False))
1088
1172
  if start == 0 and stop == 0:
1089
- return b''
1090
- range_str = 'bytes=%d-%d' % (start, stop - 1)
1173
+ return b""
1174
+ range_str = "bytes=%d-%d" % (start, stop - 1)
1091
1175
 
1092
1176
  client = get_s3_client_with_cache(profile_name=s3_url._profile_name)
1093
1177
  with raise_s3_error(s3_url.path):
1094
1178
  return patch_method(
1095
- _get_object,
1096
- max_retries=max_retries,
1097
- should_retry=s3_should_retry,
1179
+ _get_object, max_retries=max_retries, should_retry=s3_should_retry
1098
1180
  )(client, bucket, key, range_str)
1099
1181
 
1100
1182
 
1101
1183
  def s3_readlink(path) -> str:
1102
- '''
1184
+ """
1103
1185
  Return a string representing the path to which the symbolic link points.
1104
1186
 
1105
1187
  :returns: Return a string representing the path to which the symbolic link points.
1106
- :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError, S3NotALinkError
1107
- '''
1188
+ :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError,
1189
+ S3NotALinkError
1190
+ """
1108
1191
  return S3Path(path).readlink().path_with_protocol
1109
1192
 
1110
1193
 
1111
- def s3_rename(
1112
- src_url: PathLike, dst_url: PathLike, overwrite: bool = True) -> None:
1113
- '''
1194
+ def s3_rename(src_url: PathLike, dst_url: PathLike, overwrite: bool = True) -> None:
1195
+ """
1114
1196
  Move s3 file path from src_url to dst_url
1115
1197
 
1116
1198
  :param dst_url: Given destination path
1117
1199
  :param overwrite: whether or not overwrite file when exists
1118
- '''
1200
+ """
1119
1201
  S3Path(src_url).rename(dst_url, overwrite)
1120
1202
 
1121
1203
 
1122
1204
  class S3Cacher(FileCacher):
1123
1205
  cache_path = None
1124
1206
 
1125
- def __init__(
1126
- self, path: str, cache_path: Optional[str] = None, mode: str = 'r'):
1127
- if mode not in ('r', 'w', 'a'):
1128
- raise ValueError('unacceptable mode: %r' % mode)
1207
+ def __init__(self, path: str, cache_path: Optional[str] = None, mode: str = "r"):
1208
+ if mode not in ("r", "w", "a"):
1209
+ raise ValueError("unacceptable mode: %r" % mode)
1129
1210
  if cache_path is None:
1130
1211
  cache_path = generate_cache_path(path)
1131
- if mode in ('r', 'a'):
1212
+ if mode in ("r", "a"):
1132
1213
  s3_download(path, cache_path)
1133
1214
  self.name = path
1134
1215
  self.mode = mode
1135
1216
  self.cache_path = cache_path
1136
1217
 
1137
1218
  def _close(self):
1138
- if self.cache_path is not None and \
1139
- os.path.exists(self.cache_path):
1140
- if self.mode in ('w', 'a'):
1219
+ if self.cache_path is not None and os.path.exists(self.cache_path):
1220
+ if self.mode in ("w", "a"):
1141
1221
  s3_upload(self.cache_path, self.name)
1142
1222
  os.unlink(self.cache_path)
1143
1223
 
@@ -1148,40 +1228,50 @@ def s3_glob(
1148
1228
  missing_ok: bool = True,
1149
1229
  followlinks: bool = False,
1150
1230
  ) -> List[str]:
1151
- '''Return s3 path list in ascending alphabetical order, in which path matches glob pattern
1152
- Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
1231
+ """Return s3 path list in ascending alphabetical order,
1232
+ in which path matches glob pattern
1233
+
1234
+ Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
1235
+ raise UnsupportedError
1153
1236
 
1154
1237
  :param recursive: If False, `**` will not search directory recursively
1155
- :param missing_ok: If False and target path doesn't match any file, raise FileNotFoundError
1238
+ :param missing_ok: If False and target path doesn't match any file,
1239
+ raise FileNotFoundError
1156
1240
  :raises: UnsupportedError, when bucket part contains wildcard characters
1157
1241
  :returns: A list contains paths match `s3_pathname`
1158
- '''
1242
+ """
1159
1243
  return list(
1160
1244
  s3_iglob(
1161
1245
  path=path,
1162
1246
  recursive=recursive,
1163
1247
  missing_ok=missing_ok,
1164
- followlinks=followlinks))
1248
+ followlinks=followlinks,
1249
+ )
1250
+ )
1165
1251
 
1166
1252
 
1167
1253
  def s3_glob_stat(
1168
- path: PathLike,
1169
- recursive: bool = True,
1170
- missing_ok: bool = True,
1171
- followlinks: bool = False) -> Iterator[FileEntry]:
1172
- '''Return a generator contains tuples of path and file stat, in ascending alphabetical order, in which path matches glob pattern
1173
- Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
1254
+ path: PathLike,
1255
+ recursive: bool = True,
1256
+ missing_ok: bool = True,
1257
+ followlinks: bool = False,
1258
+ ) -> Iterator[FileEntry]:
1259
+ """Return a generator contains tuples of path and file stat,
1260
+ in ascending alphabetical order, in which path matches glob pattern
1261
+
1262
+ Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
1263
+ raise UnsupportedError
1174
1264
 
1175
1265
  :param recursive: If False, `**` will not search directory recursively
1176
- :param missing_ok: If False and target path doesn't match any file, raise FileNotFoundError
1266
+ :param missing_ok: If False and target path doesn't match any file,
1267
+ raise FileNotFoundError
1177
1268
  :raises: UnsupportedError, when bucket part contains wildcard characters
1178
- :returns: A generator contains tuples of path and file stat, in which paths match `s3_pathname`
1179
- '''
1269
+ :returns: A generator contains tuples of path and file stat,
1270
+ in which paths match `s3_pathname`
1271
+ """
1180
1272
  return S3Path(path).glob_stat(
1181
- pattern="",
1182
- recursive=recursive,
1183
- missing_ok=missing_ok,
1184
- followlinks=followlinks)
1273
+ pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
1274
+ )
1185
1275
 
1186
1276
 
1187
1277
  def s3_iglob(
@@ -1190,22 +1280,26 @@ def s3_iglob(
1190
1280
  missing_ok: bool = True,
1191
1281
  followlinks: bool = False,
1192
1282
  ) -> Iterator[str]:
1193
- '''Return s3 path iterator in ascending alphabetical order, in which path matches glob pattern
1194
- Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
1283
+ """Return s3 path iterator in ascending alphabetical order,
1284
+ in which path matches glob pattern
1285
+
1286
+ Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
1287
+ raise UnsupportedError
1195
1288
 
1196
1289
  :param recursive: If False, `**` will not search directory recursively
1197
- :param missing_ok: If False and target path doesn't match any file, raise FileNotFoundError
1290
+ :param missing_ok: If False and target path doesn't match any file,
1291
+ raise FileNotFoundError
1198
1292
  :raises: UnsupportedError, when bucket part contains wildcard characters
1199
1293
  :returns: An iterator contains paths match `s3_pathname`
1200
- '''
1201
- for path_obj in S3Path(path).iglob(pattern="", recursive=recursive,
1202
- missing_ok=missing_ok,
1203
- followlinks=followlinks):
1294
+ """
1295
+ for path_obj in S3Path(path).iglob(
1296
+ pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
1297
+ ):
1204
1298
  yield path_obj.path_with_protocol
1205
1299
 
1206
1300
 
1207
1301
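A usage sketch for the three glob helpers above. The bucket part must be literal — wildcards there raise UnsupportedError — and the FileEntry attribute names (path, stat) are taken from how entries are constructed later in this module.

from megfile.s3_path import s3_glob, s3_glob_stat, s3_iglob

# Every .json key under the prefix, recursing through nested prefixes.
paths = s3_glob("s3://my-bucket/logs/**/*.json", recursive=True)

# Same match, but as a lazy iterator of path strings.
for path in s3_iglob("s3://my-bucket/logs/**/*.json"):
    print(path)

# Each match together with its StatResult.
for entry in s3_glob_stat("s3://my-bucket/logs/**/*.json"):
    print(entry.path, entry.stat.size)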
  def s3_makedirs(path: PathLike, exist_ok: bool = False):
1208
- '''
1302
+ """
1209
1303
  Create an s3 directory.
1210
1304
  Purely creating directory is invalid because it's unavailable on OSS.
1211
1305
  This function is to test the target bucket have WRITE access.
@@ -1213,13 +1307,12 @@ def s3_makedirs(path: PathLike, exist_ok: bool = False):
1213
1307
  :param path: Given path
1214
1308
  :param exist_ok: If False and target directory exists, raise S3FileExistsError
1215
1309
  :raises: S3BucketNotFoundError, S3FileExistsError
1216
- '''
1310
+ """
1217
1311
  return S3Path(path).mkdir(parents=True, exist_ok=exist_ok)
1218
1312
 
1219
1313
 
1220
1314
  def _group_src_paths_by_block(
1221
- src_paths: List[PathLike],
1222
- block_size: int = DEFAULT_BLOCK_SIZE
1315
+ src_paths: List[PathLike], block_size: int = DEFAULT_BLOCK_SIZE
1223
1316
  ) -> List[List[Tuple[PathLike, Optional[str]]]]:
1224
1317
  groups = []
1225
1318
  current_group, current_group_size = [], 0
@@ -1232,18 +1325,18 @@ def _group_src_paths_by_block(
1232
1325
  if len(groups) == 0:
1233
1326
  if current_group_size + current_file_size > 2 * block_size:
1234
1327
  group_lack_size = block_size - current_group_size
1235
- current_group.append(
1236
- (src_path, f'bytes=0-{group_lack_size-1}'))
1328
+ current_group.append((src_path, f"bytes=0-{group_lack_size-1}"))
1237
1329
  groups.extend(
1238
1330
  [
1239
1331
  current_group,
1240
1332
  [
1241
1333
  (
1242
1334
  src_path,
1243
- f'bytes={group_lack_size}-{current_file_size-1}'
1335
+ f"bytes={group_lack_size}-{current_file_size-1}",
1244
1336
  )
1245
- ]
1246
- ])
1337
+ ],
1338
+ ]
1339
+ )
1247
1340
  else:
1248
1341
  current_group.append((src_path, None))
1249
1342
  groups.append(current_group)
@@ -1263,15 +1356,16 @@ def _group_src_paths_by_block(
1263
1356
 
1264
1357
 
1265
1358
  def s3_concat(
1266
- src_paths: List[PathLike],
1267
- dst_path: PathLike,
1268
- block_size: int = DEFAULT_BLOCK_SIZE,
1269
- max_workers: int = GLOBAL_MAX_WORKERS) -> None:
1270
- '''Concatenate s3 files to one file.
1359
+ src_paths: List[PathLike],
1360
+ dst_path: PathLike,
1361
+ block_size: int = DEFAULT_BLOCK_SIZE,
1362
+ max_workers: int = GLOBAL_MAX_WORKERS,
1363
+ ) -> None:
1364
+ """Concatenate s3 files to one file.
1271
1365
 
1272
1366
  :param src_paths: Given source paths
1273
1367
  :param dst_path: Given destination path
1274
- '''
1368
+ """
1275
1369
  client = S3Path(dst_path)._client
1276
1370
  with raise_s3_error(dst_path):
1277
1371
  if block_size == 0:
@@ -1280,24 +1374,27 @@ def s3_concat(
1280
1374
  groups = _group_src_paths_by_block(src_paths, block_size=block_size)
1281
1375
 
1282
1376
  with MultiPartWriter(client, dst_path) as writer, ThreadPoolExecutor(
1283
- max_workers=max_workers) as executor:
1377
+ max_workers=max_workers
1378
+ ) as executor:
1284
1379
  for index, group in enumerate(groups, start=1):
1285
1380
  if len(group) == 1:
1286
1381
  executor.submit(
1287
- writer.upload_part_copy, index, group[0][0],
1288
- group[0][1])
1382
+ writer.upload_part_copy, index, group[0][0], group[0][1]
1383
+ )
1289
1384
  else:
1290
1385
  executor.submit(writer.upload_part_by_paths, index, group)
1291
1386
 
1292
1387
 
1293
1388
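A sketch of the concat helper above; it drives a single multipart upload on the destination, grouping small sources into blocks of at least block_size. The paths are placeholders.

from megfile.s3_path import s3_concat

s3_concat(
    ["s3://my-bucket/shards/part-0", "s3://my-bucket/shards/part-1"],
    "s3://my-bucket/merged/all-parts",
)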
  def s3_lstat(path: PathLike) -> StatResult:
1294
- '''Like Path.stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.'''
1389
+ """
1390
+ Like Path.stat() but, if the path points to a symbolic link,
1391
+ return the symbolic link’s information rather than its target’s.
1392
+ """
1295
1393
  return S3Path(path).lstat()
1296
1394
 
1297
1395
 
1298
1396
  @SmartPath.register
1299
1397
  class S3Path(URIPath):
1300
-
1301
1398
  protocol = "s3"
1302
1399
 
1303
1400
  def __init__(self, path: "PathLike", *other_paths: "PathLike"):
@@ -1305,7 +1402,7 @@ class S3Path(URIPath):
1305
1402
  protocol = get_url_scheme(self.path)
1306
1403
  self._protocol_with_profile = self.protocol
1307
1404
  self._profile_name = None
1308
- if protocol.startswith('s3+'):
1405
+ if protocol.startswith("s3+"):
1309
1406
  self._protocol_with_profile = protocol
1310
1407
  self._profile_name = protocol[3:]
1311
1408
  self._s3_path = f"s3://{self.path[len(protocol)+3:]}"
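The protocol parsing above is what enables per-profile URLs. A small caller-side sketch (profile and bucket names are placeholders):

from megfile.s3_path import S3Path

# "s3+<profile>://" routes requests through that credential profile's client,
# while the profile-qualified prefix is preserved in path_with_protocol.
p = S3Path("s3+staging://my-bucket/dataset/train.txt")
print(p.path_with_protocol)  # s3+staging://my-bucket/dataset/train.txt
print(p.protocol)            # s3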
@@ -1316,30 +1413,33 @@ class S3Path(URIPath):
1316
1413
 
1317
1414
  @cached_property
1318
1415
  def path_with_protocol(self) -> str:
1319
- '''Return path with protocol, like file:///root, s3://bucket/key'''
1416
+ """Return path with protocol, like file:///root, s3://bucket/key"""
1320
1417
  path = self.path
1321
1418
  protocol_prefix = self._protocol_with_profile + "://"
1322
1419
  if path.startswith(protocol_prefix):
1323
1420
  return path
1324
- return protocol_prefix + path.lstrip('/')
1421
+ return protocol_prefix + path.lstrip("/")
1325
1422
 
1326
1423
  @cached_property
1327
1424
  def path_without_protocol(self) -> str:
1328
- '''Return path without protocol, example: if path is s3://bucket/key, return bucket/key'''
1425
+ """
1426
+ Return path without protocol, example: if path is s3://bucket/key,
1427
+ return bucket/key
1428
+ """
1329
1429
  path = self.path
1330
1430
  protocol_prefix = self._protocol_with_profile + "://"
1331
1431
  if path.startswith(protocol_prefix):
1332
- path = path[len(protocol_prefix):]
1432
+ path = path[len(protocol_prefix) :]
1333
1433
  return path
1334
1434
 
1335
1435
  @cached_property
1336
1436
  def parts(self) -> Tuple[str, ...]:
1337
- '''A tuple giving access to the path’s various components'''
1437
+ """A tuple giving access to the path’s various components"""
1338
1438
  parts = [f"{self._protocol_with_profile}://"]
1339
1439
  path = self.path_without_protocol
1340
- path = path.lstrip('/')
1341
- if path != '':
1342
- parts.extend(path.split('/'))
1440
+ path = path.lstrip("/")
1441
+ if path != "":
1442
+ parts.extend(path.split("/"))
1343
1443
  return tuple(parts)
1344
1444
 
1345
1445
  @cached_property
@@ -1347,38 +1447,33 @@ class S3Path(URIPath):
1347
1447
  return get_s3_client_with_cache(profile_name=self._profile_name)
1348
1448
 
1349
1449
  def _s3_get_metadata(self) -> dict:
1350
- '''
1450
+ """
1351
1451
  Get object metadata
1352
1452
 
1353
1453
  :param path: Object path
1354
1454
  :returns: Object metadata
1355
- '''
1455
+ """
1356
1456
  bucket, key = parse_s3_url(self.path_with_protocol)
1357
1457
  if not bucket:
1358
1458
  return {}
1359
- if not key or key.endswith('/'):
1459
+ if not key or key.endswith("/"):
1360
1460
  return {}
1361
1461
  try:
1362
1462
  with raise_s3_error(self.path_with_protocol):
1363
1463
  resp = self._client.head_object(Bucket=bucket, Key=key)
1364
- return dict(
1365
- (key.lower(), value) for key, value in resp['Metadata'].items())
1464
+ return dict((key.lower(), value) for key, value in resp["Metadata"].items())
1366
1465
  except Exception as error:
1367
- if isinstance(error,
1368
- (S3UnknownError, S3ConfigError, S3PermissionError)):
1466
+ if isinstance(error, (S3UnknownError, S3ConfigError, S3PermissionError)):
1369
1467
  raise error
1370
1468
  return {}
1371
1469
 
1372
- def access(
1373
- self,
1374
- mode: Access = Access.READ,
1375
- followlinks: bool = False) -> bool:
1376
- '''
1470
+ def access(self, mode: Access = Access.READ, followlinks: bool = False) -> bool:
1471
+ """
1377
1472
  Test if path has access permission described by mode
1378
1473
 
1379
1474
  :param mode: access mode
1380
1475
  :returns: bool, if the bucket of s3_url has read/write access.
1381
- '''
1476
+ """
1382
1477
  s3_url = self.path_with_protocol
1383
1478
  if followlinks:
1384
1479
  try:
@@ -1390,11 +1485,13 @@ class S3Path(URIPath):
1390
1485
  raise Exception("No available bucket")
1391
1486
  if not isinstance(mode, Access):
1392
1487
  raise TypeError(
1393
- 'Unsupported mode: {} -- Mode should use one of the enums belonging to: {}'
1394
- .format(mode, ', '.join([str(a) for a in Access])))
1488
+ "Unsupported mode: {} -- Mode should use one of "
1489
+ "the enums belonging to: {}".format(
1490
+ mode, ", ".join([str(a) for a in Access])
1491
+ )
1492
+ )
1395
1493
  if mode not in (Access.READ, Access.WRITE):
1396
- raise TypeError('Unsupported mode: {}'.format(mode))
1397
-
1494
+ raise TypeError("Unsupported mode: {}".format(mode))
1398
1495
  try:
1399
1496
  if not self.exists():
1400
1497
  return False
@@ -1408,17 +1505,14 @@ class S3Path(URIPath):
1408
1505
  return True
1409
1506
  try:
1410
1507
  if not key:
1411
- key = 'test'
1412
- elif key.endswith('/'):
1508
+ key = "test"
1509
+ elif key.endswith("/"):
1413
1510
  key = key[:-1]
1414
- upload_id = self._client.create_multipart_upload(
1415
- Bucket=bucket,
1416
- Key=key,
1417
- )['UploadId']
1511
+ upload_id = self._client.create_multipart_upload(Bucket=bucket, Key=key)[
1512
+ "UploadId"
1513
+ ]
1418
1514
  self._client.abort_multipart_upload(
1419
- Bucket=bucket,
1420
- Key=key,
1421
- UploadId=upload_id,
1515
+ Bucket=bucket, Key=key, UploadId=upload_id
1422
1516
  )
1423
1517
  return True
1424
1518
  except Exception as error:
@@ -1428,13 +1522,13 @@ class S3Path(URIPath):
1428
1522
  raise error
1429
1523
 
1430
1524
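A sketch of the permission probe above. Access is assumed importable from megfile.interfaces; note that the WRITE check performs a real create/abort multipart round-trip against the bucket.

from megfile.interfaces import Access
from megfile.s3_path import S3Path

path = S3Path("s3://my-bucket/some/key")
can_read = path.access(Access.READ)    # False if the path does not exist or the bucket is unreachable
can_write = path.access(Access.WRITE)  # creates and immediately aborts a multipart upload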
  def exists(self, followlinks: bool = False) -> bool:
1431
- '''
1525
+ """
1432
1526
  Test if s3_url exists
1433
1527
 
1434
1528
  If the bucket of s3_url are not permitted to read, return False
1435
1529
 
1436
1530
  :returns: True if s3_url exists, else False
1437
- '''
1531
+ """
1438
1532
  bucket, key = parse_s3_url(self.path_with_protocol)
1439
1533
  if not bucket: # s3:// => True, s3:///key => False
1440
1534
  return not key
@@ -1442,28 +1536,37 @@ class S3Path(URIPath):
1442
1536
  return self.is_file(followlinks) or self.is_dir()
1443
1537
 
1444
1538
  def getmtime(self, follow_symlinks: bool = False) -> float:
1445
- '''
1446
- Get last-modified time of the file on the given s3_url path (in Unix timestamp format).
1447
- If the path is an existent directory, return the latest modified time of all file in it. The mtime of empty directory is 1970-01-01 00:00:00
1539
+ """
1540
+ Get last-modified time of the file on the given s3_url path
1541
+ (in Unix timestamp format).
1542
+
1543
+ If the path is an existent directory, return the latest modified time of
1544
+ all file in it. The mtime of empty directory is 1970-01-01 00:00:00
1448
1545
 
1449
- If s3_url is not an existent path, which means s3_exist(s3_url) returns False, then raise S3FileNotFoundError
1546
+ If s3_url is not an existent path, which means s3_exist(s3_url) returns False,
1547
+ then raise S3FileNotFoundError
1450
1548
 
1451
1549
  :returns: Last-modified time
1452
1550
  :raises: S3FileNotFoundError, UnsupportedError
1453
- '''
1551
+ """
1454
1552
  return self.stat(follow_symlinks=follow_symlinks).mtime
1455
1553
 
1456
1554
  def getsize(self, follow_symlinks: bool = False) -> int:
1457
- '''
1555
+ """
1458
1556
  Get file size on the given s3_url path (in bytes).
1459
- If the path in a directory, return the sum of all file size in it, including file in subdirectories (if exist).
1460
- The result excludes the size of directory itself. In other words, return 0 Byte on an empty directory path.
1461
1557
 
1462
- If s3_url is not an existent path, which means s3_exist(s3_url) returns False, then raise S3FileNotFoundError
1558
+ If the path in a directory, return the sum of all file size in it,
1559
+ including file in subdirectories (if exist).
1560
+
1561
+ The result excludes the size of directory itself.
1562
+ In other words, return 0 Byte on an empty directory path.
1563
+
1564
+ If s3_url is not an existent path, which means s3_exist(s3_url) returns False,
1565
+ then raise S3FileNotFoundError
1463
1566
 
1464
1567
  :returns: File size
1465
1568
  :raises: S3FileNotFoundError, UnsupportedError
1466
- '''
1569
+ """
1467
1570
  return self.stat(follow_symlinks=follow_symlinks).size
1468
1571
 
1469
1572
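A small sketch of the two stat-derived helpers above; on a directory prefix the size is the sum over all contained objects and the mtime is the newest object's LastModified.

from megfile.s3_path import S3Path

dataset = S3Path("s3://my-bucket/dataset/")
total_bytes = dataset.getsize()    # sum of object sizes under the prefix, 0 Bytes for an empty directory
latest_mtime = dataset.getmtime()  # unix timestamp of the newest object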
  def glob(
@@ -1472,38 +1575,52 @@ class S3Path(URIPath):
1472
1575
  recursive: bool = True,
1473
1576
  missing_ok: bool = True,
1474
1577
  followlinks: bool = False,
1475
- ) -> List['S3Path']:
1476
- '''Return s3 path list in ascending alphabetical order, in which path matches glob pattern
1477
- Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
1578
+ ) -> List["S3Path"]:
1579
+ """Return s3 path list in ascending alphabetical order,
1580
+ in which path matches glob pattern
1581
+
1582
+ Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
1583
+ raise UnsupportedError
1478
1584
 
1479
- :param pattern: Glob the given relative pattern in the directory represented by this path
1585
+ :param pattern: Glob the given relative pattern in the directory represented
1586
+ by this path
1480
1587
  :param recursive: If False, `**` will not search directory recursively
1481
- :param missing_ok: If False and target path doesn't match any file, raise FileNotFoundError
1588
+ :param missing_ok: If False and target path doesn't match any file,
1589
+ raise FileNotFoundError
1482
1590
  :raises: UnsupportedError, when bucket part contains wildcard characters
1483
1591
  :returns: A list contains paths match `s3_pathname`
1484
- '''
1592
+ """
1485
1593
  return list(
1486
1594
  self.iglob(
1487
1595
  pattern=pattern,
1488
1596
  recursive=recursive,
1489
1597
  missing_ok=missing_ok,
1490
- followlinks=followlinks))
1598
+ followlinks=followlinks,
1599
+ )
1600
+ )
1491
1601
 
1492
1602
  def glob_stat(
1493
- self,
1494
- pattern,
1495
- recursive: bool = True,
1496
- missing_ok: bool = True,
1497
- followlinks: bool = False) -> Iterator[FileEntry]:
1498
- '''Return a generator contains tuples of path and file stat, in ascending alphabetical order, in which path matches glob pattern
1499
- Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
1500
-
1501
- :param pattern: Glob the given relative pattern in the directory represented by this path
1603
+ self,
1604
+ pattern,
1605
+ recursive: bool = True,
1606
+ missing_ok: bool = True,
1607
+ followlinks: bool = False,
1608
+ ) -> Iterator[FileEntry]:
1609
+ """Return a generator contains tuples of path and file stat,
1610
+ in ascending alphabetical order, in which path matches glob pattern
1611
+
1612
+ Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
1613
+ raise UnsupportedError
1614
+
1615
+ :param pattern: Glob the given relative pattern in the directory represented
1616
+ by this path
1502
1617
  :param recursive: If False, `**` will not search directory recursively
1503
- :param missing_ok: If False and target path doesn't match any file, raise FileNotFoundError
1618
+ :param missing_ok: If False and target path doesn't match any file,
1619
+ raise FileNotFoundError
1504
1620
  :raises: UnsupportedError, when bucket part contains wildcard characters
1505
- :returns: A generator contains tuples of path and file stat, in which paths match `s3_pathname`
1506
- '''
1621
+ :returns: A generator contains tuples of path and file stat,
1622
+ in which paths match `s3_pathname`
1623
+ """
1507
1624
  glob_path = self._s3_path
1508
1625
  if pattern:
1509
1626
  glob_path = self.joinpath(pattern)._s3_path
@@ -1511,23 +1628,27 @@ class S3Path(URIPath):
1511
1628
 
1512
1629
  def create_generator():
1513
1630
  for group_s3_pathname_1 in _group_s3path_by_bucket(
1514
- s3_pathname, self._profile_name):
1515
- for group_s3_pathname_2 in _group_s3path_by_prefix(
1516
- group_s3_pathname_1):
1631
+ s3_pathname, self._profile_name
1632
+ ):
1633
+ for group_s3_pathname_2 in _group_s3path_by_prefix(group_s3_pathname_1):
1517
1634
  for file_entry in _s3_glob_stat_single_path(
1518
- group_s3_pathname_2, recursive, missing_ok,
1519
- followlinks=followlinks,
1520
- profile_name=self._profile_name):
1635
+ group_s3_pathname_2,
1636
+ recursive,
1637
+ missing_ok,
1638
+ followlinks=followlinks,
1639
+ profile_name=self._profile_name,
1640
+ ):
1521
1641
  if self._profile_name:
1522
1642
  file_entry = file_entry._replace(
1523
- path=
1524
- f"{self._protocol_with_profile}://{file_entry.path[5:]}"
1643
+ path=f"{self._protocol_with_profile}://{file_entry.path[5:]}"
1525
1644
  )
1526
1645
  yield file_entry
1527
1646
 
1528
1647
  return _create_missing_ok_generator(
1529
- create_generator(), missing_ok,
1530
- S3FileNotFoundError('No match any file: %r' % s3_pathname))
1648
+ create_generator(),
1649
+ missing_ok,
1650
+ S3FileNotFoundError("No match any file: %r" % s3_pathname),
1651
+ )
1531
1652
 
1532
1653
  def iglob(
1533
1654
  self,
@@ -1535,60 +1656,70 @@ class S3Path(URIPath):
1535
1656
  recursive: bool = True,
1536
1657
  missing_ok: bool = True,
1537
1658
  followlinks: bool = False,
1538
- ) -> Iterator['S3Path']:
1539
- '''Return s3 path iterator in ascending alphabetical order, in which path matches glob pattern
1540
- Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
1659
+ ) -> Iterator["S3Path"]:
1660
+ """Return s3 path iterator in ascending alphabetical order,
1661
+ in which path matches glob pattern
1541
1662
 
1542
- :param pattern: Glob the given relative pattern in the directory represented by this path
1663
+ Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
1664
+ raise UnsupportedError
1665
+
1666
+ :param pattern: Glob the given relative pattern in the directory represented
1667
+ by this path
1543
1668
  :param recursive: If False, `**` will not search directory recursively
1544
- :param missing_ok: If False and target path doesn't match any file, raise FileNotFoundError
1669
+ :param missing_ok: If False and target path doesn't match any file,
1670
+ raise FileNotFoundError
1545
1671
  :raises: UnsupportedError, when bucket part contains wildcard characters
1546
1672
  :returns: An iterator contains paths match `s3_pathname`
1547
- '''
1548
- for file_entry in self.glob_stat(pattern=pattern, recursive=recursive,
1549
- missing_ok=missing_ok,
1550
- followlinks=followlinks):
1673
+ """
1674
+ for file_entry in self.glob_stat(
1675
+ pattern=pattern,
1676
+ recursive=recursive,
1677
+ missing_ok=missing_ok,
1678
+ followlinks=followlinks,
1679
+ ):
1551
1680
  yield self.from_path(file_entry.path)
1552
1681
 
1553
1682
  def is_dir(self, followlinks: bool = False) -> bool:
1554
- '''
1683
+ """
1555
1684
  Test if an s3 url is directory
1556
1685
  Specific procedures are as follows:
1557
1686
  If there exists a suffix, of which ``os.path.join(s3_url, suffix)`` is a file
1558
1687
  If the url is empty bucket or s3://
1559
1688
 
1560
- :param followlinks: whether followlinks is True or False, result is the same. Because s3 symlink not support dir.
1689
+ :param followlinks: whether followlinks is True or False, result is the same.
1690
+ Because s3 symlink not support dir.
1561
1691
  :returns: True if path is s3 directory, else False
1562
- '''
1692
+ """
1563
1693
  bucket, key = parse_s3_url(self.path_with_protocol)
1564
1694
  if not bucket: # s3:// => True, s3:///key => False
1565
1695
  return not key
1566
1696
  prefix = _become_prefix(key)
1567
1697
  try:
1568
1698
  resp = self._client.list_objects_v2(
1569
- Bucket=bucket, Prefix=prefix, Delimiter='/', MaxKeys=1)
1699
+ Bucket=bucket, Prefix=prefix, Delimiter="/", MaxKeys=1
1700
+ )
1570
1701
  except Exception as error:
1571
1702
  error = translate_s3_error(error, self.path_with_protocol)
1572
- if isinstance(error,
1573
- (S3UnknownError, S3ConfigError, S3PermissionError)):
1703
+ if isinstance(error, (S3UnknownError, S3ConfigError, S3PermissionError)):
1574
1704
  raise error
1575
1705
  return False
1576
1706
 
1577
1707
  if not key: # bucket is accessible
1578
1708
  return True
1579
1709
 
1580
- if 'KeyCount' in resp:
1581
- return resp['KeyCount'] > 0
1710
+ if "KeyCount" in resp:
1711
+ return resp["KeyCount"] > 0
1582
1712
 
1583
- return len(resp.get('Contents', [])) > 0 or \
1584
- len(resp.get('CommonPrefixes', [])) > 0
1713
+ return (
1714
+ len(resp.get("Contents", [])) > 0 or len(resp.get("CommonPrefixes", [])) > 0
1715
+ )
1585
1716
 
1586
1717
  def is_file(self, followlinks: bool = False) -> bool:
1587
- '''
1718
+ """
1588
1719
  Test if an s3_url is file
1589
1720
 
1590
1721
  :returns: True if path is s3 file, else False
1591
- '''
1722
+ """
1592
1723
  s3_url = self.path_with_protocol
1593
1724
  if followlinks:
1594
1725
  try:
@@ -1596,46 +1727,45 @@ class S3Path(URIPath):
1596
1727
  except S3NotALinkError:
1597
1728
  pass
1598
1729
  bucket, key = parse_s3_url(s3_url)
1599
- if not bucket or not key or key.endswith('/'):
1730
+ if not bucket or not key or key.endswith("/"):
1600
1731
  # s3://, s3:///key, s3://bucket, s3://bucket/prefix/
1601
1732
  return False
1602
1733
  try:
1603
1734
  self._client.head_object(Bucket=bucket, Key=key)
1604
1735
  except Exception as error:
1605
1736
  error = translate_s3_error(error, s3_url)
1606
- if isinstance(error,
1607
- (S3UnknownError, S3ConfigError, S3PermissionError)):
1737
+ if isinstance(error, (S3UnknownError, S3ConfigError, S3PermissionError)):
1608
1738
  raise error
1609
1739
  return False
1610
1740
  return True
1611
1741
 
1612
1742
  def listdir(self, followlinks: bool = False) -> List[str]:
1613
- '''
1743
+ """
1614
1744
  Get all contents of given s3_url. The result is in ascending alphabetical order.
1615
1745
 
1616
1746
  :returns: All contents have prefix of s3_url in ascending alphabetical order
1617
1747
  :raises: S3FileNotFoundError, S3NotADirectoryError
1618
- '''
1748
+ """
1619
1749
  entries = list(self.scandir(followlinks=followlinks))
1620
1750
  return sorted([entry.name for entry in entries])
1621
1751
 
1622
- def iterdir(self, followlinks: bool = False) -> Iterator['S3Path']:
1623
- '''
1752
+ def iterdir(self, followlinks: bool = False) -> Iterator["S3Path"]:
1753
+ """
1624
1754
  Get all contents of given s3_url. The result is in ascending alphabetical order.
1625
1755
 
1626
1756
  :returns: All contents have prefix of s3_url in ascending alphabetical order
1627
1757
  :raises: S3FileNotFoundError, S3NotADirectoryError
1628
- '''
1758
+ """
1629
1759
  for path in self.listdir(followlinks=followlinks):
1630
1760
  yield self.joinpath(path)
1631
1761
 
1632
1762
  def load(self, followlinks: bool = False) -> BinaryIO:
1633
- '''Read all content in binary on specified path and write into memory
1763
+ """Read all content in binary on specified path and write into memory
1634
1764
 
1635
1765
  User should close the BinaryIO manually
1636
1766
 
1637
1767
  :returns: BinaryIO
1638
- '''
1768
+ """
1639
1769
  s3_url = self.path_with_protocol
1640
1770
  if followlinks:
1641
1771
  try:
@@ -1644,9 +1774,9 @@ class S3Path(URIPath):
1644
1774
  pass
1645
1775
  bucket, key = parse_s3_url(s3_url)
1646
1776
  if not bucket:
1647
- raise S3BucketNotFoundError('Empty bucket name: %r' % s3_url)
1648
- if not key or key.endswith('/'):
1649
- raise S3IsADirectoryError('Is a directory: %r' % s3_url)
1777
+ raise S3BucketNotFoundError("Empty bucket name: %r" % s3_url)
1778
+ if not key or key.endswith("/"):
1779
+ raise S3IsADirectoryError("Is a directory: %r" % s3_url)
1650
1780
 
1651
1781
  buffer = io.BytesIO()
1652
1782
  with raise_s3_error(s3_url):
@@ -1655,11 +1785,11 @@ class S3Path(URIPath):
1655
1785
  return buffer
1656
1786
 
1657
1787
  def hasbucket(self) -> bool:
1658
- '''
1788
+ """
1659
1789
  Test if the bucket of s3_url exists
1660
1790
 
1661
1791
  :returns: True if bucket of s3_url exists, else False
1662
- '''
1792
+ """
1663
1793
  bucket, _ = parse_s3_url(self.path_with_protocol)
1664
1794
  if not bucket:
1665
1795
  return False
@@ -1669,15 +1799,16 @@ class S3Path(URIPath):
1669
1799
  except Exception as error:
1670
1800
  error = translate_s3_error(error, self.path_with_protocol)
1671
1801
  if isinstance(error, S3PermissionError):
1672
- # Aliyun OSS doesn't give bucket api permission when you only have read and write permission
1802
+ # Aliyun OSS doesn't give bucket api permission when you only have read
1803
+ # and write permission
1673
1804
  try:
1674
1805
  self._client.list_objects_v2(Bucket=bucket, MaxKeys=1)
1675
1806
  return True
1676
1807
  except Exception as error2:
1677
1808
  error2 = translate_s3_error(error2, self.path_with_protocol)
1678
1809
  if isinstance(
1679
- error2,
1680
- (S3UnknownError, S3ConfigError, S3PermissionError)):
1810
+ error2, (S3UnknownError, S3ConfigError, S3PermissionError)
1811
+ ):
1681
1812
  raise error2
1682
1813
  return False
1683
1814
  elif isinstance(error, (S3UnknownError, S3ConfigError)):
@@ -1688,7 +1819,7 @@ class S3Path(URIPath):
1688
1819
  return True
1689
1820
 
1690
1821
  def mkdir(self, mode=0o777, parents: bool = False, exist_ok: bool = False):
1691
- '''
1822
+ """
1692
1823
  Create an s3 directory.
1693
1824
  Purely creating directory is invalid because it's unavailable on OSS.
1694
1825
  This function is to test the target bucket have WRITE access.
@@ -1697,54 +1828,57 @@ class S3Path(URIPath):
1697
1828
  :param parents: parents is ignored, only be compatible with pathlib.Path
1698
1829
  :param exist_ok: If False and target directory exists, raise S3FileExistsError
1699
1830
  :raises: S3BucketNotFoundError, S3FileExistsError
1700
- '''
1831
+ """
1701
1832
  bucket, _ = parse_s3_url(self.path_with_protocol)
1702
1833
  if not bucket:
1703
1834
  raise S3BucketNotFoundError(
1704
- 'Empty bucket name: %r' % self.path_with_protocol)
1835
+ "Empty bucket name: %r" % self.path_with_protocol
1836
+ )
1705
1837
  if not self.hasbucket():
1706
- raise S3BucketNotFoundError(
1707
- 'No such bucket: %r' % self.path_with_protocol)
1838
+ raise S3BucketNotFoundError("No such bucket: %r" % self.path_with_protocol)
1708
1839
  if exist_ok:
1709
1840
  if self.is_file():
1710
- raise S3FileExistsError(
1711
- 'File exists: %r' % self.path_with_protocol)
1841
+ raise S3FileExistsError("File exists: %r" % self.path_with_protocol)
1712
1842
  return
1713
1843
  if self.exists():
1714
- raise S3FileExistsError('File exists: %r' % self.path_with_protocol)
1844
+ raise S3FileExistsError("File exists: %r" % self.path_with_protocol)
1715
1845
 
1716
1846
  def move(self, dst_url: PathLike, overwrite: bool = True) -> None:
1717
- '''
1847
+ """
1718
1848
  Move file/directory path from src_url to dst_url
1719
1849
 
1720
1850
  :param dst_url: Given destination path
1721
1851
  :param overwrite: whether or not overwrite file when exists
1722
- '''
1852
+ """
1723
1853
  for src_file_path, dst_file_path in _s3_scan_pairs(
1724
- self.path_with_protocol, dst_url):
1854
+ self.path_with_protocol, dst_url
1855
+ ):
1725
1856
  S3Path(src_file_path).rename(dst_file_path, overwrite)
1726
1857
 
1727
1858
  def remove(self, missing_ok: bool = False) -> None:
1728
- '''
1729
- Remove the file or directory on s3, `s3://` and `s3://bucket` are not permitted to remove
1859
+ """
1860
+ Remove the file or directory on s3, `s3://` and `s3://bucket`
1861
+ are not permitted to remove
1730
1862
 
1731
- :param missing_ok: if False and target file/directory not exists, raise S3FileNotFoundError
1863
+ :param missing_ok: if False and target file/directory not exists,
1864
+ raise S3FileNotFoundError
1732
1865
  :raises: S3PermissionError, S3FileNotFoundError, UnsupportedError
1733
- '''
1866
+ """
1734
1867
  bucket, key = parse_s3_url(self.path_with_protocol)
1735
1868
  if not bucket:
1736
1869
  if not key:
1737
- raise UnsupportedError(
1738
- 'Remove whole s3', self.path_with_protocol)
1870
+ raise UnsupportedError("Remove whole s3", self.path_with_protocol)
1739
1871
  raise S3BucketNotFoundError(
1740
- 'Empty bucket name: %r' % self.path_with_protocol)
1872
+ "Empty bucket name: %r" % self.path_with_protocol
1873
+ )
1741
1874
  if not key:
1742
- raise UnsupportedError('Remove bucket', self.path_with_protocol)
1875
+ raise UnsupportedError("Remove bucket", self.path_with_protocol)
1743
1876
  if not self.exists():
1744
1877
  if missing_ok:
1745
1878
  return
1746
1879
  raise S3FileNotFoundError(
1747
- 'No such file or directory: %r' % self.path_with_protocol)
1880
+ "No such file or directory: %r" % self.path_with_protocol
1881
+ )
1748
1882
 
1749
1883
  client = self._client
1750
1884
  with raise_s3_error(self.path_with_protocol):
@@ -1754,55 +1888,61 @@ class S3Path(URIPath):
1754
1888
  prefix = _become_prefix(key)
1755
1889
  total_count, error_count = 0, 0
1756
1890
  for resp in _list_objects_recursive(client, bucket, prefix):
1757
- if 'Contents' in resp:
1758
- keys = [
1759
- {
1760
- 'Key': content['Key']
1761
- } for content in resp['Contents']
1762
- ]
1891
+ if "Contents" in resp:
1892
+ keys = [{"Key": content["Key"]} for content in resp["Contents"]]
1763
1893
  total_count += len(keys)
1764
1894
  errors = []
1765
1895
  retries = 2
1766
1896
  retry_interval = min(0.1 * 2**retries, 30)
1767
1897
  for i in range(retries):
1768
- # doc: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.delete_objects
1898
+ # doc:
1899
+ # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.delete_objects
1769
1900
  if not keys:
1770
1901
  break
1771
1902
  response = client.delete_objects(
1772
- Bucket=bucket, Delete={'Objects': keys})
1903
+ Bucket=bucket, Delete={"Objects": keys}
1904
+ )
1773
1905
  keys = []
1774
- for error_info in response.get('Errors', []):
1775
- if s3_error_code_should_retry(
1776
- error_info.get('Code')):
1906
+ for error_info in response.get("Errors", []):
1907
+ if s3_error_code_should_retry(error_info.get("Code")):
1777
1908
  error_logger.warning(
1778
- "retry %s times, removing file: %s, with error %s: %s"
1909
+ "retry %s times, removing file: %s, "
1910
+ "with error %s: %s"
1779
1911
  % (
1780
- i + 1, error_info['Key'],
1781
- error_info['Code'],
1782
- error_info['Message']))
1783
- keys.append({'Key': error_info['Key']})
1912
+ i + 1,
1913
+ error_info["Key"],
1914
+ error_info["Code"],
1915
+ error_info["Message"],
1916
+ )
1917
+ )
1918
+ keys.append({"Key": error_info["Key"]})
1784
1919
  else:
1785
1920
  errors.append(error_info)
1786
1921
  time.sleep(retry_interval)
1787
1922
  for error_info in errors:
1788
1923
  error_logger.error(
1789
- "failed remove file: %s, with error %s: %s" % (
1790
- error_info['Key'], error_info['Code'],
1791
- error_info['Message']))
1924
+ "failed remove file: %s, with error %s: %s"
1925
+ % (
1926
+ error_info["Key"],
1927
+ error_info["Code"],
1928
+ error_info["Message"],
1929
+ )
1930
+ )
1792
1931
  error_count += len(errors)
1793
1932
  if error_count > 0:
1794
- error_msg = "failed remove path: %s, total file count: %s, failed count: %s" % (
1795
- self.path_with_protocol, total_count, error_count)
1796
- raise S3UnknownError(
1797
- Exception(error_msg), self.path_with_protocol)
1798
-
1799
- def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'S3Path':
1800
- '''
1933
+ error_msg = (
1934
+ "failed remove path: %s, total file count: %s, failed count: %s"
1935
+ % (self.path_with_protocol, total_count, error_count)
1936
+ )
1937
+ raise S3UnknownError(Exception(error_msg), self.path_with_protocol)
1938
+
1939
+ def rename(self, dst_path: PathLike, overwrite: bool = True) -> "S3Path":
1940
+ """
1801
1941
  Move s3 file path from src_url to dst_url
1802
1942
 
1803
1943
  :param dst_path: Given destination path
1804
1944
  :param overwrite: whether or not overwrite file when exists
1805
- '''
1945
+ """
1806
1946
  if self.is_file():
1807
1947
  self.copy(dst_path, overwrite=overwrite)
1808
1948
  else:
@@ -1810,25 +1950,28 @@ class S3Path(URIPath):
1810
1950
  self.remove(missing_ok=True)
1811
1951
  return self.from_path(dst_path)
1812
1952
 
1813
- def scan(self,
1814
- missing_ok: bool = True,
1815
- followlinks: bool = False) -> Iterator[str]:
1816
- '''
1953
+ def scan(self, missing_ok: bool = True, followlinks: bool = False) -> Iterator[str]:
1954
+ """
1817
1955
  Iteratively traverse only files in given s3 directory, in alphabetical order.
1818
1956
  Every iteration on generator yields a path string.
1819
1957
 
1820
1958
  If s3_url is a file path, yields the file only
1959
+
1821
1960
  If s3_url is a non-existent path, return an empty generator
1961
+
1822
1962
  If s3_url is a bucket path, return all file paths in the bucket
1963
+
1823
1964
  If s3_url is an empty bucket, return an empty generator
1824
- If s3_url doesn't contain any bucket, which is s3_url == 's3://', raise UnsupportedError. walk() on complete s3 is not supported in megfile
1825
1965
 
1826
- :param missing_ok: If False and there's no file in the directory, raise FileNotFoundError
1966
+ If s3_url doesn't contain any bucket, which is s3_url == 's3://',
1967
+ raise UnsupportedError. walk() on complete s3 is not supported in megfile
1968
+
1969
+ :param missing_ok: If False and there's no file in the directory,
1970
+ raise FileNotFoundError
1827
1971
  :raises: UnsupportedError
1828
1972
  :returns: A file path generator
1829
- '''
1830
- scan_stat_iter = self.scan_stat(
1831
- missing_ok=missing_ok, followlinks=followlinks)
1973
+ """
1974
+ scan_stat_iter = self.scan_stat(missing_ok=missing_ok, followlinks=followlinks)
1832
1975
 
1833
1976
  def create_generator() -> Iterator[str]:
1834
1977
  for file_entry in scan_stat_iter:
@@ -1836,83 +1979,89 @@ class S3Path(URIPath):
1836
1979
 
1837
1980
  return create_generator()
1838
1981
 
1839
- def scan_stat(self,
1840
- missing_ok: bool = True,
1841
- followlinks: bool = False) -> Iterator[FileEntry]:
1842
- '''
1982
+ def scan_stat(
1983
+ self, missing_ok: bool = True, followlinks: bool = False
1984
+ ) -> Iterator[FileEntry]:
1985
+ """
1843
1986
  Iteratively traverse only files in given directory, in alphabetical order.
1844
1987
  Every iteration on generator yields a tuple of path string and file stat
1845
1988
 
1846
- :param missing_ok: If False and there's no file in the directory, raise FileNotFoundError
1989
+ :param missing_ok: If False and there's no file in the directory,
1990
+ raise FileNotFoundError
1847
1991
  :raises: UnsupportedError
1848
1992
  :returns: A file path generator
1849
- '''
1993
+ """
1850
1994
  bucket, key = parse_s3_url(self.path_with_protocol)
1851
1995
  if not bucket:
1852
- raise UnsupportedError('Scan whole s3', self.path_with_protocol)
1996
+ raise UnsupportedError("Scan whole s3", self.path_with_protocol)
1853
1997
 
1854
1998
  def create_generator() -> Iterator[FileEntry]:
1855
1999
  if not self.is_dir():
1856
2000
  if self.is_file():
1857
- # On s3, file and directory may be of same name and level, so need to test the path is file or directory
2001
+ # On s3, file and directory may be of same name and level, so need
2002
+ # to test the path is file or directory
1858
2003
  yield FileEntry(
1859
- self.name, fspath(self.path_with_protocol),
1860
- self.stat(follow_symlinks=followlinks))
2004
+ self.name,
2005
+ fspath(self.path_with_protocol),
2006
+ self.stat(follow_symlinks=followlinks),
2007
+ )
1861
2008
  return
1862
2009
 
1863
- if not key.endswith('/') and self.is_file():
2010
+ if not key.endswith("/") and self.is_file():
1864
2011
  yield FileEntry(
1865
- self.name, fspath(self.path_with_protocol),
1866
- self.stat(follow_symlinks=followlinks))
2012
+ self.name,
2013
+ fspath(self.path_with_protocol),
2014
+ self.stat(follow_symlinks=followlinks),
2015
+ )
1867
2016
 
1868
2017
  prefix = _become_prefix(key)
1869
2018
  client = self._client
1870
2019
  with raise_s3_error(self.path_with_protocol):
1871
2020
  for resp in _list_objects_recursive(client, bucket, prefix):
1872
- for content in resp.get('Contents', []):
2021
+ for content in resp.get("Contents", []):
1873
2022
  full_path = s3_path_join(
1874
- f'{self._protocol_with_profile}://', bucket,
1875
- content['Key'])
2023
+ f"{self._protocol_with_profile}://", bucket, content["Key"]
2024
+ )
1876
2025
 
1877
2026
  if followlinks:
1878
2027
  try:
1879
- origin_path = self.from_path(
1880
- full_path).readlink()
2028
+ origin_path = self.from_path(full_path).readlink()
1881
2029
  yield FileEntry(
1882
2030
  origin_path.name,
1883
2031
  origin_path.path_with_protocol,
1884
- origin_path.lstat())
2032
+ origin_path.lstat(),
2033
+ )
1885
2034
  continue
1886
2035
  except S3NotALinkError:
1887
2036
  pass
1888
2037
 
1889
2038
  yield FileEntry(
1890
- S3Path(full_path).name, full_path,
1891
- _make_stat(content))
2039
+ S3Path(full_path).name, full_path, _make_stat(content)
2040
+ )
1892
2041
 
1893
2042
  return _create_missing_ok_generator(
1894
- create_generator(), missing_ok,
1895
- S3FileNotFoundError(
1896
- 'No match any file in: %r' % self.path_with_protocol))
2043
+ create_generator(),
2044
+ missing_ok,
2045
+ S3FileNotFoundError("No match any file in: %r" % self.path_with_protocol),
2046
+ )
1897
2047
 
1898
2048
  def scandir(self, followlinks: bool = False) -> Iterator[FileEntry]:
1899
- '''
2049
+ """
1900
2050
  Get all contents of given s3_url, the order of result is not guaranteed.
1901
2051
 
1902
2052
  :returns: All contents have prefix of s3_url
1903
2053
  :raises: S3FileNotFoundError, S3NotADirectoryError
1904
- '''
2054
+ """
1905
2055
  bucket, key = parse_s3_url(self.path_with_protocol)
1906
2056
  if not bucket and key:
1907
2057
  raise S3BucketNotFoundError(
1908
- 'Empty bucket name: %r' % self.path_with_protocol)
2058
+ "Empty bucket name: %r" % self.path_with_protocol
2059
+ )
1909
2060
 
1910
2061
  if self.is_file():
1911
- raise S3NotADirectoryError(
1912
- 'Not a directory: %r' % self.path_with_protocol)
2062
+ raise S3NotADirectoryError("Not a directory: %r" % self.path_with_protocol)
1913
2063
  elif not self.is_dir():
1914
- raise S3FileNotFoundError(
1915
- 'No such directory: %r' % self.path_with_protocol)
2064
+ raise S3FileNotFoundError("No such directory: %r" % self.path_with_protocol)
1916
2065
  prefix = _become_prefix(key)
1917
2066
  client = self._client
1918
2067
 
@@ -1921,34 +2070,38 @@ class S3Path(URIPath):
1921
2070
  def create_generator() -> Iterator[FileEntry]:
1922
2071
  with raise_s3_error(self.path_with_protocol):
1923
2072
 
1924
- def generate_s3_path(
1925
- protocol: str, bucket: str, key: str) -> str:
2073
+ def generate_s3_path(protocol: str, bucket: str, key: str) -> str:
1926
2074
  return "%s://%s/%s" % (protocol, bucket, key)
1927
2075
 
1928
2076
  if not bucket and not key: # list buckets
1929
2077
  response = client.list_buckets()
1930
- for content in response['Buckets']:
2078
+ for content in response["Buckets"]:
1931
2079
  yield FileEntry(
1932
- content['Name'], f"s3://{content['Name']}",
2080
+ content["Name"],
2081
+ f"s3://{content['Name']}",
1933
2082
  StatResult(
1934
- ctime=content['CreationDate'].timestamp(),
2083
+ ctime=content["CreationDate"].timestamp(),
1935
2084
  isdir=True,
1936
2085
  extra=content,
1937
- ))
2086
+ ),
2087
+ )
1938
2088
  return
1939
2089
 
1940
- for resp in _list_objects_recursive(client, bucket, prefix,
1941
- '/'):
1942
- for common_prefix in resp.get('CommonPrefixes', []):
2090
+ for resp in _list_objects_recursive(client, bucket, prefix, "/"):
2091
+ for common_prefix in resp.get("CommonPrefixes", []):
1943
2092
  yield FileEntry(
1944
- common_prefix['Prefix'][len(prefix):-1],
2093
+ common_prefix["Prefix"][len(prefix) : -1],
1945
2094
  generate_s3_path(
1946
- self._protocol_with_profile, bucket,
1947
- common_prefix['Prefix']),
1948
- StatResult(isdir=True, extra=common_prefix))
1949
- for content in resp.get('Contents', []):
2095
+ self._protocol_with_profile,
2096
+ bucket,
2097
+ common_prefix["Prefix"],
2098
+ ),
2099
+ StatResult(isdir=True, extra=common_prefix),
2100
+ )
2101
+ for content in resp.get("Contents", []):
1950
2102
  src_url = generate_s3_path(
1951
- self._protocol_with_profile, bucket, content['Key'])
2103
+ self._protocol_with_profile, bucket, content["Key"]
2104
+ )
1952
2105
 
1953
2106
  if followlinks:
1954
2107
  try:
@@ -1956,30 +2109,35 @@ class S3Path(URIPath):
1956
2109
  yield FileEntry(
1957
2110
  origin_path.name,
1958
2111
  origin_path.path_with_protocol,
1959
- origin_path.lstat())
2112
+ origin_path.lstat(),
2113
+ )
1960
2114
  continue
1961
2115
  except S3NotALinkError:
1962
2116
  pass
1963
2117
 
1964
2118
  yield FileEntry(
1965
- content['Key'][len(prefix):], src_url,
1966
- _make_stat(content))
2119
+ content["Key"][len(prefix) :], src_url, _make_stat(content)
2120
+ )
1967
2121
 
1968
2122
  return ContextIterator(create_generator())
1969
2123
 
1970
2124
  def _get_dir_stat(self) -> StatResult:
1971
- '''
1972
- Return StatResult of given s3_url directory, including:
2125
+ """
2126
+ Return StatResult of given s3_url directory, including:
1973
2127
 
1974
- 1. Directory size: the sum of all file size in it, including file in subdirectories (if exist).
1975
- The result excludes the size of directory itself. In other words, return 0 Byte on an empty directory path
1976
- 2. Last-modified time of directory: return the latest modified time of all file in it. The mtime of empty directory is 1970-01-01 00:00:00
2128
+ 1. Directory size: the sum of all file size in it,
2129
+ including file in subdirectories (if exist).
2130
+ The result excludes the size of directory itself.
2131
+ In other words, return 0 Byte on an empty directory path
2132
+ 2. Last-modified time of directory: return the latest modified time
2133
+ of all file in it. The mtime of empty directory is 1970-01-01 00:00:00
1977
2134
 
1978
2135
  :returns: An int indicates size in Bytes
1979
- '''
2136
+ """
1980
2137
  if not self.is_dir():
1981
2138
  raise S3FileNotFoundError(
1982
- 'No such file or directory: %r' % self.path_with_protocol)
2139
+ "No such file or directory: %r" % self.path_with_protocol
2140
+ )
1983
2141
 
1984
2142
  bucket, key = parse_s3_url(self.path_with_protocol)
1985
2143
  prefix = _become_prefix(key)
@@ -1988,29 +2146,34 @@ class S3Path(URIPath):
1988
2146
  mtime = 0.0
1989
2147
  with raise_s3_error(self.path_with_protocol):
1990
2148
  for resp in _list_objects_recursive(client, bucket, prefix):
1991
- for content in resp.get('Contents', []):
1992
- size += content['Size']
1993
- last_modified = content['LastModified'].timestamp()
2149
+ for content in resp.get("Contents", []):
2150
+ size += content["Size"]
2151
+ last_modified = content["LastModified"].timestamp()
1994
2152
  if mtime < last_modified:
1995
2153
  mtime = last_modified
1996
2154
 
1997
2155
  return StatResult(size=size, mtime=mtime, isdir=True)
1998
2156
 
1999
2157
  def stat(self, follow_symlinks=True) -> StatResult:
2000
- '''
2001
- Get StatResult of s3_url file, including file size and mtime, referring to s3_getsize and s3_getmtime
2158
+ """
2159
+ Get StatResult of s3_url file, including file size and mtime,
2160
+ referring to s3_getsize and s3_getmtime
2161
+
2162
+ If s3_url is not an existent path, which means s3_exist(s3_url) returns False,
2163
+ then raise S3FileNotFoundError
2002
2164
 
2003
- If s3_url is not an existent path, which means s3_exist(s3_url) returns False, then raise S3FileNotFoundError
2004
- If attempt to get StatResult of complete s3, such as s3_dir_url == 's3://', raise S3BucketNotFoundError
2165
+ If attempt to get StatResult of complete s3, such as s3_dir_url == 's3://',
2166
+ raise S3BucketNotFoundError
2005
2167
 
2006
2168
  :returns: StatResult
2007
2169
  :raises: S3FileNotFoundError, S3BucketNotFoundError
2008
- '''
2170
+ """
2009
2171
  islnk = False
2010
2172
  bucket, key = parse_s3_url(self.path_with_protocol)
2011
2173
  if not bucket:
2012
2174
  raise S3BucketNotFoundError(
2013
- 'Empty bucket name: %r' % self.path_with_protocol)
2175
+ "Empty bucket name: %r" % self.path_with_protocol
2176
+ )
2014
2177
 
2015
2178
  if not self.is_file():
2016
2179
  return self._get_dir_stat()
@@ -2018,68 +2181,80 @@ class S3Path(URIPath):
2018
2181
  client = self._client
2019
2182
  with raise_s3_error(self.path_with_protocol):
2020
2183
  content = client.head_object(Bucket=bucket, Key=key)
2021
- if 'Metadata' in content:
2184
+ if "Metadata" in content:
2022
2185
  metadata = dict(
2023
- (key.lower(), value)
2024
- for key, value in content['Metadata'].items())
2025
- if metadata and 'symlink_to' in metadata:
2186
+ (key.lower(), value) for key, value in content["Metadata"].items()
2187
+ )
2188
+ if metadata and "symlink_to" in metadata:
2026
2189
  islnk = True
2027
2190
  if islnk and follow_symlinks:
2028
- s3_url = metadata['symlink_to']
2191
+ s3_url = metadata["symlink_to"]
2029
2192
  bucket, key = parse_s3_url(s3_url)
2030
2193
  content = client.head_object(Bucket=bucket, Key=key)
2031
2194
  stat_record = StatResult(
2032
2195
  islnk=islnk,
2033
- size=content['ContentLength'],
2034
- mtime=content['LastModified'].timestamp(),
2035
- extra=content)
2196
+ size=content["ContentLength"],
2197
+ mtime=content["LastModified"].timestamp(),
2198
+ extra=content,
2199
+ )
2036
2200
  return stat_record
2037
2201
 
2038
2202
  def unlink(self, missing_ok: bool = False) -> None:
2039
- '''
2203
+ """
2040
2204
  Remove the file on s3
2041
2205
 
2042
- :param missing_ok: if False and target file not exists, raise S3FileNotFoundError
2206
+ :param missing_ok: if False and target file not exists,
2207
+ raise S3FileNotFoundError
2043
2208
  :raises: S3PermissionError, S3FileNotFoundError, S3IsADirectoryError
2044
- '''
2209
+ """
2045
2210
  bucket, key = parse_s3_url(self.path_with_protocol)
2046
- if not bucket or not key or key.endswith('/'):
2047
- raise S3IsADirectoryError(
2048
- 'Is a directory: %r' % self.path_with_protocol)
2211
+ if not bucket or not key or key.endswith("/"):
2212
+ raise S3IsADirectoryError("Is a directory: %r" % self.path_with_protocol)
2049
2213
  if not self.is_file():
2050
2214
  if missing_ok:
2051
2215
  return
2052
- raise S3FileNotFoundError(
2053
- 'No such file: %r' % self.path_with_protocol)
2216
+ raise S3FileNotFoundError("No such file: %r" % self.path_with_protocol)
2054
2217
 
2055
2218
  with raise_s3_error(self.path_with_protocol):
2056
2219
  self._client.delete_object(Bucket=bucket, Key=key)
2057
2220
 
2058
2221
  def walk(
2059
- self,
2060
- followlinks: bool = False
2222
+        self, followlinks: bool = False
     ) -> Iterator[Tuple[str, List[str], List[str]]]:
-        '''
-        Iteratively traverse the given s3 directory, in top-bottom order. In other words, firstly traverse parent directory, if subdirectories exist, traverse the subdirectories in alphabetical order.
+        """
+        Iteratively traverse the given s3 directory, in top-bottom order.
+        In other words, firstly traverse parent directory, if subdirectories exist,
+        traverse the subdirectories in alphabetical order.
+
         Every iteration on generator yields a 3-tuple: (root, dirs, files)
 
         - root: Current s3 path;
-        - dirs: Name list of subdirectories in current directory. The list is sorted by name in ascending alphabetical order;
-        - files: Name list of files in current directory. The list is sorted by name in ascending alphabetical order;
+        - dirs: Name list of subdirectories in current directory.
+          The list is sorted by name in ascending alphabetical order;
+        - files: Name list of files in current directory.
+          The list is sorted by name in ascending alphabetical order;
 
         If s3_url is a file path, return an empty generator
+
         If s3_url is a non-existent path, return an empty generator
-        If s3_url is a bucket path, bucket will be the top directory, and will be returned at first iteration of generator
-        If s3_url is an empty bucket, only yield one 3-tuple (notes: s3 doesn't have empty directory)
-        If s3_url doesn't contain any bucket, which is s3_url == 's3://', raise UnsupportedError. walk() on complete s3 is not supported in megfile
 
-        :param followlinks: whether followlinks is True or False, result is the same. Because s3 symlink not support dir.
+        If s3_url is a bucket path, bucket will be the top directory,
+        and will be returned at first iteration of generator
+
+        If s3_url is an empty bucket, only yield one 3-tuple
+        (notes: s3 doesn't have empty directory)
+
+        If s3_url doesn't contain any bucket, which is s3_url == 's3://',
+        raise UnsupportedError. walk() on complete s3 is not supported in megfile
+
+        :param followlinks: whether followlinks is True or False, result is the same.
+            Because s3 symlink not support dir.
         :raises: UnsupportedError
         :returns: A 3-tuple generator
-        '''
+        """
         bucket, key = parse_s3_url(self.path_with_protocol)
         if not bucket:
-            raise UnsupportedError('Walk whole s3', self.path_with_protocol)
+            raise UnsupportedError("Walk whole s3", self.path_with_protocol)
 
         if not self.is_dir():
             return
@@ -2089,23 +2264,24 @@ class S3Path(URIPath):
         while len(stack) > 0:
             current = _become_prefix(stack.pop())
             dirs, files = [], []
-            for resp in _list_objects_recursive(client, bucket, current, '/'):
-                for common_prefix in resp.get('CommonPrefixes', []):
-                    dirs.append(common_prefix['Prefix'][:-1])
-                for content in resp.get('Contents', []):
-                    files.append(content['Key'])
+            for resp in _list_objects_recursive(client, bucket, current, "/"):
+                for common_prefix in resp.get("CommonPrefixes", []):
+                    dirs.append(common_prefix["Prefix"][:-1])
+                for content in resp.get("Contents", []):
+                    files.append(content["Key"])
 
             dirs = sorted(dirs)
             stack.extend(reversed(dirs))
 
-            root = s3_path_join(
-                f'{self._protocol_with_profile}://', bucket, current)[:-1]
-            dirs = [path[len(current):] for path in dirs]
-            files = sorted(path[len(current):] for path in files)
+            root = s3_path_join(f"{self._protocol_with_profile}://", bucket, current)[
+                :-1
+            ]
+            dirs = [path[len(current) :] for path in dirs]
+            files = sorted(path[len(current) :] for path in files)
             yield root, dirs, files
 
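
The reformatted walk() docstring above describes a top-down traversal that yields (root, dirs, files) with both name lists sorted. A minimal usage sketch, assuming a reachable bucket and prefix (both hypothetical) and credentials able to list them:

    from megfile.s3_path import S3Path

    # Top-down traversal: a parent directory is yielded before its subdirectories,
    # and dirs/files come back in ascending alphabetical order.
    for root, dirs, files in S3Path("s3://mybucket/datasets").walk():
        print(root, dirs, files)
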
     def md5(self, recalculate: bool = False, followlinks: bool = False) -> str:
-        '''
+        """
         Get md5 meta info in files that uploaded/copied via megfile
 
         If meta info is lost or non-existent, return None
@@ -2113,19 +2289,21 @@ class S3Path(URIPath):
         :param recalculate: calculate md5 in real-time or return s3 etag
         :param followlinks: If is True, calculate md5 for real file
         :returns: md5 meta info
-        '''
+        """
         bucket, _ = parse_s3_url(self.path_with_protocol)
         if not bucket:
             raise S3BucketNotFoundError(
-                'Empty bucket name: %r' % self.path_with_protocol)
+                "Empty bucket name: %r" % self.path_with_protocol
+            )
         stat = self.stat(follow_symlinks=followlinks)
         if stat.isdir:
             hash_md5 = hashlib.md5()  # nosec
             for file_name in self.listdir():
-                chunk = S3Path(
-                    s3_path_join(
-                        self.path_with_protocol,
-                        file_name)).md5(recalculate=recalculate).encode()
+                chunk = (
+                    S3Path(s3_path_join(self.path_with_protocol, file_name))
+                    .md5(recalculate=recalculate)
+                    .encode()
+                )
                 hash_md5.update(chunk)
             return hash_md5.hexdigest()
         if recalculate:
@@ -2135,45 +2313,45 @@ class S3Path(URIPath):
                 path_instance = self.readlink()
             except S3NotALinkError:
                 pass
-            with path_instance.open('rb') as f:
+            with path_instance.open("rb") as f:
                 return calculate_md5(f)
-        return stat.extra.get('ETag', '')[1:-1]
+        return stat.extra.get("ETag", "")[1:-1]
 
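
As the md5() code above shows, the default result is derived from the object's ETag metadata, recalculate=True streams and hashes the content, and for a directory the per-entry digests are combined. A small sketch with a hypothetical object:

    from megfile.s3_path import S3Path

    path = S3Path("s3://mybucket/model.ckpt")
    quick = path.md5()                  # ETag-based, no data transfer
    exact = path.md5(recalculate=True)  # downloads and hashes the object
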
     def copy(
-            self,
-            dst_url: PathLike,
-            callback: Optional[Callable[[int], None]] = None,
-            followlinks: bool = False,
-            overwrite: bool = True) -> None:
-        ''' File copy on S3
+        self,
+        dst_url: PathLike,
+        callback: Optional[Callable[[int], None]] = None,
+        followlinks: bool = False,
+        overwrite: bool = True,
+    ) -> None:
+        """File copy on S3
         Copy content of file on `src_path` to `dst_path`.
-        It's caller's responsibility to ensure the s3_isfile(src_url) == True
+        It's caller's responsibility to ensure the s3_isfile(src_url) is True
 
         :param dst_path: Target file path
-        :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
+        :param callback: Called periodically during copy, and the input parameter is
+            the data size (in bytes) of copy since the last call
         :param followlinks: False if regard symlink as file, else True
         :param overwrite: whether or not overwrite file when exists, default is True
-        '''
+        """
         if not overwrite and self.from_path(dst_url).is_file():
             return
 
         src_url = self.path_with_protocol
         src_bucket, src_key = parse_s3_url(src_url)
         dst_bucket, dst_key = parse_s3_url(dst_url)
-        if dst_bucket == src_bucket and src_key.rstrip('/') == dst_key.rstrip(
-                '/'):
-            raise SameFileError(
-                f"'{src_url}' and '{dst_url}' are the same file")
+        if dst_bucket == src_bucket and src_key.rstrip("/") == dst_key.rstrip("/"):
+            raise SameFileError(f"'{src_url}' and '{dst_url}' are the same file")
 
         if not src_bucket:
-            raise S3BucketNotFoundError('Empty bucket name: %r' % src_url)
+            raise S3BucketNotFoundError("Empty bucket name: %r" % src_url)
         if self.is_dir():
-            raise S3IsADirectoryError('Is a directory: %r' % src_url)
+            raise S3IsADirectoryError("Is a directory: %r" % src_url)
 
         if not dst_bucket:
-            raise S3BucketNotFoundError('Empty bucket name: %r' % dst_url)
-        if not dst_key or dst_key.endswith('/'):
-            raise S3IsADirectoryError('Is a directory: %r' % dst_url)
+            raise S3BucketNotFoundError("Empty bucket name: %r" % dst_url)
+        if not dst_key or dst_key.endswith("/"):
+            raise S3IsADirectoryError("Is a directory: %r" % dst_url)
 
         if followlinks:
             try:
@@ -2184,30 +2362,31 @@ class S3Path(URIPath):
 
         with raise_s3_error(f"'{src_url}' or '{dst_url}'"):
             self._client.copy(
-                {
-                    'Bucket': src_bucket,
-                    'Key': src_key,
-                },
+                {"Bucket": src_bucket, "Key": src_key},
                 Bucket=dst_bucket,
                 Key=dst_key,
-                Callback=callback)
+                Callback=callback,
+            )
 
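
The copy() signature above accepts a progress callback and an overwrite switch; both are illustrated in this sketch (the paths are hypothetical, and the destination must be a file path in an existing bucket):

    from megfile.s3_path import S3Path

    def on_progress(nbytes: int) -> None:
        # receives the number of bytes copied since the previous call
        print(f"copied {nbytes} more bytes")

    src = S3Path("s3://mybucket/raw/data.bin")
    src.copy("s3://mybucket/backup/data.bin", callback=on_progress, overwrite=False)
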
     def sync(
-            self,
-            dst_url: PathLike,
-            followlinks: bool = False,
-            force: bool = False,
-            overwrite: bool = True) -> None:
-        '''
+        self,
+        dst_url: PathLike,
+        followlinks: bool = False,
+        force: bool = False,
+        overwrite: bool = True,
+    ) -> None:
+        """
         Copy file/directory on src_url to dst_url
 
         :param dst_url: Given destination path
         :param followlinks: False if regard symlink as file, else True
-        :param force: Sync file forcible, do not ignore same files, priority is higher than 'overwrite', default is False
+        :param force: Sync file forcible, do not ignore same files,
+            priority is higher than 'overwrite', default is False
         :param overwrite: whether or not overwrite file when exists, default is True
-        '''
+        """
         for src_file_path, dst_file_path in _s3_scan_pairs(
-                self.path_with_protocol, dst_url):
+            self.path_with_protocol, dst_url
+        ):
             src_file_path = self.from_path(src_file_path)
             dst_file_path = self.from_path(dst_file_path)
 
@@ -2216,29 +2395,30 @@ class S3Path(URIPath):
             elif not overwrite and dst_file_path.exists():
                 continue
             elif dst_file_path.exists() and is_same_file(
-                    src_file_path.stat(), dst_file_path.stat(), 'copy'):
+                src_file_path.stat(), dst_file_path.stat(), "copy"
+            ):
                 continue
 
             src_file_path.copy(dst_file_path, followlinks=followlinks)
 
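
sync() walks every source/destination pair and copies a file unless the pair is judged identical; force=True copies unconditionally and overwrite=False keeps existing destinations. A sketch with hypothetical prefixes:

    from megfile.s3_path import S3Path

    # Mirror a prefix; files judged identical are skipped unless force=True.
    S3Path("s3://mybucket/dataset/").sync("s3://mybucket/dataset-copy/", overwrite=True)
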
     def symlink(self, dst_path: PathLike) -> None:
-        '''
+        """
         Create a symbolic link pointing to src_path named dst_path.
 
         :param dst_path: Destination path
         :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError
-        '''
+        """
         if len(fspath(self._s3_path).encode()) > 1024:
-            raise S3NameTooLongError('File name too long: %r' % dst_path)
+            raise S3NameTooLongError("File name too long: %r" % dst_path)
         src_bucket, src_key = parse_s3_url(self.path_with_protocol)
         dst_bucket, dst_key = parse_s3_url(dst_path)
 
         if not src_bucket:
-            raise S3BucketNotFoundError('Empty bucket name: %r' % self.path)
+            raise S3BucketNotFoundError("Empty bucket name: %r" % self.path)
         if not dst_bucket:
-            raise S3BucketNotFoundError('Empty bucket name: %r' % dst_path)
-        if not dst_key or dst_key.endswith('/'):
-            raise S3IsADirectoryError('Is a directory: %r' % dst_path)
+            raise S3BucketNotFoundError("Empty bucket name: %r" % dst_path)
+        if not dst_key or dst_key.endswith("/"):
+            raise S3IsADirectoryError("Is a directory: %r" % dst_path)
 
         src_path = self._s3_path
         try:
@@ -2247,93 +2427,97 @@ class S3Path(URIPath):
             pass
         with raise_s3_error(dst_path):
             self._client.put_object(
-                Bucket=dst_bucket,
-                Key=dst_key,
-                Metadata={"symlink_to": src_path})
+                Bucket=dst_bucket, Key=dst_key, Metadata={"symlink_to": src_path}
+            )
 
-    def readlink(self) -> 'S3Path':
-        '''
-        Return a S3Path instance representing the path to which the symbolic link points.
+    def readlink(self) -> "S3Path":
+        """
+        Return a S3Path instance representing the path to which the symbolic link points
 
-        :returns: Return a S3Path instance representing the path to which the symbolic link points.
-        :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError, S3NotALinkError
-        '''
+        :returns: Return a S3Path instance representing the path to
+            which the symbolic link points.
+        :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError,
+            S3NotALinkError
+        """
         bucket, key = parse_s3_url(self.path_with_protocol)
         if not bucket:
             raise S3BucketNotFoundError(
-                'Empty bucket name: %r' % self.path_with_protocol)
-        if not key or key.endswith('/'):
-            raise S3IsADirectoryError(
-                'Is a directory: %r' % self.path_with_protocol)
+                "Empty bucket name: %r" % self.path_with_protocol
+            )
+        if not key or key.endswith("/"):
+            raise S3IsADirectoryError("Is a directory: %r" % self.path_with_protocol)
         metadata = self._s3_get_metadata()
 
-        if not 'symlink_to' in metadata:
-            raise S3NotALinkError('Not a link: %r' % self.path_with_protocol)
+        if "symlink_to" not in metadata:
+            raise S3NotALinkError("Not a link: %r" % self.path_with_protocol)
         else:
-            return self.from_path(metadata['symlink_to'])
+            return self.from_path(metadata["symlink_to"])
 
     def is_symlink(self) -> bool:
-        '''
+        """
         Test whether a path is link
 
         :returns: True if a path is link, else False
         :raises: S3NotALinkError
-        '''
+        """
         bucket, key = parse_s3_url(self.path_with_protocol)
         if not bucket:
             return False
-        if not key or key.endswith('/'):
+        if not key or key.endswith("/"):
             return False
         metadata = self._s3_get_metadata()
-        return 'symlink_to' in metadata
+        return "symlink_to" in metadata
 
2471
  def save(self, file_object: BinaryIO):
2291
- '''Write the opened binary stream to specified path, but the stream won't be closed
2472
+ """Write the opened binary stream to specified path,
2473
+ but the stream won't be closed
2292
2474
 
2293
2475
  :param file_object: Stream to be read
2294
- '''
2476
+ """
2295
2477
  bucket, key = parse_s3_url(self.path_with_protocol)
2296
2478
  if not bucket:
2297
2479
  raise S3BucketNotFoundError(
2298
- 'Empty bucket name: %r' % self.path_with_protocol)
2299
- if not key or key.endswith('/'):
2300
- raise S3IsADirectoryError(
2301
- 'Is a directory: %r' % self.path_with_protocol)
2480
+ "Empty bucket name: %r" % self.path_with_protocol
2481
+ )
2482
+ if not key or key.endswith("/"):
2483
+ raise S3IsADirectoryError("Is a directory: %r" % self.path_with_protocol)
2302
2484
 
2303
2485
  with raise_s3_error(self.path_with_protocol):
2304
2486
  self._client.upload_fileobj(file_object, Bucket=bucket, Key=key)
2305
2487
 
2306
2488
  def open(
2307
- self,
2308
- mode: str = 'r',
2309
- *,
2310
- encoding: Optional[str] = None,
2311
- errors: Optional[str] = None,
2312
- s3_open_func: Callable = s3_open,
2313
- **kwargs) -> IO:
2489
+ self,
2490
+ mode: str = "r",
2491
+ *,
2492
+ encoding: Optional[str] = None,
2493
+ errors: Optional[str] = None,
2494
+ s3_open_func: Callable = s3_open,
2495
+ **kwargs,
2496
+ ) -> IO:
2314
2497
  return s3_open_func(
2315
2498
  self,
2316
2499
  mode,
2317
2500
  encoding=encoding,
2318
2501
  errors=errors,
2319
- **necessary_params(s3_open_func, **kwargs))
2502
+ **necessary_params(s3_open_func, **kwargs),
2503
+ )
2320
2504
 
2321
- def absolute(self) -> 'S3Path':
2322
- '''
2323
- Make the path absolute, without normalization or resolving symlinks. Returns a new path object
2324
- '''
2505
+ def absolute(self) -> "S3Path":
2506
+ """
2507
+ Make the path absolute, without normalization or resolving symlinks.
2508
+ Returns a new path object
2509
+ """
2325
2510
  return self
2326
2511
 
2327
- def cwd(self) -> 'S3Path':
2328
- '''Return current working directory
2512
+ def cwd(self) -> "S3Path":
2513
+ """Return current working directory
2329
2514
 
2330
2515
  returns: Current working directory
2331
- '''
2516
+ """
2332
2517
  return self.from_path(self.path_with_protocol)
2333
2518
 
2334
2519
 
2335
2520
  class MultiPartWriter:
2336
-
2337
2521
  def __init__(self, client, path: PathLike) -> None:
2338
2522
  self._client = client
2339
2523
  self._multipart_upload_info = []
@@ -2342,7 +2526,8 @@ class MultiPartWriter:
2342
2526
  self._bucket = bucket
2343
2527
  self._key = key
2344
2528
  self._upload_id = self._client.create_multipart_upload(
2345
- Bucket=self._bucket, Key=self._key)['UploadId']
2529
+ Bucket=self._bucket, Key=self._key
2530
+ )["UploadId"]
2346
2531
 
2347
2532
  def upload_part(self, part_num: int, file_obj: io.BytesIO) -> None:
2348
2533
  response = self._client.upload_part(
@@ -2353,70 +2538,60 @@ class MultiPartWriter:
2353
2538
  Key=self._key,
2354
2539
  )
2355
2540
  self._multipart_upload_info.append(
2356
- {
2357
- 'PartNumber': part_num,
2358
- 'ETag': response['ETag']
2359
- })
2541
+ {"PartNumber": part_num, "ETag": response["ETag"]}
2542
+ )
2360
2543
 
2361
2544
  def upload_part_by_paths(
2362
- self, part_num: int, paths: List[Tuple[PathLike, str]]) -> None:
2545
+ self, part_num: int, paths: List[Tuple[PathLike, str]]
2546
+ ) -> None:
2363
2547
  file_obj = io.BytesIO()
2364
2548
 
2365
- def get_object(
2366
- client, bucket, key, range_str: Optional[str] = None) -> bytes:
2549
+ def get_object(client, bucket, key, range_str: Optional[str] = None) -> bytes:
2367
2550
  if range_str:
2368
- return client.get_object(
2369
- Bucket=bucket, Key=key, Range=range_str)['Body'].read()
2551
+ return client.get_object(Bucket=bucket, Key=key, Range=range_str)[
2552
+ "Body"
2553
+ ].read()
2370
2554
  else:
2371
- return client.get_object(Bucket=bucket, Key=key)['Body'].read()
2555
+ return client.get_object(Bucket=bucket, Key=key)["Body"].read()
2372
2556
 
2373
2557
  get_object = patch_method(
2374
- get_object,
2375
- max_retries=max_retries,
2376
- should_retry=s3_should_retry,
2558
+ get_object, max_retries=max_retries, should_retry=s3_should_retry
2377
2559
  )
2378
2560
  for path, bytes_range in paths:
2379
2561
  bucket, key = parse_s3_url(path)
2380
2562
  if bytes_range:
2381
- file_obj.write(
2382
- get_object(self._client, bucket, key, bytes_range))
2563
+ file_obj.write(get_object(self._client, bucket, key, bytes_range))
2383
2564
  else:
2384
2565
  file_obj.write(get_object(self._client, bucket, key))
2385
2566
  file_obj.seek(0, os.SEEK_SET)
2386
2567
  self.upload_part(part_num, file_obj)
2387
2568
 
2388
2569
  def upload_part_copy(
2389
- self,
2390
- part_num: int,
2391
- path: PathLike,
2392
- copy_source_range: Optional[str] = None) -> None:
2570
+ self, part_num: int, path: PathLike, copy_source_range: Optional[str] = None
2571
+ ) -> None:
2393
2572
  bucket, key = parse_s3_url(path)
2394
2573
  params = dict(
2395
2574
  UploadId=self._upload_id,
2396
2575
  PartNumber=part_num,
2397
- CopySource={
2398
- 'Bucket': bucket,
2399
- 'Key': key
2400
- },
2576
+ CopySource={"Bucket": bucket, "Key": key},
2401
2577
  Bucket=self._bucket,
2402
2578
  Key=self._key,
2403
2579
  )
2404
2580
  if copy_source_range:
2405
- params['CopySourceRange'] = copy_source_range
2581
+ params["CopySourceRange"] = copy_source_range
2406
2582
  response = self._client.upload_part_copy(**params)
2407
2583
  self._multipart_upload_info.append(
2408
- {
2409
- 'PartNumber': part_num,
2410
- 'ETag': response['CopyPartResult']['ETag']
2411
- })
2584
+ {"PartNumber": part_num, "ETag": response["CopyPartResult"]["ETag"]}
2585
+ )
2412
2586
 
2413
2587
  def close(self):
2414
- self._multipart_upload_info.sort(key=lambda t: t['PartNumber'])
2588
+ self._multipart_upload_info.sort(key=lambda t: t["PartNumber"])
2415
2589
  self._client.complete_multipart_upload(
2416
2590
  UploadId=self._upload_id,
2417
2591
  Bucket=self._bucket,
2418
2592
  Key=self._key,
2419
- MultipartUpload={'Parts': self._multipart_upload_info})
2593
+ MultipartUpload={"Parts": self._multipart_upload_info},
2594
+ )
2420
2595
 
2421
2596
  def __enter__(self):
2422
2597
  return self
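
MultiPartWriter is an internal helper around the S3 multipart-upload calls: upload_part() sends an in-memory part, upload_part_by_paths() assembles a part from existing objects (optionally by byte range), upload_part_copy() copies a part server-side, and close() sorts the recorded part numbers and completes the upload. A rough sketch, assuming a configured boto3 client and hypothetical keys, calling close() explicitly rather than relying on the context-manager exit (which is defined outside this hunk):

    import io

    import boto3

    from megfile.s3_path import MultiPartWriter

    client = boto3.client("s3")
    writer = MultiPartWriter(client, "s3://mybucket/big.bin")
    # Every part except the last must meet S3's 5 MiB minimum part size.
    writer.upload_part(1, io.BytesIO(b"\0" * (5 * 1024 * 1024)))
    writer.upload_part_by_paths(2, [("s3://mybucket/chunk-a", "bytes=0-5242879")])
    writer.upload_part(3, io.BytesIO(b"tail"))
    writer.close()  # completes the multipart upload with the recorded ETags
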