megfile 4.1.3.post1__py3-none-any.whl → 4.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/s3_path.py CHANGED
@@ -290,14 +290,23 @@ def get_s3_client(
290
290
  profile_name=profile_name,
291
291
  )
292
292
 
293
+ try:
294
+ default_config = botocore.config.Config(
295
+ connect_timeout=5,
296
+ max_pool_connections=GLOBAL_MAX_WORKERS,
297
+ request_checksum_calculation="when_required",
298
+ response_checksum_validation="when_required",
299
+ )
300
+ except TypeError: # botocore < 1.36.0
301
+ default_config = botocore.config.Config(
302
+ connect_timeout=5,
303
+ max_pool_connections=GLOBAL_MAX_WORKERS,
304
+ )
305
+
293
306
  if config:
294
- config = botocore.config.Config(
295
- connect_timeout=5, max_pool_connections=GLOBAL_MAX_WORKERS
296
- ).merge(config)
307
+ config = default_config.merge(config)
297
308
  else:
298
- config = botocore.config.Config(
299
- connect_timeout=5, max_pool_connections=GLOBAL_MAX_WORKERS
300
- )
309
+ config = default_config
301
310
 
302
311
  addressing_style = get_env_var("AWS_S3_ADDRESSING_STYLE", profile_name=profile_name)
303
312
  if addressing_style:
@@ -366,12 +375,9 @@ def _list_all_buckets(profile_name: Optional[str] = None) -> List[str]:
366
375
  return [content["Name"] for content in response["Buckets"]]
367
376
 
368
377
 
369
- def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
370
- s3_url = fspath(s3_url)
371
- s3_scheme, right_part = s3_url[:5], s3_url[5:]
372
- if s3_scheme != "s3://":
373
- raise ValueError("Not a s3 url: %r" % s3_url)
378
+ def _parse_s3_url_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
374
379
  left_brace = False
380
+ right_part = s3_pathname.split("://", maxsplit=1)[1]
375
381
  for current_index, current_character in enumerate(right_part):
376
382
  if current_character == "/" and left_brace is False:
377
383
  return right_part[:current_index], right_part[current_index + 1 :]
@@ -382,9 +388,14 @@ def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
382
388
  return right_part, ""
383
389
 
384
390
 
385
- def _group_s3path_by_bucket(
386
- s3_pathname: str, profile_name: Optional[str] = None
387
- ) -> List[str]:
391
+ def _parse_s3_url_profile(s3_pathname: str) -> Tuple[str, Optional[str]]:
392
+ protocol = s3_pathname.split("://", maxsplit=1)[0]
393
+ profile_name = protocol[3:] if protocol.startswith("s3+") else None
394
+ return protocol, profile_name
395
+
396
+
397
+ def _group_s3path_by_bucket(s3_pathname: str) -> List[str]:
398
+ protocol, profile_name = _parse_s3_url_profile(s3_pathname)
388
399
  bucket, key = _parse_s3_url_ignore_brace(s3_pathname)
389
400
  if not bucket:
390
401
  if not key:
@@ -395,8 +406,8 @@ def _group_s3path_by_bucket(
395
406
 
396
407
  def generate_s3_path(bucket: str, key: str) -> str:
397
408
  if key:
398
- return "s3://%s/%s" % (bucket, key)
399
- return "s3://%s%s" % (bucket, "/" if s3_pathname.endswith("/") else "")
409
+ return f"{protocol}://{bucket}/{key}"
410
+ return f"{protocol}://{bucket}{'/' if s3_pathname.endswith('/') else ''}"
400
411
 
401
412
  all_bucket = lru_cache(maxsize=1)(_list_all_buckets)
402
413
  for bucket_name in ungloblize(bucket):
@@ -418,23 +429,11 @@ def _group_s3path_by_bucket(
418
429
 
419
430
 
420
431
  def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
421
- if not s3_pathname:
422
- raise ValueError("s3_pathname: %s", s3_pathname)
423
-
424
- has_protocol = False
425
- if s3_pathname.startswith("s3://"):
426
- has_protocol = True
427
- s3_pathname = s3_pathname[5:]
428
-
429
- has_delimiter = False
430
- if s3_pathname.endswith("/"):
431
- has_delimiter = True
432
- s3_pathname = s3_pathname[:-1]
433
-
434
- normal_parts = []
435
- magic_parts = []
436
- left_brace = False
437
- left_index = 0
432
+ left_brace, left_index = False, 0
433
+ normal_parts, magic_parts = [], []
434
+ s3_pathname_with_suffix = s3_pathname
435
+ s3_pathname = s3_pathname.rstrip("/")
436
+ suffix = (len(s3_pathname_with_suffix) - len(s3_pathname)) * "/"
438
437
  for current_index, current_character in enumerate(s3_pathname):
439
438
  if current_character == "/" and left_brace is False:
440
439
  if has_magic_ignore_brace(s3_pathname[left_index:current_index]):
@@ -454,18 +453,13 @@ def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
454
453
  magic_parts.append(s3_pathname[left_index:])
455
454
  else:
456
455
  normal_parts.append(s3_pathname[left_index:])
457
-
458
- if has_protocol and normal_parts:
459
- normal_parts.insert(0, "s3:/")
460
- elif has_protocol:
461
- magic_parts.insert(0, "s3:/")
462
-
463
- if has_delimiter and magic_parts:
464
- magic_parts.append("")
465
- elif has_delimiter:
466
- normal_parts.append("")
467
-
468
- return "/".join(normal_parts), "/".join(magic_parts)
456
+ top_dir, magic_part = "/".join(normal_parts), "/".join(magic_parts)
457
+ if suffix:
458
+ if magic_part:
459
+ magic_part += suffix
460
+ else:
461
+ top_dir += suffix
462
+ return top_dir, magic_part
469
463
 
470
464
 
471
465
  def _group_s3path_by_prefix(s3_pathname: str) -> List[str]:
@@ -493,17 +487,15 @@ def _become_prefix(prefix: str) -> str:
493
487
  def _s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
494
488
  if not has_magic(s3_pathname):
495
489
  return s3_pathname, ""
496
- delimiter = "/"
497
490
  normal_parts = []
498
491
  magic_parts = []
499
- all_parts = s3_pathname.split(delimiter)
492
+ all_parts = s3_pathname.split("/")
500
493
  for i, part in enumerate(all_parts):
501
- if not has_magic(part):
502
- normal_parts.append(part)
503
- else:
494
+ if has_magic(part):
504
495
  magic_parts = all_parts[i:]
505
496
  break
506
- return delimiter.join(normal_parts), delimiter.join(magic_parts)
497
+ normal_parts.append(part)
498
+ return "/".join(normal_parts), "/".join(magic_parts)
507
499
 
508
500
 
509
501
  def _list_objects_recursive(s3_client, bucket: str, prefix: str, delimiter: str = ""):
@@ -574,12 +566,12 @@ def _s3_glob_stat_single_path(
574
566
  recursive: bool = True,
575
567
  missing_ok: bool = True,
576
568
  followlinks: bool = False,
577
- profile_name: Optional[str] = None,
578
569
  ) -> Iterator[FileEntry]:
579
570
  s3_pathname = fspath(s3_pathname)
580
571
  if not recursive:
581
572
  # If not recursive, replace ** with *
582
573
  s3_pathname = re.sub(r"\*{2,}", "*", s3_pathname)
574
+ protocol, profile_name = _parse_s3_url_profile(s3_pathname)
583
575
  top_dir, wildcard_part = _s3_split_magic(s3_pathname)
584
576
  search_dir = wildcard_part.endswith("/")
585
577
 
@@ -596,7 +588,7 @@ def _s3_glob_stat_single_path(
596
588
  if not has_magic(_s3_pathname):
597
589
  _s3_pathname_obj = S3Path(_s3_pathname)
598
590
  if _s3_pathname_obj.is_file():
599
- stat = S3Path(_s3_pathname).stat(follow_symlinks=followlinks)
591
+ stat = _s3_pathname_obj.stat(follow_symlinks=followlinks)
600
592
  yield FileEntry(_s3_pathname_obj.name, _s3_pathname_obj.path, stat)
601
593
  if _s3_pathname_obj.is_dir():
602
594
  yield FileEntry(
@@ -616,7 +608,7 @@ def _s3_glob_stat_single_path(
616
608
  with raise_s3_error(_s3_pathname, S3BucketNotFoundError):
617
609
  for resp in _list_objects_recursive(client, bucket, prefix, delimiter):
618
610
  for content in resp.get("Contents", []):
619
- path = s3_path_join("s3://", bucket, content["Key"])
611
+ path = s3_path_join(f"{protocol}://", bucket, content["Key"])
620
612
  if not search_dir and pattern.match(path):
621
613
  yield FileEntry(S3Path(path).name, path, _make_stat(content))
622
614
  dirname = os.path.dirname(path)
@@ -629,7 +621,9 @@ def _s3_glob_stat_single_path(
629
621
  )
630
622
  dirname = os.path.dirname(dirname)
631
623
  for common_prefix in resp.get("CommonPrefixes", []):
632
- path = s3_path_join("s3://", bucket, common_prefix["Prefix"])
624
+ path = s3_path_join(
625
+ f"{protocol}://", bucket, common_prefix["Prefix"]
626
+ )
633
627
  dirname = os.path.dirname(path)
634
628
  if dirname not in dirnames and dirname != top_dir:
635
629
  dirnames.add(dirname)
@@ -1595,26 +1589,19 @@ class S3Path(URIPath):
1595
1589
  :returns: A generator contains tuples of path and file stat,
1596
1590
  in which paths match `s3_pathname`
1597
1591
  """
1598
- glob_path = self._s3_path
1592
+ glob_path = self.path_with_protocol
1599
1593
  if pattern:
1600
- glob_path = self.joinpath(pattern)._s3_path
1594
+ glob_path = self.joinpath(pattern).path_with_protocol
1601
1595
  s3_pathname = fspath(glob_path)
1602
1596
 
1603
1597
  def create_generator():
1604
- for group_s3_pathname_1 in _group_s3path_by_bucket(
1605
- s3_pathname, self._profile_name
1606
- ):
1598
+ for group_s3_pathname_1 in _group_s3path_by_bucket(s3_pathname):
1607
1599
  for group_s3_pathname_2 in _group_s3path_by_prefix(group_s3_pathname_1):
1608
1600
  for file_entry in _s3_glob_stat_single_path(
1609
1601
  group_s3_pathname_2,
1610
1602
  recursive,
1611
1603
  missing_ok,
1612
- profile_name=self._profile_name,
1613
1604
  ):
1614
- if self._profile_name:
1615
- file_entry = file_entry._replace(
1616
- path=f"{self._protocol_with_profile}://{file_entry.path[5:]}"
1617
- )
1618
1605
  yield file_entry
1619
1606
 
1620
1607
  return _create_missing_ok_generator(
@@ -2044,7 +2031,7 @@ class S3Path(URIPath):
2044
2031
  for content in response["Buckets"]:
2045
2032
  yield FileEntry(
2046
2033
  content["Name"],
2047
- f"s3://{content['Name']}",
2034
+ f"{self._protocol_with_profile}://{content['Name']}",
2048
2035
  StatResult(
2049
2036
  ctime=content["CreationDate"].timestamp(),
2050
2037
  isdir=True,
megfile/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "4.1.3.post1"
1
+ VERSION = "4.1.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megfile
3
- Version: 4.1.3.post1
3
+ Version: 4.1.4
4
4
  Summary: Megvii file operation library
5
5
  Author-email: megvii <megfile@megvii.com>
6
6
  Project-URL: Homepage, https://github.com/megvii-research/megfile
@@ -11,14 +11,14 @@ megfile/http_path.py,sha256=yRIk-fNbrsY8rUS5KVOfocS_PS520dX5KOs8lImpLaY,14173
11
11
  megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
12
12
  megfile/pathlike.py,sha256=3Hnw-fn6RcIe9iPrJt00QdHSA--UfDyxnVBuZ_ymYYQ,31278
13
13
  megfile/s3.py,sha256=abBxnI7RIyn7n7qjGszP1VruYd6Gi9I8QnUOvsHkx1Y,16325
14
- megfile/s3_path.py,sha256=zelXhlRVOVSWBE6HJz0vXrrcRzSuj6Cnjd9HLGwPbCM,93644
14
+ megfile/s3_path.py,sha256=S8iulho1btVLLvNU-OtcskkbaAC8yNXnBrFNnF3fwS8,93510
15
15
  megfile/sftp.py,sha256=uBcLQs-j6Q-q-sWAdd-pgi5Qmb_kq7boJM-0sCfcNO0,26540
16
16
  megfile/sftp_path.py,sha256=Wz4VcQ0pBUuWDGMSxPpPbutrT09mnY6jZNiAqTi5tO4,43840
17
17
  megfile/smart.py,sha256=Sae2KJzaU0k_qV_Bk0YifOMq8WsV5qQ2pGInDRF546I,36411
18
18
  megfile/smart_path.py,sha256=Up_6xNZ2019iSzMn_JAU_1H--z-AP6O7SxdXGdeTG0c,7659
19
19
  megfile/stdio.py,sha256=ZwxsnJNJYIT7Iyg5pIw4qiyH8bszG6oAhEJuR-hXGG4,658
20
20
  megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
21
- megfile/version.py,sha256=eKLHLHdKHcrWOl_XPErEVRYmoIEhUtehLCugzxSUR-E,25
21
+ megfile/version.py,sha256=oeG77LWvRtozNToerGYb2jXBRGCoB_YTBpQNMOpT1Gc,19
22
22
  megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  megfile/lib/base_prefetch_reader.py,sha256=uxVwYknOjc8hLF7q_T2QKMsBqFcrf411ZsuK25CN1eQ,12848
24
24
  megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
@@ -43,10 +43,10 @@ megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,
43
43
  megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
44
44
  megfile/utils/__init__.py,sha256=xAzmICA0MtAbg-I2yPfeHjA1N4CiMP4sBrC9BgrfZLw,10151
45
45
  megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
46
- megfile-4.1.3.post1.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
- megfile-4.1.3.post1.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
48
- megfile-4.1.3.post1.dist-info/METADATA,sha256=oI1b4MMW9GRz3mav_TZ2MSX6C8y1z1QebHkcCZcrcB0,9601
49
- megfile-4.1.3.post1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
50
- megfile-4.1.3.post1.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
51
- megfile-4.1.3.post1.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
52
- megfile-4.1.3.post1.dist-info/RECORD,,
46
+ megfile-4.1.4.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
+ megfile-4.1.4.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
48
+ megfile-4.1.4.dist-info/METADATA,sha256=eK8epRuIQh0igo6DhBw30mRrSw4ssvgt4_voFo-zJYc,9595
49
+ megfile-4.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
50
+ megfile-4.1.4.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
51
+ megfile-4.1.4.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
52
+ megfile-4.1.4.dist-info/RECORD,,