megfile 4.1.3.post1__py3-none-any.whl → 4.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/s3_path.py +53 -66
- megfile/version.py +1 -1
- {megfile-4.1.3.post1.dist-info → megfile-4.1.4.dist-info}/METADATA +1 -1
- {megfile-4.1.3.post1.dist-info → megfile-4.1.4.dist-info}/RECORD +9 -9
- {megfile-4.1.3.post1.dist-info → megfile-4.1.4.dist-info}/WHEEL +0 -0
- {megfile-4.1.3.post1.dist-info → megfile-4.1.4.dist-info}/entry_points.txt +0 -0
- {megfile-4.1.3.post1.dist-info → megfile-4.1.4.dist-info}/licenses/LICENSE +0 -0
- {megfile-4.1.3.post1.dist-info → megfile-4.1.4.dist-info}/licenses/LICENSE.pyre +0 -0
- {megfile-4.1.3.post1.dist-info → megfile-4.1.4.dist-info}/top_level.txt +0 -0
megfile/s3_path.py
CHANGED
|
@@ -290,14 +290,23 @@ def get_s3_client(
|
|
|
290
290
|
profile_name=profile_name,
|
|
291
291
|
)
|
|
292
292
|
|
|
293
|
+
try:
|
|
294
|
+
default_config = botocore.config.Config(
|
|
295
|
+
connect_timeout=5,
|
|
296
|
+
max_pool_connections=GLOBAL_MAX_WORKERS,
|
|
297
|
+
request_checksum_calculation="when_required",
|
|
298
|
+
response_checksum_validation="when_required",
|
|
299
|
+
)
|
|
300
|
+
except TypeError: # botocore < 1.36.0
|
|
301
|
+
default_config = botocore.config.Config(
|
|
302
|
+
connect_timeout=5,
|
|
303
|
+
max_pool_connections=GLOBAL_MAX_WORKERS,
|
|
304
|
+
)
|
|
305
|
+
|
|
293
306
|
if config:
|
|
294
|
-
config =
|
|
295
|
-
connect_timeout=5, max_pool_connections=GLOBAL_MAX_WORKERS
|
|
296
|
-
).merge(config)
|
|
307
|
+
config = default_config.merge(config)
|
|
297
308
|
else:
|
|
298
|
-
config =
|
|
299
|
-
connect_timeout=5, max_pool_connections=GLOBAL_MAX_WORKERS
|
|
300
|
-
)
|
|
309
|
+
config = default_config
|
|
301
310
|
|
|
302
311
|
addressing_style = get_env_var("AWS_S3_ADDRESSING_STYLE", profile_name=profile_name)
|
|
303
312
|
if addressing_style:
|
|
@@ -366,12 +375,9 @@ def _list_all_buckets(profile_name: Optional[str] = None) -> List[str]:
|
|
|
366
375
|
return [content["Name"] for content in response["Buckets"]]
|
|
367
376
|
|
|
368
377
|
|
|
369
|
-
def _parse_s3_url_ignore_brace(
|
|
370
|
-
s3_url = fspath(s3_url)
|
|
371
|
-
s3_scheme, right_part = s3_url[:5], s3_url[5:]
|
|
372
|
-
if s3_scheme != "s3://":
|
|
373
|
-
raise ValueError("Not a s3 url: %r" % s3_url)
|
|
378
|
+
def _parse_s3_url_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
|
|
374
379
|
left_brace = False
|
|
380
|
+
right_part = s3_pathname.split("://", maxsplit=1)[1]
|
|
375
381
|
for current_index, current_character in enumerate(right_part):
|
|
376
382
|
if current_character == "/" and left_brace is False:
|
|
377
383
|
return right_part[:current_index], right_part[current_index + 1 :]
|
|
@@ -382,9 +388,14 @@ def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
|
|
|
382
388
|
return right_part, ""
|
|
383
389
|
|
|
384
390
|
|
|
385
|
-
def
|
|
386
|
-
s3_pathname
|
|
387
|
-
|
|
391
|
+
def _parse_s3_url_profile(s3_pathname: str) -> Tuple[str, Optional[str]]:
|
|
392
|
+
protocol = s3_pathname.split("://", maxsplit=1)[0]
|
|
393
|
+
profile_name = protocol[3:] if protocol.startswith("s3+") else None
|
|
394
|
+
return protocol, profile_name
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _group_s3path_by_bucket(s3_pathname: str) -> List[str]:
|
|
398
|
+
protocol, profile_name = _parse_s3_url_profile(s3_pathname)
|
|
388
399
|
bucket, key = _parse_s3_url_ignore_brace(s3_pathname)
|
|
389
400
|
if not bucket:
|
|
390
401
|
if not key:
|
|
@@ -395,8 +406,8 @@ def _group_s3path_by_bucket(
|
|
|
395
406
|
|
|
396
407
|
def generate_s3_path(bucket: str, key: str) -> str:
|
|
397
408
|
if key:
|
|
398
|
-
return "
|
|
399
|
-
return "
|
|
409
|
+
return f"{protocol}://{bucket}/{key}"
|
|
410
|
+
return f"{protocol}://{bucket}{'/' if s3_pathname.endswith('/') else ''}"
|
|
400
411
|
|
|
401
412
|
all_bucket = lru_cache(maxsize=1)(_list_all_buckets)
|
|
402
413
|
for bucket_name in ungloblize(bucket):
|
|
@@ -418,23 +429,11 @@ def _group_s3path_by_bucket(
|
|
|
418
429
|
|
|
419
430
|
|
|
420
431
|
def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
has_protocol = True
|
|
427
|
-
s3_pathname = s3_pathname[5:]
|
|
428
|
-
|
|
429
|
-
has_delimiter = False
|
|
430
|
-
if s3_pathname.endswith("/"):
|
|
431
|
-
has_delimiter = True
|
|
432
|
-
s3_pathname = s3_pathname[:-1]
|
|
433
|
-
|
|
434
|
-
normal_parts = []
|
|
435
|
-
magic_parts = []
|
|
436
|
-
left_brace = False
|
|
437
|
-
left_index = 0
|
|
432
|
+
left_brace, left_index = False, 0
|
|
433
|
+
normal_parts, magic_parts = [], []
|
|
434
|
+
s3_pathname_with_suffix = s3_pathname
|
|
435
|
+
s3_pathname = s3_pathname.rstrip("/")
|
|
436
|
+
suffix = (len(s3_pathname_with_suffix) - len(s3_pathname)) * "/"
|
|
438
437
|
for current_index, current_character in enumerate(s3_pathname):
|
|
439
438
|
if current_character == "/" and left_brace is False:
|
|
440
439
|
if has_magic_ignore_brace(s3_pathname[left_index:current_index]):
|
|
@@ -454,18 +453,13 @@ def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
|
|
|
454
453
|
magic_parts.append(s3_pathname[left_index:])
|
|
455
454
|
else:
|
|
456
455
|
normal_parts.append(s3_pathname[left_index:])
|
|
457
|
-
|
|
458
|
-
if
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
magic_parts.append("")
|
|
465
|
-
elif has_delimiter:
|
|
466
|
-
normal_parts.append("")
|
|
467
|
-
|
|
468
|
-
return "/".join(normal_parts), "/".join(magic_parts)
|
|
456
|
+
top_dir, magic_part = "/".join(normal_parts), "/".join(magic_parts)
|
|
457
|
+
if suffix:
|
|
458
|
+
if magic_part:
|
|
459
|
+
magic_part += suffix
|
|
460
|
+
else:
|
|
461
|
+
top_dir += suffix
|
|
462
|
+
return top_dir, magic_part
|
|
469
463
|
|
|
470
464
|
|
|
471
465
|
def _group_s3path_by_prefix(s3_pathname: str) -> List[str]:
|
|
@@ -493,17 +487,15 @@ def _become_prefix(prefix: str) -> str:
|
|
|
493
487
|
def _s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
|
|
494
488
|
if not has_magic(s3_pathname):
|
|
495
489
|
return s3_pathname, ""
|
|
496
|
-
delimiter = "/"
|
|
497
490
|
normal_parts = []
|
|
498
491
|
magic_parts = []
|
|
499
|
-
all_parts = s3_pathname.split(
|
|
492
|
+
all_parts = s3_pathname.split("/")
|
|
500
493
|
for i, part in enumerate(all_parts):
|
|
501
|
-
if
|
|
502
|
-
normal_parts.append(part)
|
|
503
|
-
else:
|
|
494
|
+
if has_magic(part):
|
|
504
495
|
magic_parts = all_parts[i:]
|
|
505
496
|
break
|
|
506
|
-
|
|
497
|
+
normal_parts.append(part)
|
|
498
|
+
return "/".join(normal_parts), "/".join(magic_parts)
|
|
507
499
|
|
|
508
500
|
|
|
509
501
|
def _list_objects_recursive(s3_client, bucket: str, prefix: str, delimiter: str = ""):
|
|
@@ -574,12 +566,12 @@ def _s3_glob_stat_single_path(
|
|
|
574
566
|
recursive: bool = True,
|
|
575
567
|
missing_ok: bool = True,
|
|
576
568
|
followlinks: bool = False,
|
|
577
|
-
profile_name: Optional[str] = None,
|
|
578
569
|
) -> Iterator[FileEntry]:
|
|
579
570
|
s3_pathname = fspath(s3_pathname)
|
|
580
571
|
if not recursive:
|
|
581
572
|
# If not recursive, replace ** with *
|
|
582
573
|
s3_pathname = re.sub(r"\*{2,}", "*", s3_pathname)
|
|
574
|
+
protocol, profile_name = _parse_s3_url_profile(s3_pathname)
|
|
583
575
|
top_dir, wildcard_part = _s3_split_magic(s3_pathname)
|
|
584
576
|
search_dir = wildcard_part.endswith("/")
|
|
585
577
|
|
|
@@ -596,7 +588,7 @@ def _s3_glob_stat_single_path(
|
|
|
596
588
|
if not has_magic(_s3_pathname):
|
|
597
589
|
_s3_pathname_obj = S3Path(_s3_pathname)
|
|
598
590
|
if _s3_pathname_obj.is_file():
|
|
599
|
-
stat =
|
|
591
|
+
stat = _s3_pathname_obj.stat(follow_symlinks=followlinks)
|
|
600
592
|
yield FileEntry(_s3_pathname_obj.name, _s3_pathname_obj.path, stat)
|
|
601
593
|
if _s3_pathname_obj.is_dir():
|
|
602
594
|
yield FileEntry(
|
|
@@ -616,7 +608,7 @@ def _s3_glob_stat_single_path(
|
|
|
616
608
|
with raise_s3_error(_s3_pathname, S3BucketNotFoundError):
|
|
617
609
|
for resp in _list_objects_recursive(client, bucket, prefix, delimiter):
|
|
618
610
|
for content in resp.get("Contents", []):
|
|
619
|
-
path = s3_path_join("
|
|
611
|
+
path = s3_path_join(f"{protocol}://", bucket, content["Key"])
|
|
620
612
|
if not search_dir and pattern.match(path):
|
|
621
613
|
yield FileEntry(S3Path(path).name, path, _make_stat(content))
|
|
622
614
|
dirname = os.path.dirname(path)
|
|
@@ -629,7 +621,9 @@ def _s3_glob_stat_single_path(
|
|
|
629
621
|
)
|
|
630
622
|
dirname = os.path.dirname(dirname)
|
|
631
623
|
for common_prefix in resp.get("CommonPrefixes", []):
|
|
632
|
-
path = s3_path_join(
|
|
624
|
+
path = s3_path_join(
|
|
625
|
+
f"{protocol}://", bucket, common_prefix["Prefix"]
|
|
626
|
+
)
|
|
633
627
|
dirname = os.path.dirname(path)
|
|
634
628
|
if dirname not in dirnames and dirname != top_dir:
|
|
635
629
|
dirnames.add(dirname)
|
|
@@ -1595,26 +1589,19 @@ class S3Path(URIPath):
|
|
|
1595
1589
|
:returns: A generator contains tuples of path and file stat,
|
|
1596
1590
|
in which paths match `s3_pathname`
|
|
1597
1591
|
"""
|
|
1598
|
-
glob_path = self.
|
|
1592
|
+
glob_path = self.path_with_protocol
|
|
1599
1593
|
if pattern:
|
|
1600
|
-
glob_path = self.joinpath(pattern).
|
|
1594
|
+
glob_path = self.joinpath(pattern).path_with_protocol
|
|
1601
1595
|
s3_pathname = fspath(glob_path)
|
|
1602
1596
|
|
|
1603
1597
|
def create_generator():
|
|
1604
|
-
for group_s3_pathname_1 in _group_s3path_by_bucket(
|
|
1605
|
-
s3_pathname, self._profile_name
|
|
1606
|
-
):
|
|
1598
|
+
for group_s3_pathname_1 in _group_s3path_by_bucket(s3_pathname):
|
|
1607
1599
|
for group_s3_pathname_2 in _group_s3path_by_prefix(group_s3_pathname_1):
|
|
1608
1600
|
for file_entry in _s3_glob_stat_single_path(
|
|
1609
1601
|
group_s3_pathname_2,
|
|
1610
1602
|
recursive,
|
|
1611
1603
|
missing_ok,
|
|
1612
|
-
profile_name=self._profile_name,
|
|
1613
1604
|
):
|
|
1614
|
-
if self._profile_name:
|
|
1615
|
-
file_entry = file_entry._replace(
|
|
1616
|
-
path=f"{self._protocol_with_profile}://{file_entry.path[5:]}"
|
|
1617
|
-
)
|
|
1618
1605
|
yield file_entry
|
|
1619
1606
|
|
|
1620
1607
|
return _create_missing_ok_generator(
|
|
@@ -2044,7 +2031,7 @@ class S3Path(URIPath):
|
|
|
2044
2031
|
for content in response["Buckets"]:
|
|
2045
2032
|
yield FileEntry(
|
|
2046
2033
|
content["Name"],
|
|
2047
|
-
f"
|
|
2034
|
+
f"{self._protocol_with_profile}://{content['Name']}",
|
|
2048
2035
|
StatResult(
|
|
2049
2036
|
ctime=content["CreationDate"].timestamp(),
|
|
2050
2037
|
isdir=True,
|
megfile/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "4.1.
|
|
1
|
+
VERSION = "4.1.4"
|
|
@@ -11,14 +11,14 @@ megfile/http_path.py,sha256=yRIk-fNbrsY8rUS5KVOfocS_PS520dX5KOs8lImpLaY,14173
|
|
|
11
11
|
megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
|
|
12
12
|
megfile/pathlike.py,sha256=3Hnw-fn6RcIe9iPrJt00QdHSA--UfDyxnVBuZ_ymYYQ,31278
|
|
13
13
|
megfile/s3.py,sha256=abBxnI7RIyn7n7qjGszP1VruYd6Gi9I8QnUOvsHkx1Y,16325
|
|
14
|
-
megfile/s3_path.py,sha256=
|
|
14
|
+
megfile/s3_path.py,sha256=S8iulho1btVLLvNU-OtcskkbaAC8yNXnBrFNnF3fwS8,93510
|
|
15
15
|
megfile/sftp.py,sha256=uBcLQs-j6Q-q-sWAdd-pgi5Qmb_kq7boJM-0sCfcNO0,26540
|
|
16
16
|
megfile/sftp_path.py,sha256=Wz4VcQ0pBUuWDGMSxPpPbutrT09mnY6jZNiAqTi5tO4,43840
|
|
17
17
|
megfile/smart.py,sha256=Sae2KJzaU0k_qV_Bk0YifOMq8WsV5qQ2pGInDRF546I,36411
|
|
18
18
|
megfile/smart_path.py,sha256=Up_6xNZ2019iSzMn_JAU_1H--z-AP6O7SxdXGdeTG0c,7659
|
|
19
19
|
megfile/stdio.py,sha256=ZwxsnJNJYIT7Iyg5pIw4qiyH8bszG6oAhEJuR-hXGG4,658
|
|
20
20
|
megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
|
|
21
|
-
megfile/version.py,sha256=
|
|
21
|
+
megfile/version.py,sha256=oeG77LWvRtozNToerGYb2jXBRGCoB_YTBpQNMOpT1Gc,19
|
|
22
22
|
megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
23
|
megfile/lib/base_prefetch_reader.py,sha256=uxVwYknOjc8hLF7q_T2QKMsBqFcrf411ZsuK25CN1eQ,12848
|
|
24
24
|
megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
|
|
@@ -43,10 +43,10 @@ megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,
|
|
|
43
43
|
megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
|
|
44
44
|
megfile/utils/__init__.py,sha256=xAzmICA0MtAbg-I2yPfeHjA1N4CiMP4sBrC9BgrfZLw,10151
|
|
45
45
|
megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
|
|
46
|
-
megfile-4.1.
|
|
47
|
-
megfile-4.1.
|
|
48
|
-
megfile-4.1.
|
|
49
|
-
megfile-4.1.
|
|
50
|
-
megfile-4.1.
|
|
51
|
-
megfile-4.1.
|
|
52
|
-
megfile-4.1.
|
|
46
|
+
megfile-4.1.4.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
|
47
|
+
megfile-4.1.4.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
|
|
48
|
+
megfile-4.1.4.dist-info/METADATA,sha256=eK8epRuIQh0igo6DhBw30mRrSw4ssvgt4_voFo-zJYc,9595
|
|
49
|
+
megfile-4.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
50
|
+
megfile-4.1.4.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
|
|
51
|
+
megfile-4.1.4.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
|
|
52
|
+
megfile-4.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|