megfile 4.0.4__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/errors.py +2 -2
- megfile/fs.py +1 -1
- megfile/fs_path.py +18 -11
- megfile/hdfs.py +7 -7
- megfile/hdfs_path.py +39 -25
- megfile/lib/base_prefetch_reader.py +7 -13
- megfile/lib/combine_reader.py +1 -1
- megfile/lib/glob.py +6 -16
- megfile/lib/s3_cached_handler.py +3 -3
- megfile/lib/s3_limited_seekable_writer.py +1 -1
- megfile/lib/s3_memory_handler.py +3 -3
- megfile/lib/s3_pipe_handler.py +4 -4
- megfile/pathlike.py +5 -5
- megfile/s3.py +11 -21
- megfile/s3_path.py +174 -164
- megfile/sftp.py +7 -4
- megfile/sftp_path.py +33 -27
- megfile/smart.py +3 -4
- megfile/stdio.py +2 -1
- megfile/stdio_path.py +1 -0
- megfile/version.py +1 -1
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/METADATA +2 -2
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/RECORD +28 -28
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/WHEEL +1 -1
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/LICENSE +0 -0
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/LICENSE.pyre +0 -0
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/entry_points.txt +0 -0
- {megfile-4.0.4.dist-info → megfile-4.1.0.dist-info}/top_level.txt +0 -0
megfile/s3_path.py
CHANGED
|
@@ -200,10 +200,9 @@ def parse_s3_url(s3_url: PathLike) -> Tuple[str, str]:
|
|
|
200
200
|
|
|
201
201
|
def get_scoped_config(profile_name: Optional[str] = None) -> Dict:
|
|
202
202
|
try:
|
|
203
|
-
|
|
203
|
+
return get_s3_session(profile_name=profile_name)._session.get_scoped_config()
|
|
204
204
|
except botocore.exceptions.ProfileNotFound:
|
|
205
|
-
|
|
206
|
-
return session._session.get_scoped_config()
|
|
205
|
+
return {}
|
|
207
206
|
|
|
208
207
|
|
|
209
208
|
@lru_cache()
|
|
@@ -225,15 +224,12 @@ def get_endpoint_url(profile_name: Optional[str] = None) -> str:
|
|
|
225
224
|
if environ_endpoint_url:
|
|
226
225
|
warning_endpoint_url(environ_key, environ_endpoint_url)
|
|
227
226
|
return environ_endpoint_url
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
return config_endpoint_url
|
|
235
|
-
except botocore.exceptions.ProfileNotFound:
|
|
236
|
-
pass
|
|
227
|
+
config = get_scoped_config(profile_name=profile_name)
|
|
228
|
+
config_endpoint_url = config.get("s3", {}).get("endpoint_url")
|
|
229
|
+
config_endpoint_url = config_endpoint_url or config.get("endpoint_url")
|
|
230
|
+
if config_endpoint_url:
|
|
231
|
+
warning_endpoint_url("~/.aws/config", config_endpoint_url)
|
|
232
|
+
return config_endpoint_url
|
|
237
233
|
return endpoint_url
|
|
238
234
|
|
|
239
235
|
|
|
@@ -540,6 +536,40 @@ def _make_stat(content: Dict[str, Any]):
|
|
|
540
536
|
)
|
|
541
537
|
|
|
542
538
|
|
|
539
|
+
class StatResultForIsLink:
|
|
540
|
+
def __init__(self, path: "S3Path", *args, **kwargs):
|
|
541
|
+
self._islnk = None
|
|
542
|
+
self._path = path
|
|
543
|
+
self._stat_result = StatResult(*args, **kwargs)
|
|
544
|
+
|
|
545
|
+
def __getattr__(self, name):
|
|
546
|
+
return getattr(self._stat_result, name)
|
|
547
|
+
|
|
548
|
+
@property
|
|
549
|
+
def islnk(self) -> bool:
|
|
550
|
+
if self._islnk is None:
|
|
551
|
+
self._islnk = self._path.is_symlink()
|
|
552
|
+
return self._islnk
|
|
553
|
+
|
|
554
|
+
def is_file(self) -> bool:
|
|
555
|
+
return not self._stat_result.isdir or self.islnk
|
|
556
|
+
|
|
557
|
+
def is_dir(self) -> bool:
|
|
558
|
+
return self._stat_result.isdir and not self.islnk
|
|
559
|
+
|
|
560
|
+
def is_symlink(self) -> bool:
|
|
561
|
+
return self.islnk
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _make_stat_without_metadata(content, path):
|
|
565
|
+
return StatResultForIsLink(
|
|
566
|
+
path=path,
|
|
567
|
+
size=content["Size"],
|
|
568
|
+
mtime=content["LastModified"].timestamp(),
|
|
569
|
+
extra=content,
|
|
570
|
+
)
|
|
571
|
+
|
|
572
|
+
|
|
543
573
|
def _s3_glob_stat_single_path(
|
|
544
574
|
s3_pathname: PathLike,
|
|
545
575
|
recursive: bool = True,
|
|
@@ -564,11 +594,6 @@ def _s3_glob_stat_single_path(
|
|
|
564
594
|
return False
|
|
565
595
|
|
|
566
596
|
def create_generator(_s3_pathname) -> Iterator[FileEntry]:
|
|
567
|
-
top_dir_with_profile = top_dir
|
|
568
|
-
if profile_name:
|
|
569
|
-
top_dir_with_profile = f"s3+{profile_name}://{top_dir[5:]}"
|
|
570
|
-
if not S3Path(top_dir_with_profile).exists():
|
|
571
|
-
return
|
|
572
597
|
if not has_magic(_s3_pathname):
|
|
573
598
|
_s3_pathname_obj = S3Path(_s3_pathname)
|
|
574
599
|
if _s3_pathname_obj.is_file():
|
|
@@ -589,7 +614,7 @@ def _s3_glob_stat_single_path(
|
|
|
589
614
|
bucket, key = parse_s3_url(top_dir)
|
|
590
615
|
prefix = _become_prefix(key)
|
|
591
616
|
client = get_s3_client_with_cache(profile_name=profile_name)
|
|
592
|
-
with raise_s3_error(_s3_pathname):
|
|
617
|
+
with raise_s3_error(_s3_pathname, S3BucketNotFoundError):
|
|
593
618
|
for resp in _list_objects_recursive(client, bucket, prefix, delimiter):
|
|
594
619
|
for content in resp.get("Contents", []):
|
|
595
620
|
path = s3_path_join("s3://", bucket, content["Key"])
|
|
@@ -1123,10 +1148,9 @@ def s3_download(
|
|
|
1123
1148
|
"Empty bucket name: %r" % src_url.path_with_protocol
|
|
1124
1149
|
)
|
|
1125
1150
|
|
|
1126
|
-
if not src_url.exists():
|
|
1127
|
-
raise S3FileNotFoundError("File not found: %r" % src_url.path_with_protocol)
|
|
1128
|
-
|
|
1129
1151
|
if not src_url.is_file():
|
|
1152
|
+
if not src_url.is_dir():
|
|
1153
|
+
raise S3FileNotFoundError("File not found: %r" % src_url.path_with_protocol)
|
|
1130
1154
|
raise S3IsADirectoryError("Is a directory: %r" % src_url.path_with_protocol)
|
|
1131
1155
|
|
|
1132
1156
|
dst_directory = os.path.dirname(dst_path.path_without_protocol) # pyre-ignore[6]
|
|
@@ -1221,7 +1245,6 @@ def s3_load_content(
|
|
|
1221
1245
|
s3_url,
|
|
1222
1246
|
start: Optional[int] = None,
|
|
1223
1247
|
stop: Optional[int] = None,
|
|
1224
|
-
followlinks: bool = False,
|
|
1225
1248
|
) -> bytes:
|
|
1226
1249
|
"""
|
|
1227
1250
|
Get specified file from [start, stop) in bytes
|
|
@@ -1236,11 +1259,6 @@ def s3_load_content(
|
|
|
1236
1259
|
return client.get_object(Bucket=bucket, Key=key, Range=range_str)["Body"].read()
|
|
1237
1260
|
|
|
1238
1261
|
s3_url = S3Path(s3_url)
|
|
1239
|
-
if followlinks:
|
|
1240
|
-
try:
|
|
1241
|
-
s3_url = s3_url.readlink()
|
|
1242
|
-
except S3NotALinkError:
|
|
1243
|
-
pass
|
|
1244
1262
|
|
|
1245
1263
|
bucket, key = parse_s3_url(s3_url.path_with_protocol)
|
|
1246
1264
|
if not bucket:
|
|
@@ -1430,7 +1448,7 @@ class S3Path(URIPath):
|
|
|
1430
1448
|
raise error
|
|
1431
1449
|
return {}
|
|
1432
1450
|
|
|
1433
|
-
def access(self, mode: Access = Access.READ
|
|
1451
|
+
def access(self, mode: Access = Access.READ) -> bool:
|
|
1434
1452
|
"""
|
|
1435
1453
|
Test if path has access permission described by mode
|
|
1436
1454
|
|
|
@@ -1438,11 +1456,6 @@ class S3Path(URIPath):
|
|
|
1438
1456
|
:returns: bool, if the bucket of s3_url has read/write access.
|
|
1439
1457
|
"""
|
|
1440
1458
|
s3_url = self.path_with_protocol
|
|
1441
|
-
if followlinks:
|
|
1442
|
-
try:
|
|
1443
|
-
s3_url = self.readlink().path_with_protocol
|
|
1444
|
-
except S3NotALinkError:
|
|
1445
|
-
pass
|
|
1446
1459
|
bucket, key = parse_s3_url(s3_url) # only check bucket accessibility
|
|
1447
1460
|
if not bucket:
|
|
1448
1461
|
raise Exception("No available bucket")
|
|
@@ -1537,7 +1550,6 @@ class S3Path(URIPath):
|
|
|
1537
1550
|
pattern,
|
|
1538
1551
|
recursive: bool = True,
|
|
1539
1552
|
missing_ok: bool = True,
|
|
1540
|
-
followlinks: bool = False,
|
|
1541
1553
|
) -> List["S3Path"]:
|
|
1542
1554
|
"""Return s3 path list in ascending alphabetical order,
|
|
1543
1555
|
in which path matches glob pattern
|
|
@@ -1558,7 +1570,6 @@ class S3Path(URIPath):
|
|
|
1558
1570
|
pattern=pattern,
|
|
1559
1571
|
recursive=recursive,
|
|
1560
1572
|
missing_ok=missing_ok,
|
|
1561
|
-
followlinks=followlinks,
|
|
1562
1573
|
)
|
|
1563
1574
|
)
|
|
1564
1575
|
|
|
@@ -1567,7 +1578,6 @@ class S3Path(URIPath):
|
|
|
1567
1578
|
pattern,
|
|
1568
1579
|
recursive: bool = True,
|
|
1569
1580
|
missing_ok: bool = True,
|
|
1570
|
-
followlinks: bool = False,
|
|
1571
1581
|
) -> Iterator[FileEntry]:
|
|
1572
1582
|
"""Return a generator contains tuples of path and file stat,
|
|
1573
1583
|
in ascending alphabetical order, in which path matches glob pattern
|
|
@@ -1598,7 +1608,6 @@ class S3Path(URIPath):
|
|
|
1598
1608
|
group_s3_pathname_2,
|
|
1599
1609
|
recursive,
|
|
1600
1610
|
missing_ok,
|
|
1601
|
-
followlinks=followlinks,
|
|
1602
1611
|
profile_name=self._profile_name,
|
|
1603
1612
|
):
|
|
1604
1613
|
if self._profile_name:
|
|
@@ -1618,7 +1627,6 @@ class S3Path(URIPath):
|
|
|
1618
1627
|
pattern,
|
|
1619
1628
|
recursive: bool = True,
|
|
1620
1629
|
missing_ok: bool = True,
|
|
1621
|
-
followlinks: bool = False,
|
|
1622
1630
|
) -> Iterator["S3Path"]:
|
|
1623
1631
|
"""Return s3 path iterator in ascending alphabetical order,
|
|
1624
1632
|
in which path matches glob pattern
|
|
@@ -1638,7 +1646,6 @@ class S3Path(URIPath):
|
|
|
1638
1646
|
pattern=pattern,
|
|
1639
1647
|
recursive=recursive,
|
|
1640
1648
|
missing_ok=missing_ok,
|
|
1641
|
-
followlinks=followlinks,
|
|
1642
1649
|
):
|
|
1643
1650
|
yield self.from_path(file_entry.path)
|
|
1644
1651
|
|
|
@@ -1702,29 +1709,30 @@ class S3Path(URIPath):
|
|
|
1702
1709
|
return False
|
|
1703
1710
|
return True
|
|
1704
1711
|
|
|
1705
|
-
def listdir(self
|
|
1712
|
+
def listdir(self) -> List[str]:
|
|
1706
1713
|
"""
|
|
1707
1714
|
Get all contents of given s3_url. The result is in ascending alphabetical order.
|
|
1708
1715
|
|
|
1716
|
+
:param missing_ok: if True and target directory not exists return empty list,
|
|
1717
|
+
default is True.
|
|
1709
1718
|
:returns: All contents have prefix of s3_url in ascending alphabetical order
|
|
1710
1719
|
:raises: S3FileNotFoundError, S3NotADirectoryError
|
|
1711
1720
|
"""
|
|
1712
|
-
|
|
1713
|
-
|
|
1721
|
+
with self.scandir() as entries:
|
|
1722
|
+
return sorted([entry.name for entry in entries])
|
|
1714
1723
|
|
|
1715
|
-
def iterdir(
|
|
1716
|
-
self, followlinks: bool = False, missing_ok: bool = True
|
|
1717
|
-
) -> Iterator["S3Path"]:
|
|
1724
|
+
def iterdir(self) -> Iterator["S3Path"]:
|
|
1718
1725
|
"""
|
|
1719
|
-
Get all contents of given s3_url. The result is in
|
|
1726
|
+
Get all contents of given s3_url. The order of result is in arbitrary order.
|
|
1720
1727
|
|
|
1721
|
-
:returns: All contents have prefix of s3_url
|
|
1728
|
+
:returns: All contents have prefix of s3_url
|
|
1722
1729
|
:raises: S3FileNotFoundError, S3NotADirectoryError
|
|
1723
1730
|
"""
|
|
1724
|
-
|
|
1725
|
-
|
|
1731
|
+
with self.scandir() as entries:
|
|
1732
|
+
for entry in entries:
|
|
1733
|
+
yield self.joinpath(entry.name)
|
|
1726
1734
|
|
|
1727
|
-
def load(self
|
|
1735
|
+
def load(self) -> BinaryIO:
|
|
1728
1736
|
"""Read all content in binary on specified path and write into memory
|
|
1729
1737
|
|
|
1730
1738
|
User should close the BinaryIO manually
|
|
@@ -1732,11 +1740,6 @@ class S3Path(URIPath):
|
|
|
1732
1740
|
:returns: BinaryIO
|
|
1733
1741
|
"""
|
|
1734
1742
|
s3_url = self.path_with_protocol
|
|
1735
|
-
if followlinks:
|
|
1736
|
-
try:
|
|
1737
|
-
s3_url = self.readlink().path_with_protocol
|
|
1738
|
-
except S3NotALinkError:
|
|
1739
|
-
pass
|
|
1740
1743
|
bucket, key = parse_s3_url(s3_url)
|
|
1741
1744
|
if not bucket:
|
|
1742
1745
|
raise S3BucketNotFoundError("Empty bucket name: %r" % s3_url)
|
|
@@ -1799,11 +1802,14 @@ class S3Path(URIPath):
|
|
|
1799
1802
|
raise S3BucketNotFoundError(
|
|
1800
1803
|
"Empty bucket name: %r" % self.path_with_protocol
|
|
1801
1804
|
)
|
|
1802
|
-
|
|
1803
|
-
|
|
1805
|
+
try:
|
|
1806
|
+
if not self.hasbucket():
|
|
1807
|
+
raise S3BucketNotFoundError(
|
|
1808
|
+
"No such bucket: %r" % self.path_with_protocol
|
|
1809
|
+
)
|
|
1810
|
+
except S3PermissionError:
|
|
1811
|
+
pass
|
|
1804
1812
|
if exist_ok:
|
|
1805
|
-
if self.is_file():
|
|
1806
|
-
raise S3FileExistsError("File exists: %r" % self.path_with_protocol)
|
|
1807
1813
|
return
|
|
1808
1814
|
if self.exists():
|
|
1809
1815
|
raise S3FileExistsError("File exists: %r" % self.path_with_protocol)
|
|
@@ -1972,8 +1978,13 @@ class S3Path(URIPath):
|
|
|
1972
1978
|
|
|
1973
1979
|
prefix = _become_prefix(key)
|
|
1974
1980
|
client = self._client
|
|
1975
|
-
|
|
1976
|
-
|
|
1981
|
+
|
|
1982
|
+
def suppress_error_callback(e):
|
|
1983
|
+
if missing_ok and isinstance(e, S3BucketNotFoundError):
|
|
1984
|
+
return True
|
|
1985
|
+
return False
|
|
1986
|
+
|
|
1987
|
+
with raise_s3_error(self.path_with_protocol, suppress_error_callback):
|
|
1977
1988
|
for resp in _list_objects_recursive(client, bucket, prefix):
|
|
1978
1989
|
for content in resp.get("Contents", []):
|
|
1979
1990
|
full_path = s3_path_join(
|
|
@@ -2002,14 +2013,12 @@ class S3Path(URIPath):
|
|
|
2002
2013
|
S3FileNotFoundError("No match any file in: %r" % self.path_with_protocol),
|
|
2003
2014
|
)
|
|
2004
2015
|
|
|
2005
|
-
def scandir(
|
|
2006
|
-
self, followlinks: bool = False, missing_ok: bool = False
|
|
2007
|
-
) -> Iterator[FileEntry]:
|
|
2016
|
+
def scandir(self) -> ContextIterator:
|
|
2008
2017
|
"""
|
|
2009
|
-
Get all contents of given s3_url, the order of result is
|
|
2018
|
+
Get all contents of given s3_url, the order of result is in arbitrary order.
|
|
2010
2019
|
|
|
2011
2020
|
:returns: All contents have prefix of s3_url
|
|
2012
|
-
:raises: S3FileNotFoundError, S3NotADirectoryError
|
|
2021
|
+
:raises: S3BucketNotFoundError, S3FileNotFoundError, S3NotADirectoryError
|
|
2013
2022
|
"""
|
|
2014
2023
|
bucket, key = parse_s3_url(self.path_with_protocol)
|
|
2015
2024
|
if not bucket and key:
|
|
@@ -2023,67 +2032,65 @@ class S3Path(URIPath):
|
|
|
2023
2032
|
# In order to do check on creation,
|
|
2024
2033
|
# we need to wrap the iterator in another function
|
|
2025
2034
|
def create_generator() -> Iterator[FileEntry]:
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
prefix = _become_prefix(key)
|
|
2029
|
-
client = self._client
|
|
2030
|
-
|
|
2031
|
-
def generate_s3_path(protocol: str, bucket: str, key: str) -> str:
|
|
2032
|
-
return "%s://%s/%s" % (protocol, bucket, key)
|
|
2035
|
+
prefix = _become_prefix(key)
|
|
2036
|
+
client = self._client
|
|
2033
2037
|
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
),
|
|
2045
|
-
|
|
2046
|
-
|
|
2038
|
+
def generate_s3_path(protocol: str, bucket: str, key: str) -> str:
|
|
2039
|
+
return "%s://%s/%s" % (protocol, bucket, key)
|
|
2040
|
+
|
|
2041
|
+
if not bucket and not key: # list buckets
|
|
2042
|
+
response = client.list_buckets()
|
|
2043
|
+
for content in response["Buckets"]:
|
|
2044
|
+
yield FileEntry(
|
|
2045
|
+
content["Name"],
|
|
2046
|
+
f"s3://{content['Name']}",
|
|
2047
|
+
StatResult(
|
|
2048
|
+
ctime=content["CreationDate"].timestamp(),
|
|
2049
|
+
isdir=True,
|
|
2050
|
+
extra=content,
|
|
2051
|
+
),
|
|
2052
|
+
)
|
|
2053
|
+
return
|
|
2047
2054
|
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2055
|
+
for resp in _list_objects_recursive(client, bucket, prefix, "/"):
|
|
2056
|
+
for common_prefix in resp.get("CommonPrefixes", []):
|
|
2057
|
+
yield FileEntry(
|
|
2058
|
+
common_prefix["Prefix"][len(prefix) : -1],
|
|
2059
|
+
generate_s3_path(
|
|
2060
|
+
self._protocol_with_profile,
|
|
2061
|
+
bucket,
|
|
2062
|
+
common_prefix["Prefix"],
|
|
2063
|
+
),
|
|
2064
|
+
StatResult(isdir=True, extra=common_prefix),
|
|
2065
|
+
)
|
|
2066
|
+
for content in resp.get("Contents", []):
|
|
2067
|
+
src_url = generate_s3_path(
|
|
2068
|
+
self._protocol_with_profile, bucket, content["Key"]
|
|
2069
|
+
)
|
|
2070
|
+
yield FileEntry( # pytype: disable=wrong-arg-types
|
|
2071
|
+
content["Key"][len(prefix) :],
|
|
2072
|
+
src_url,
|
|
2073
|
+
_make_stat_without_metadata(content, self.from_path(src_url)),
|
|
2074
|
+
)
|
|
2063
2075
|
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
)
|
|
2072
|
-
continue
|
|
2073
|
-
except S3NotALinkError:
|
|
2074
|
-
pass
|
|
2076
|
+
def missing_ok_generator():
|
|
2077
|
+
def suppress_error_callback(e):
|
|
2078
|
+
if isinstance(e, S3BucketNotFoundError):
|
|
2079
|
+
return False
|
|
2080
|
+
elif not key and isinstance(e, S3FileNotFoundError):
|
|
2081
|
+
return True
|
|
2082
|
+
return False
|
|
2075
2083
|
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2084
|
+
with raise_s3_error(self.path_with_protocol, suppress_error_callback):
|
|
2085
|
+
yield from _create_missing_ok_generator(
|
|
2086
|
+
create_generator(),
|
|
2087
|
+
missing_ok=False,
|
|
2088
|
+
error=S3FileNotFoundError(
|
|
2089
|
+
"No such directory: %r" % self.path_with_protocol
|
|
2090
|
+
),
|
|
2091
|
+
)
|
|
2079
2092
|
|
|
2080
|
-
return ContextIterator(
|
|
2081
|
-
_create_missing_ok_generator(
|
|
2082
|
-
create_generator(),
|
|
2083
|
-
missing_ok,
|
|
2084
|
-
S3FileNotFoundError("No such directory: %r" % self.path_with_protocol),
|
|
2085
|
-
)
|
|
2086
|
-
)
|
|
2093
|
+
return ContextIterator(missing_ok_generator())
|
|
2087
2094
|
|
|
2088
2095
|
def _get_dir_stat(self) -> StatResult:
|
|
2089
2096
|
"""
|
|
@@ -2098,24 +2105,24 @@ class S3Path(URIPath):
|
|
|
2098
2105
|
|
|
2099
2106
|
:returns: An int indicates size in Bytes
|
|
2100
2107
|
"""
|
|
2101
|
-
if not self.is_dir():
|
|
2102
|
-
raise S3FileNotFoundError(
|
|
2103
|
-
"No such file or directory: %r" % self.path_with_protocol
|
|
2104
|
-
)
|
|
2105
|
-
|
|
2106
2108
|
bucket, key = parse_s3_url(self.path_with_protocol)
|
|
2107
2109
|
prefix = _become_prefix(key)
|
|
2108
2110
|
client = self._client
|
|
2109
|
-
size = 0
|
|
2110
|
-
mtime = 0.0
|
|
2111
|
+
count, size, mtime = 0, 0, 0.0
|
|
2111
2112
|
with raise_s3_error(self.path_with_protocol):
|
|
2112
2113
|
for resp in _list_objects_recursive(client, bucket, prefix):
|
|
2113
2114
|
for content in resp.get("Contents", []):
|
|
2115
|
+
count += 1
|
|
2114
2116
|
size += content["Size"]
|
|
2115
2117
|
last_modified = content["LastModified"].timestamp()
|
|
2116
2118
|
if mtime < last_modified:
|
|
2117
2119
|
mtime = last_modified
|
|
2118
2120
|
|
|
2121
|
+
if count == 0:
|
|
2122
|
+
raise S3FileNotFoundError(
|
|
2123
|
+
"No such file or directory: %r" % self.path_with_protocol
|
|
2124
|
+
)
|
|
2125
|
+
|
|
2119
2126
|
return StatResult(size=size, mtime=mtime, isdir=True)
|
|
2120
2127
|
|
|
2121
2128
|
def stat(self, follow_symlinks=True) -> StatResult:
|
|
@@ -2220,29 +2227,28 @@ class S3Path(URIPath):
|
|
|
2220
2227
|
if not bucket:
|
|
2221
2228
|
raise UnsupportedError("Walk whole s3", self.path_with_protocol)
|
|
2222
2229
|
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
for content in resp.get("Contents", []):
|
|
2235
|
-
files.append(content["Key"])
|
|
2230
|
+
with raise_s3_error(self.path_with_protocol, S3BucketNotFoundError):
|
|
2231
|
+
stack = [key]
|
|
2232
|
+
client = self._client
|
|
2233
|
+
while len(stack) > 0:
|
|
2234
|
+
current = _become_prefix(stack.pop())
|
|
2235
|
+
dirs, files = [], []
|
|
2236
|
+
for resp in _list_objects_recursive(client, bucket, current, "/"):
|
|
2237
|
+
for common_prefix in resp.get("CommonPrefixes", []):
|
|
2238
|
+
dirs.append(common_prefix["Prefix"][:-1])
|
|
2239
|
+
for content in resp.get("Contents", []):
|
|
2240
|
+
files.append(content["Key"])
|
|
2236
2241
|
|
|
2237
|
-
|
|
2238
|
-
|
|
2242
|
+
dirs = sorted(dirs)
|
|
2243
|
+
stack.extend(reversed(dirs))
|
|
2239
2244
|
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2245
|
+
root = s3_path_join(
|
|
2246
|
+
f"{self._protocol_with_profile}://", bucket, current
|
|
2247
|
+
)[:-1]
|
|
2248
|
+
dirs = [path[len(current) :] for path in dirs]
|
|
2249
|
+
files = sorted(path[len(current) :] for path in files)
|
|
2250
|
+
if files or dirs or not current:
|
|
2251
|
+
yield root, dirs, files
|
|
2246
2252
|
|
|
2247
2253
|
def md5(self, recalculate: bool = False, followlinks: bool = False) -> str:
|
|
2248
2254
|
"""
|
|
@@ -2259,13 +2265,15 @@ class S3Path(URIPath):
|
|
|
2259
2265
|
raise S3BucketNotFoundError(
|
|
2260
2266
|
"Empty bucket name: %r" % self.path_with_protocol
|
|
2261
2267
|
)
|
|
2262
|
-
stat = self.stat(follow_symlinks=
|
|
2263
|
-
if stat.
|
|
2268
|
+
stat = self.stat(follow_symlinks=False)
|
|
2269
|
+
if followlinks and stat.is_symlink():
|
|
2270
|
+
return self.readlink().md5(recalculate=recalculate, followlinks=followlinks)
|
|
2271
|
+
elif stat.is_dir():
|
|
2264
2272
|
hash_md5 = hashlib.md5() # nosec
|
|
2265
2273
|
for file_name in self.listdir():
|
|
2266
2274
|
chunk = (
|
|
2267
|
-
|
|
2268
|
-
.md5(recalculate=recalculate)
|
|
2275
|
+
self.joinpath(file_name)
|
|
2276
|
+
.md5(recalculate=recalculate, followlinks=followlinks)
|
|
2269
2277
|
.encode()
|
|
2270
2278
|
)
|
|
2271
2279
|
hash_md5.update(chunk)
|
|
@@ -2309,9 +2317,6 @@ class S3Path(URIPath):
|
|
|
2309
2317
|
|
|
2310
2318
|
if not src_bucket:
|
|
2311
2319
|
raise S3BucketNotFoundError("Empty bucket name: %r" % src_url)
|
|
2312
|
-
if self.is_dir():
|
|
2313
|
-
raise S3IsADirectoryError("Is a directory: %r" % src_url)
|
|
2314
|
-
|
|
2315
2320
|
if not dst_bucket:
|
|
2316
2321
|
raise S3BucketNotFoundError("Empty bucket name: %r" % dst_url)
|
|
2317
2322
|
if not dst_key or dst_key.endswith("/"):
|
|
@@ -2324,13 +2329,18 @@ class S3Path(URIPath):
|
|
|
2324
2329
|
except S3NotALinkError:
|
|
2325
2330
|
pass
|
|
2326
2331
|
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2332
|
+
try:
|
|
2333
|
+
with raise_s3_error(f"'{src_url}' or '{dst_url}'"):
|
|
2334
|
+
self._client.copy(
|
|
2335
|
+
{"Bucket": src_bucket, "Key": src_key},
|
|
2336
|
+
Bucket=dst_bucket,
|
|
2337
|
+
Key=dst_key,
|
|
2338
|
+
Callback=callback,
|
|
2339
|
+
)
|
|
2340
|
+
except S3FileNotFoundError:
|
|
2341
|
+
if self.is_dir():
|
|
2342
|
+
raise S3IsADirectoryError("Is a directory: %r" % src_url)
|
|
2343
|
+
raise
|
|
2334
2344
|
|
|
2335
2345
|
def sync(
|
|
2336
2346
|
self,
|
megfile/sftp.py
CHANGED
|
@@ -613,7 +613,7 @@ def sftp_walk(
|
|
|
613
613
|
return SftpPath(path).walk(followlinks)
|
|
614
614
|
|
|
615
615
|
|
|
616
|
-
def sftp_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool =
|
|
616
|
+
def sftp_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool = False):
|
|
617
617
|
"""
|
|
618
618
|
Calculate the md5 value of the file
|
|
619
619
|
|
|
@@ -659,6 +659,7 @@ def sftp_save_as(file_object: BinaryIO, path: PathLike):
|
|
|
659
659
|
def sftp_open(
|
|
660
660
|
path: PathLike,
|
|
661
661
|
mode: str = "r",
|
|
662
|
+
*,
|
|
662
663
|
buffering=-1,
|
|
663
664
|
encoding: Optional[str] = None,
|
|
664
665
|
errors: Optional[str] = None,
|
|
@@ -676,10 +677,12 @@ def sftp_open(
|
|
|
676
677
|
decoding errors are to be handled—this cannot be used in binary mode.
|
|
677
678
|
:returns: File-Like object
|
|
678
679
|
"""
|
|
679
|
-
return SftpPath(path).open(
|
|
680
|
+
return SftpPath(path).open(
|
|
681
|
+
mode, buffering=buffering, encoding=encoding, errors=errors
|
|
682
|
+
)
|
|
680
683
|
|
|
681
684
|
|
|
682
|
-
def sftp_chmod(path: PathLike, mode: int, follow_symlinks: bool = True):
|
|
685
|
+
def sftp_chmod(path: PathLike, mode: int, *, follow_symlinks: bool = True):
|
|
683
686
|
"""
|
|
684
687
|
Change the file mode and permissions, like os.chmod().
|
|
685
688
|
|
|
@@ -687,7 +690,7 @@ def sftp_chmod(path: PathLike, mode: int, follow_symlinks: bool = True):
|
|
|
687
690
|
:param mode: the file mode you want to change
|
|
688
691
|
:param followlinks: Ignore this parameter, just for compatibility
|
|
689
692
|
"""
|
|
690
|
-
return SftpPath(path).chmod(mode, follow_symlinks)
|
|
693
|
+
return SftpPath(path).chmod(mode, follow_symlinks=follow_symlinks)
|
|
691
694
|
|
|
692
695
|
|
|
693
696
|
def sftp_absolute(path: PathLike) -> "SftpPath":
|