cloud-files 5.8.2-py3-none-any.whl → 5.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/METADATA +1 -1
- {cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/RECORD +11 -11
- cloud_files-5.9.0.dist-info/pbr.json +1 -0
- cloudfiles/cloudfiles.py +50 -1
- cloudfiles/interfaces.py +101 -15
- cloudfiles_cli/cloudfiles_cli.py +8 -2
- cloud_files-5.8.2.dist-info/pbr.json +0 -1
- {cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/AUTHORS +0 -0
- {cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/LICENSE +0 -0
- {cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/WHEEL +0 -0
- {cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/entry_points.txt +0 -0
- {cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/top_level.txt +0 -0
{cloud_files-5.8.2.dist-info → cloud_files-5.9.0.dist-info}/RECORD
CHANGED

@@ -1,10 +1,10 @@
 cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
-cloudfiles/cloudfiles.py,sha256=
+cloudfiles/cloudfiles.py,sha256=eUFf_PKaLtOIkDmGjDRggPMkMY46BHrXOvNSoAnsDWU,57930
 cloudfiles/compression.py,sha256=WXJHnoNLJ_NWyoY9ygZmFA2qMou35_9xS5dzF7-2H-M,6262
 cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
 cloudfiles/exceptions.py,sha256=N0oGQNG-St6RvnT8e5p_yC_E61q2kgAe2scwAL0F49c,843
 cloudfiles/gcs.py,sha256=unqu5KxGKaPq6N4QeHSpCDdtnK1BzPOAerTZ8FLt2_4,3820
-cloudfiles/interfaces.py,sha256=
+cloudfiles/interfaces.py,sha256=Eurpmwv6sbn44AfPGp1Pahb2drhqN9lo5J7CRDTyzWU,47190
 cloudfiles/lib.py,sha256=HHjCvjmOjA0nZWSvHGoSeYpxqd6FAG8xk8LM212LAUA,5382
 cloudfiles/monitoring.py,sha256=N5Xq0PYZK1OxoYtwBFsnnfaq7dElTgY8Rn2Ez_I3aoo,20897
 cloudfiles/paths.py,sha256=FLdShqkOg1XlkHurU9eiKzLadx2JFYG1EmleCpOFsYQ,12229
@@ -16,12 +16,12 @@ cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,
 cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
 cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
 cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
-cloudfiles_cli/cloudfiles_cli.py,sha256=
-cloud_files-5.
-cloud_files-5.
-cloud_files-5.
-cloud_files-5.
-cloud_files-5.
-cloud_files-5.
-cloud_files-5.
-cloud_files-5.
+cloudfiles_cli/cloudfiles_cli.py,sha256=GTQj0UZB34Cfy4q-hIbXqRUnbLYCTQ6OeXjAb930i5Q,38602
+cloud_files-5.9.0.dist-info/AUTHORS,sha256=BFVmobgAhaVFI5fqbuqAY5XmBQxe09ZZAsAOTy87hKQ,318
+cloud_files-5.9.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloud_files-5.9.0.dist-info/METADATA,sha256=4qhGrbkuqEdwCuq-Nqedo7nBNn_QkA5qHFLxfskqid4,30530
+cloud_files-5.9.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+cloud_files-5.9.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
+cloud_files-5.9.0.dist-info/pbr.json,sha256=9M5V77fSgk_LF2IUco2G8NcksQ_1cmz7cGYU3OSqRzY,46
+cloud_files-5.9.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
+cloud_files-5.9.0.dist-info/RECORD,,
cloud_files-5.9.0.dist-info/pbr.json
ADDED

@@ -0,0 +1 @@
+{"git_version": "623052c", "is_release": true}
cloudfiles/cloudfiles.py
CHANGED
@@ -18,6 +18,7 @@ import platform
 import posixpath
 import re
 import shutil
+import threading
 import types
 import time
@@ -1007,6 +1008,34 @@ class CloudFiles:
       return results
     return first(results.values())
 
+  def subtree_size(self, prefix:GetPathType = "") -> int:
+    """High performance size calculation for directory trees."""
+    prefix, return_multiple = toiter(prefix, is_iter=True)
+    total_bytes = 0
+
+    total = totalfn(prefix, None)
+
+    lock = threading.Lock()
+
+    def size_thunk(prefix):
+      nonlocal total_bytes
+      nonlocal lock
+
+      with self._get_connection() as conn:
+        subtree_bytes = conn.subtree_size(prefix)
+        with lock:
+          total_bytes += subtree_bytes
+
+    schedule_jobs(
+      fns=( partial(size_thunk, path) for path in prefix ),
+      concurrency=self.num_threads,
+      progress=self.progress,
+      green=self.green,
+      total=total,
+    )
+
+    return total_bytes
+
   @parallelize(desc="Delete")
   def delete(
     self, paths:GetPathType, total:Optional[int] = None,
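The new `CloudFiles.subtree_size` computes the total byte count under one or more prefixes, dispatching each prefix to a pooled connection via `schedule_jobs` and accumulating results under a `threading.Lock`. A minimal usage sketch (the bucket path is hypothetical; per the `interfaces.py` hunks below, every backend except http implements it):

```python
from cloudfiles import CloudFiles

# Hypothetical bucket and prefix for illustration.
cf = CloudFiles("gs://my-bucket/dataset/")

total = cf.subtree_size()            # total bytes below the configured path
total_imgs = cf.subtree_size("img")  # restrict to keys starting with "img"

print(f"{total / 1e9:.2f} GB")
```

Since the prefix argument is passed through `toiter`, it also appears to accept a list of prefixes, summing across all of them in parallel.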
@@ -1666,6 +1695,12 @@ class CloudFiles:
       return os.path.join(*paths)
     return posixpath.join(*paths)
 
+  @property
+  def sep(self) -> str:
+    if self._path.protocol == "file":
+      return os.sep
+    return posixpath.sep
+
   def dirname(self, path:str) -> str:
     if self._path.protocol == "file":
       return os.path.dirname(path)
@@ -1706,11 +1741,17 @@ class CloudFiles:
 
 class CloudFile:
   def __init__(
-    self,
+    self,
+    path:str,
+    cache_meta:bool = False,
     secrets:SecretsType = None,
     composite_upload_threshold:int = int(1e8),
     locking:bool = True,
    lock_dir:Optional[str] = None,
+    endpoint:Optional[str] = None,
+    no_sign_request:bool = False,
+    request_payer:Optional[str] = None,
+    use_https:bool = False,
  ):
    path = paths.normalize(path)
    self.cf = CloudFiles(
@@ -1719,6 +1760,10 @@ class CloudFile:
       composite_upload_threshold=composite_upload_threshold,
       locking=locking,
       lock_dir=lock_dir,
+      use_https=use_https,
+      endpoint=endpoint,
+      request_payer=request_payer,
+      no_sign_request=no_sign_request,
     )
     self.filename = paths.basename(path)
 
@@ -1726,6 +1771,10 @@ class CloudFile:
     self._size:Optional[int] = None
     self._head = None
 
+  @property
+  def sep(self) -> str:
+    return self.cf.sep
+
   @property
   def protocol(self):
     return self.cf.protocol
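The `CloudFile` constructor now spells out `path` and adds `cache_meta` plus four transport options (`endpoint`, `no_sign_request`, `request_payer`, `use_https`) that are forwarded to the underlying `CloudFiles` instance; a `sep` property is also mirrored from `CloudFiles`. A sketch with assumed values (the endpoint URL is illustrative, e.g. an S3-compatible server):

```python
from cloudfiles import CloudFile

cf = CloudFile(
    "s3://my-bucket/archive/data.bin",   # hypothetical object
    endpoint="https://s3.example.com",   # illustrative S3-compatible endpoint
    no_sign_request=True,                # skip signing for public buckets
    request_payer=None,                  # or "requester" for requester-pays
    use_https=True,
)
print(cf.sep)  # "/" for object storage protocols; os.sep for file:// paths
```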
cloudfiles/interfaces.py
CHANGED
@@ -48,6 +48,7 @@ MEM_POOL = None
 
 S3_ACLS = {
   "tigerdata": "private",
+  "nokura": "public-read",
 }
 
 S3ConnectionPoolParams = namedtuple('S3ConnectionPoolParams', 'service bucket_name request_payer')
@@ -303,6 +304,19 @@ class FileInterface(StorageInterface):
 
     return self.io_with_lock(do_size, path, exclusive=False)
 
+  def subtree_size(self, prefix:str = "") -> int:
+    total_bytes = 0
+
+    subdir = self.get_path_to_file("")
+    if prefix:
+      subdir = os.path.join(subdir, os.path.dirname(prefix))
+
+    for root, dirs, files in os.walk(subdir):
+      files = ( os.path.join(root, f) for f in files )
+      total_bytes += sum(( os.path.getsize(f) for f in files ))
+
+    return total_bytes
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     def do_exists():
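One behavioral wrinkle worth noting: the file backend derives its walk root from `os.path.dirname(prefix)`, so a partial-basename prefix such as `a/b` appears to count everything under `a/`, not only keys beginning with `b`, whereas the memory, GCS, and S3 implementations below match the prefix string exactly. The walk itself is the standard size-summing pattern; a self-contained sketch:

```python
import os

def dir_tree_bytes(subdir: str) -> int:
    """Sum file sizes under subdir, as FileInterface.subtree_size does."""
    total = 0
    for root, dirs, files in os.walk(subdir):
        total += sum(os.path.getsize(os.path.join(root, f)) for f in files)
    return total

# e.g. dir_tree_bytes("/tmp")  # hypothetical directory
```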
@@ -580,8 +594,7 @@ class MemoryInterface(StorageInterface):
 
     Returns: iterator
     """
-    layer_path = self.get_path_to_file("")
-    path = os.path.join(layer_path, prefix) + '*'
+    layer_path = self.get_path_to_file("")
 
     remove = layer_path
     if len(remove) and remove[-1] != '/':
@@ -615,6 +628,21 @@ class MemoryInterface(StorageInterface):
     filenames.sort()
     return iter(filenames)
 
+  def subtree_size(self, prefix:str = "") -> int:
+    layer_path = self.get_path_to_file("")
+
+    remove = layer_path
+    if len(remove) and remove[-1] != '/':
+      remove += '/'
+
+    total_bytes = 0
+    for filename, binary in self._data.items():
+      f_prefix = f.removeprefix(remove)[:len(prefix)]
+      if f_prefix == prefix:
+        total_bytes += len(binary)
+
+    return total_bytes
+
 class GoogleCloudStorageInterface(StorageInterface):
   exists_batch_size = Batch._MAX_BATCH_SIZE
   delete_batch_size = Batch._MAX_BATCH_SIZE
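As published, the loop above references `f`, which is never bound (the loop variable is `filename`), so the memory backend's `subtree_size` would raise a `NameError` for any non-empty store. A corrected sketch of the apparent intent (method body only):

```python
def subtree_size(self, prefix: str = "") -> int:
    layer_path = self.get_path_to_file("")

    remove = layer_path
    if len(remove) and remove[-1] != '/':
        remove += '/'

    total_bytes = 0
    for filename, binary in self._data.items():
        # Presumably `filename` was intended where the release says `f`.
        f_prefix = filename.removeprefix(remove)[:len(prefix)]
        if f_prefix == prefix:
            total_bytes += len(binary)

    return total_bytes
```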
@@ -816,6 +844,8 @@ class GoogleCloudStorageInterface(StorageInterface):
     blobs = self._bucket.list_blobs(
       prefix=path,
       delimiter=delimiter,
+      page_size=2500,
+      fields="items(name),nextPageToken",
     )
 
     for page in blobs.pages:
@@ -835,6 +865,24 @@ class GoogleCloudStorageInterface(StorageInterface):
       yield filename
 
 
+  @retry
+  def subtree_size(self, prefix:str = "") -> int:
+    layer_path = self.get_path_to_file("")
+    path = posixpath.join(layer_path, prefix)
+
+    blobs = self._bucket.list_blobs(
+      prefix=path,
+      page_size=5000,
+      fields="items(name,size),nextPageToken",
+    )
+
+    total_bytes = 0
+    for page in blobs.pages:
+      for blob in page:
+        total_bytes += blob.size
+
+    return total_bytes
+
   def release_connection(self):
     global GC_POOL
     with GCS_BUCKET_POOL_LOCK:
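Both GCS changes lean on the JSON API's partial-response support: `fields` trims each page to just the attributes actually consumed (name, or name plus size), and an explicit `page_size` asks for larger pages. A standalone sketch of the same pattern with google-cloud-storage (bucket and prefix are hypothetical):

```python
from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")  # hypothetical bucket

blobs = bucket.list_blobs(
    prefix="dataset/",
    page_size=5000,
    # Partial response: return only object names and sizes per page.
    fields="items(name,size),nextPageToken",
)
# Iterating the returned iterator transparently fetches successive pages.
total_bytes = sum(blob.size for blob in blobs)
print(total_bytes)
```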
@@ -882,6 +930,8 @@ class HttpInterface(StorageInterface):
     key = self.get_path_to_file(file_path)
     headers = self.default_headers()
     with self.session.head(key, headers=headers) as resp:
+      if resp.status_code in (404, 403):
+        return None
       resp.raise_for_status()
       return resp.headers
 
@@ -889,6 +939,9 @@ class HttpInterface(StorageInterface):
     headers = self.head(file_path)
     return int(headers["Content-Length"])
 
+  def subtree_size(self, prefix:str = "") -> int:
+    raise NotImplementedError()
+
   @retry
   def get_file(self, file_path, start=None, end=None, part_size=None):
     key = self.get_path_to_file(file_path)
@@ -899,24 +952,20 @@ class HttpInterface(StorageInterface):
       end = int(end - 1) if end is not None else ''
       headers["Range"] = f"bytes={start}-{end}"
 
-
-
-
-
-
-
+    with self.session.get(key, headers=headers, stream=True) as resp:
+      if resp.status_code in (404, 403):
+        return (None, None, None, None)
+      resp.raise_for_status()
+      resp.raw.decode_content = False
+      content = resp.raw.read()
+      content_encoding = resp.headers.get('Content-Encoding', None)
 
     # Don't check MD5 for http because the etag can come in many
     # forms from either GCS, S3 or another service entirely. We
     # probably won't figure out how to decode it right.
     # etag = resp.headers.get('etag', None)
-    content_encoding = resp.headers.get('Content-Encoding', None)
-
-    # requests automatically decodes these
-    if content_encoding in (None, '', 'gzip', 'deflate', 'br'):
-      content_encoding = None
 
-    return (
+    return (content, content_encoding, None, None)
 
   @retry
   def save_file(self, src, dest, resumable) -> tuple[bool, int]:
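`head` and `get_file` on the HTTP backend now map 404/403 responses to `None` results instead of raising, matching how the other backends report missing objects (note that `size`, which calls `int(headers["Content-Length"])` on the result of `head`, would now receive `None` for a missing file rather than an exception). From the caller's side (URL hypothetical):

```python
from cloudfiles import CloudFiles

cf = CloudFiles("https://example.com/data/")  # hypothetical read-only endpoint

content = cf.get("missing-file")
if content is None:
    # 404 and 403 now surface as a None payload instead of an HTTPError.
    print("absent or access denied")
```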
@@ -1017,7 +1066,6 @@ class HttpInterface(StorageInterface):
     )
 
     for res in results.get("items", []):
-      print(res["name"])
       yield res["name"].removeprefix(strip)
 
     token = results.get("nextPageToken", None)
@@ -1490,6 +1538,44 @@ class S3Interface(StorageInterface):
     for filename in iterate(resp):
       yield filename
 
+  def subtree_size(self, prefix:str = "") -> int:
+    layer_path = self.get_path_to_file("")
+    path = posixpath.join(layer_path, prefix)
+
+    @retry
+    def s3lst(path, continuation_token=None):
+      kwargs = {
+        'Bucket': self._path.bucket,
+        'Prefix': path,
+        **self._additional_attrs
+      }
+
+      if continuation_token:
+        kwargs['ContinuationToken'] = continuation_token
+
+      return self._conn.list_objects_v2(**kwargs)
+
+    resp = s3lst(path)
+
+    def iterate(resp):
+      if 'Contents' not in resp.keys():
+        resp['Contents'] = []
+
+      for item in resp['Contents']:
+        yield item.get('Size', 0)
+
+    total_bytes = 0
+    for num_bytes in iterate(resp):
+      total_bytes += num_bytes
+
+    while resp['IsTruncated'] and resp['NextContinuationToken']:
+      resp = s3lst(path, resp['NextContinuationToken'])
+
+      for num_bytes in iterate(resp):
+        total_bytes += num_bytes
+
+    return total_bytes
+
   def release_connection(self):
     global S3_POOL
     service = self._path.alias or 's3'
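The continuation-token loop above is the classic ListObjectsV2 pagination dance; boto3's built-in paginator expresses the same traversal more compactly, shown here for comparison (bucket and prefix hypothetical):

```python
import boto3

s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")

total_bytes = 0
for page in paginator.paginate(Bucket="my-bucket", Prefix="dataset/"):
    # Pages with no matches omit the "Contents" key entirely.
    for obj in page.get("Contents", []):
        total_bytes += obj.get("Size", 0)
print(total_bytes)
```

The hand-rolled version presumably keeps `@retry` control on each request and reuses `self._additional_attrs` (e.g. RequestPayer), which the pooled interface already tracks.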
cloudfiles_cli/cloudfiles_cli.py
CHANGED
@@ -809,7 +809,10 @@ def du(paths, grand_total, summarize, human_readable):
     npath = normalize_path(path)
     if ispathdir(path):
       cf = CloudFiles(npath)
-
+      if summarize:
+        results.append(cf.subtree_size())
+      else:
+        results.append(cf.size(cf.list()))
     else:
       cf = CloudFiles(os.path.dirname(npath))
       sz = cf.size(os.path.basename(npath))
@@ -839,7 +842,10 @@ def du(paths, grand_total, summarize, human_readable):
 
   summary = {}
   for path, res in zip(paths, results):
-
+    if isinstance(res, int):
+      summary[path] = res
+    else:
+      summary[path] = sum(res.values())
     if summarize:
       print(f"{SI(summary[path])}\t{path}")
 
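At the CLI level this means the summarize mode of `cloudfiles du` can answer with one aggregated listing per prefix via `subtree_size`, while plain `du` still materializes a per-object size dict (the flag spelling, presumably `-s`/`--summarize` mirroring coreutils du, is an assumption here). The library-level equivalent:

```python
from cloudfiles import CloudFiles

cf = CloudFiles("s3://my-bucket/dataset/")  # hypothetical path

fast_total = cf.subtree_size()   # what the summarize path now uses
sizes = cf.size(cf.list())       # what plain `du` computes, per object
# The two agree up to listing races: fast_total == sum(sizes.values())
```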
cloud_files-5.8.2.dist-info/pbr.json
DELETED

@@ -1 +0,0 @@
-{"git_version": "99528f8", "is_release": true}
The remaining dist-info files (AUTHORS, LICENSE, WHEEL, entry_points.txt, top_level.txt) were renamed without content changes.