cloud-files 5.8.2__py3-none-any.whl → 5.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 5.8.2
+Version: 5.9.0
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -1,10 +1,10 @@
 cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
-cloudfiles/cloudfiles.py,sha256=tPG1PBLEjABPu-KLe93yf6xW_zbafPsQ6z5NuofyUoU,56743
+cloudfiles/cloudfiles.py,sha256=eUFf_PKaLtOIkDmGjDRggPMkMY46BHrXOvNSoAnsDWU,57930
 cloudfiles/compression.py,sha256=WXJHnoNLJ_NWyoY9ygZmFA2qMou35_9xS5dzF7-2H-M,6262
 cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
 cloudfiles/exceptions.py,sha256=N0oGQNG-St6RvnT8e5p_yC_E61q2kgAe2scwAL0F49c,843
 cloudfiles/gcs.py,sha256=unqu5KxGKaPq6N4QeHSpCDdtnK1BzPOAerTZ8FLt2_4,3820
-cloudfiles/interfaces.py,sha256=M62UdugtWcF-J4iQMClHNDEQYu7xxCSc1aT7WW2C1lU,44942
+cloudfiles/interfaces.py,sha256=Eurpmwv6sbn44AfPGp1Pahb2drhqN9lo5J7CRDTyzWU,47190
 cloudfiles/lib.py,sha256=HHjCvjmOjA0nZWSvHGoSeYpxqd6FAG8xk8LM212LAUA,5382
 cloudfiles/monitoring.py,sha256=N5Xq0PYZK1OxoYtwBFsnnfaq7dElTgY8Rn2Ez_I3aoo,20897
 cloudfiles/paths.py,sha256=FLdShqkOg1XlkHurU9eiKzLadx2JFYG1EmleCpOFsYQ,12229
@@ -16,12 +16,12 @@ cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,
 cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
 cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
 cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
-cloudfiles_cli/cloudfiles_cli.py,sha256=JlP9ocqxZbMANAZhZCQSvvjwe6syovQ1asUzSeAlNYk,38459
-cloud_files-5.8.2.dist-info/AUTHORS,sha256=BFVmobgAhaVFI5fqbuqAY5XmBQxe09ZZAsAOTy87hKQ,318
-cloud_files-5.8.2.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
-cloud_files-5.8.2.dist-info/METADATA,sha256=iMhQdNleZM5bNnHKDZ97QNjVuA-7GIWIMHa_wZtePLU,30530
-cloud_files-5.8.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-cloud_files-5.8.2.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
-cloud_files-5.8.2.dist-info/pbr.json,sha256=geeaELiKgs-Cl3LBIxRry_acNuF2kxgqEmZPNhjainY,46
-cloud_files-5.8.2.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
-cloud_files-5.8.2.dist-info/RECORD,,
+cloudfiles_cli/cloudfiles_cli.py,sha256=GTQj0UZB34Cfy4q-hIbXqRUnbLYCTQ6OeXjAb930i5Q,38602
+cloud_files-5.9.0.dist-info/AUTHORS,sha256=BFVmobgAhaVFI5fqbuqAY5XmBQxe09ZZAsAOTy87hKQ,318
+cloud_files-5.9.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloud_files-5.9.0.dist-info/METADATA,sha256=4qhGrbkuqEdwCuq-Nqedo7nBNn_QkA5qHFLxfskqid4,30530
+cloud_files-5.9.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+cloud_files-5.9.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
+cloud_files-5.9.0.dist-info/pbr.json,sha256=9M5V77fSgk_LF2IUco2G8NcksQ_1cmz7cGYU3OSqRzY,46
+cloud_files-5.9.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
+cloud_files-5.9.0.dist-info/RECORD,,
cloud_files-5.9.0.dist-info/pbr.json ADDED
@@ -0,0 +1 @@
+{"git_version": "623052c", "is_release": true}
cloudfiles/cloudfiles.py CHANGED
@@ -18,6 +18,7 @@ import platform
 import posixpath
 import re
 import shutil
+import threading
 import types
 import time
 
@@ -1007,6 +1008,34 @@ class CloudFiles:
       return results
     return first(results.values())
 
+  def subtree_size(self, prefix:GetPathType = "") -> int:
+    """High performance size calculation for directory trees."""
+    prefix, return_multiple = toiter(prefix, is_iter=True)
+    total_bytes = 0
+
+    total = totalfn(prefix, None)
+
+    lock = threading.Lock()
+
+    def size_thunk(prefix):
+      nonlocal total_bytes
+      nonlocal lock
+
+      with self._get_connection() as conn:
+        subtree_bytes = conn.subtree_size(prefix)
+        with lock:
+          total_bytes += subtree_bytes
+
+    schedule_jobs(
+      fns=( partial(size_thunk, path) for path in prefix ),
+      concurrency=self.num_threads,
+      progress=self.progress,
+      green=self.green,
+      total=total,
+    )
+
+    return total_bytes
+
   @parallelize(desc="Delete")
   def delete(
     self, paths:GetPathType, total:Optional[int] = None,
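
The new CloudFiles.subtree_size fans out one conn.subtree_size(prefix) call per requested prefix across the thread (or green thread) pool and accumulates the per-prefix byte counts under a lock. A minimal usage sketch, assuming a hypothetical bucket path:

    from cloudfiles import CloudFiles

    cf = CloudFiles("gs://my-bucket/dataset")    # hypothetical path
    print(cf.subtree_size())                     # total bytes under the path
    print(cf.subtree_size(["raw/", "meshes/"]))  # several subtrees, summed in parallel
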
@@ -1666,6 +1695,12 @@ class CloudFiles:
       return os.path.join(*paths)
     return posixpath.join(*paths)
 
+  @property
+  def sep(self) -> str:
+    if self._path.protocol == "file":
+      return os.sep
+    return posixpath.sep
+
   def dirname(self, path:str) -> str:
     if self._path.protocol == "file":
       return os.path.dirname(path)
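
The sep property mirrors the existing join/dirname logic: local "file" paths use the platform separator, while every cloud protocol is POSIX. An illustration with hypothetical paths:

    CloudFiles("file:///tmp/data").sep  # os.sep: "/" on Linux/macOS, "\\" on Windows
    CloudFiles("s3://bucket/data").sep  # always "/"
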
@@ -1706,11 +1741,17 @@ class CloudFiles:
 
 class CloudFile:
   def __init__(
-    self, path:str, cache_meta:bool = False,
+    self,
+    path:str,
+    cache_meta:bool = False,
     secrets:SecretsType = None,
     composite_upload_threshold:int = int(1e8),
     locking:bool = True,
     lock_dir:Optional[str] = None,
+    endpoint:Optional[str] = None,
+    no_sign_request:bool = False,
+    request_payer:Optional[str] = None,
+    use_https:bool = False,
   ):
     path = paths.normalize(path)
     self.cf = CloudFiles(
@@ -1719,6 +1760,10 @@ class CloudFile:
       composite_upload_threshold=composite_upload_threshold,
       locking=locking,
       lock_dir=lock_dir,
+      use_https=use_https,
+      endpoint=endpoint,
+      request_payer=request_payer,
+      no_sign_request=no_sign_request,
     )
     self.filename = paths.basename(path)
 
@@ -1726,6 +1771,10 @@ class CloudFile:
     self._size:Optional[int] = None
     self._head = None
 
+  @property
+  def sep(self) -> str:
+    return self.cf.sep
+
   @property
   def protocol(self):
     return self.cf.protocol
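
CloudFile now accepts the same connection options as CloudFiles and forwards them directly to its internal CloudFiles instance. A usage sketch, assuming a hypothetical S3-compatible endpoint and object:

    from cloudfiles import CloudFile

    cf = CloudFile(
      "s3://my-bucket/image.npy",            # hypothetical object
      endpoint="https://minio.example.com",  # e.g. a MinIO server
      no_sign_request=True,                  # anonymous reads on a public bucket
    )
    data = cf.get()
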
cloudfiles/interfaces.py CHANGED
@@ -48,6 +48,7 @@ MEM_POOL = None
 
 S3_ACLS = {
   "tigerdata": "private",
+  "nokura": "public-read",
 }
 
 S3ConnectionPoolParams = namedtuple('S3ConnectionPoolParams', 'service bucket_name request_payer')
@@ -303,6 +304,19 @@ class FileInterface(StorageInterface):
 
     return self.io_with_lock(do_size, path, exclusive=False)
 
+  def subtree_size(self, prefix:str = "") -> int:
+    total_bytes = 0
+
+    subdir = self.get_path_to_file("")
+    if prefix:
+      subdir = os.path.join(subdir, os.path.dirname(prefix))
+
+    for root, dirs, files in os.walk(subdir):
+      files = ( os.path.join(root, f) for f in files )
+      total_bytes += sum(( os.path.getsize(f) for f in files ))
+
+    return total_bytes
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     def do_exists():
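
On the local filesystem the size calculation reduces to an os.walk over the subtree. A self-contained sketch of the same pattern, with a hypothetical directory:

    import os

    def dir_size(subdir: str) -> int:
      total = 0
      for root, dirs, files in os.walk(subdir):
        # sum the on-disk size of every regular file below subdir
        total += sum(os.path.getsize(os.path.join(root, f)) for f in files)
      return total

    print(dir_size("/tmp/dataset"))
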
@@ -580,8 +594,7 @@ class MemoryInterface(StorageInterface):
 
     Returns: iterator
     """
-    layer_path = self.get_path_to_file("")
-    path = os.path.join(layer_path, prefix) + '*'
+    layer_path = self.get_path_to_file("")
 
     remove = layer_path
     if len(remove) and remove[-1] != '/':
@@ -615,6 +628,21 @@ class MemoryInterface(StorageInterface):
     filenames.sort()
     return iter(filenames)
 
+  def subtree_size(self, prefix:str = "") -> int:
+    layer_path = self.get_path_to_file("")
+
+    remove = layer_path
+    if len(remove) and remove[-1] != '/':
+      remove += '/'
+
+    total_bytes = 0
+    for filename, binary in self._data.items():
+      f_prefix = filename.removeprefix(remove)[:len(prefix)]
+      if f_prefix == prefix:
+        total_bytes += len(binary)
+
+    return total_bytes
+
 class GoogleCloudStorageInterface(StorageInterface):
   exists_batch_size = Batch._MAX_BATCH_SIZE
   delete_batch_size = Batch._MAX_BATCH_SIZE
@@ -816,6 +844,8 @@ class GoogleCloudStorageInterface(StorageInterface):
     blobs = self._bucket.list_blobs(
       prefix=path,
       delimiter=delimiter,
+      page_size=2500,
+      fields="items(name),nextPageToken",
     )
 
     for page in blobs.pages:
@@ -835,6 +865,24 @@ class GoogleCloudStorageInterface(StorageInterface):
       yield filename
 
 
+  @retry
+  def subtree_size(self, prefix:str = "") -> int:
+    layer_path = self.get_path_to_file("")
+    path = posixpath.join(layer_path, prefix)
+
+    blobs = self._bucket.list_blobs(
+      prefix=path,
+      page_size=5000,
+      fields="items(name,size),nextPageToken",
+    )
+
+    total_bytes = 0
+    for page in blobs.pages:
+      for blob in page:
+        total_bytes += blob.size
+
+    return total_bytes
+
   def release_connection(self):
     global GC_POOL
     with GCS_BUCKET_POOL_LOCK:
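
Restricting fields to the object name and size keeps each listing page small, since sizes are all this method needs. A standalone sketch of the same pattern with google-cloud-storage, using a hypothetical bucket and prefix:

    from google.cloud import storage

    client = storage.Client()
    blobs = client.bucket("my-bucket").list_blobs(
      prefix="dataset/",
      page_size=5000,
      fields="items(name,size),nextPageToken",  # fetch only what we sum
    )
    total_bytes = sum(blob.size for blob in blobs)
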
@@ -882,6 +930,8 @@ class HttpInterface(StorageInterface):
     key = self.get_path_to_file(file_path)
     headers = self.default_headers()
     with self.session.head(key, headers=headers) as resp:
+      if resp.status_code in (404, 403):
+        return None
       resp.raise_for_status()
       return resp.headers
 
@@ -889,6 +939,9 @@ class HttpInterface(StorageInterface):
     headers = self.head(file_path)
     return int(headers["Content-Length"])
 
+  def subtree_size(self, prefix:str = "") -> int:
+    raise NotImplementedError()
+
   @retry
   def get_file(self, file_path, start=None, end=None, part_size=None):
     key = self.get_path_to_file(file_path)
@@ -899,24 +952,20 @@ class HttpInterface(StorageInterface):
       end = int(end - 1) if end is not None else ''
       headers["Range"] = f"bytes={start}-{end}"
 
-    resp = self.session.get(key, headers=headers)
-
-    if resp.status_code in (404, 403):
-      return (None, None, None, None)
-    resp.close()
-    resp.raise_for_status()
+    with self.session.get(key, headers=headers, stream=True) as resp:
+      if resp.status_code in (404, 403):
+        return (None, None, None, None)
+      resp.raise_for_status()
+      resp.raw.decode_content = False
+      content = resp.raw.read()
+      content_encoding = resp.headers.get('Content-Encoding', None)
 
     # Don't check MD5 for http because the etag can come in many
     # forms from either GCS, S3 or another service entirely. We
     # probably won't figure out how to decode it right.
     # etag = resp.headers.get('etag', None)
-    content_encoding = resp.headers.get('Content-Encoding', None)
-
-    # requests automatically decodes these
-    if content_encoding in (None, '', 'gzip', 'deflate', 'br'):
-      content_encoding = None
 
-    return (resp.content, content_encoding, None, None)
+    return (content, content_encoding, None, None)
 
   @retry
   def save_file(self, src, dest, resumable) -> tuple[bool, int]:
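
Reading resp.raw with decode_content disabled hands back the bytes exactly as the server sent them, so a gzip or br Content-Encoding survives for the caller to decode instead of requests silently inflating it. A minimal sketch of the pattern, with a hypothetical URL:

    import requests

    with requests.get("https://example.com/data.gz", stream=True) as resp:
      resp.raise_for_status()
      resp.raw.decode_content = False  # do NOT let urllib3 inflate the body
      content = resp.raw.read()        # still compressed if sent that way
      encoding = resp.headers.get("Content-Encoding")
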
@@ -1017,7 +1066,6 @@ class HttpInterface(StorageInterface):
     )
 
     for res in results.get("items", []):
-      print(res["name"])
       yield res["name"].removeprefix(strip)
 
     token = results.get("nextPageToken", None)
@@ -1490,6 +1538,44 @@ class S3Interface(StorageInterface):
     for filename in iterate(resp):
       yield filename
 
+  def subtree_size(self, prefix:str = "") -> int:
+    layer_path = self.get_path_to_file("")
+    path = posixpath.join(layer_path, prefix)
+
+    @retry
+    def s3lst(path, continuation_token=None):
+      kwargs = {
+        'Bucket': self._path.bucket,
+        'Prefix': path,
+        **self._additional_attrs
+      }
+
+      if continuation_token:
+        kwargs['ContinuationToken'] = continuation_token
+
+      return self._conn.list_objects_v2(**kwargs)
+
+    resp = s3lst(path)
+
+    def iterate(resp):
+      if 'Contents' not in resp.keys():
+        resp['Contents'] = []
+
+      for item in resp['Contents']:
+        yield item.get('Size', 0)
+
+    total_bytes = 0
+    for num_bytes in iterate(resp):
+      total_bytes += num_bytes
+
+    while resp['IsTruncated'] and resp['NextContinuationToken']:
+      resp = s3lst(path, resp['NextContinuationToken'])
+
+      for num_bytes in iterate(resp):
+        total_bytes += num_bytes
+
+    return total_bytes
+
   def release_connection(self):
     global S3_POOL
     service = self._path.alias or 's3'
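
The hand-rolled ContinuationToken loop can also be expressed with boto3's built-in paginator; a sketch under the assumption of a hypothetical bucket and prefix:

    import boto3

    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")
    total_bytes = 0
    for page in paginator.paginate(Bucket="my-bucket", Prefix="dataset/"):
      total_bytes += sum(obj["Size"] for obj in page.get("Contents", []))
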
cloudfiles_cli/cloudfiles_cli.py CHANGED
@@ -809,7 +809,10 @@ def du(paths, grand_total, summarize, human_readable):
     npath = normalize_path(path)
     if ispathdir(path):
       cf = CloudFiles(npath)
-      results.append(cf.size(cf.list()))
+      if summarize:
+        results.append(cf.subtree_size())
+      else:
+        results.append(cf.size(cf.list()))
     else:
       cf = CloudFiles(os.path.dirname(npath))
       sz = cf.size(os.path.basename(npath))
@@ -839,7 +842,10 @@ def du(paths, grand_total, summarize, human_readable):
 
   summary = {}
   for path, res in zip(paths, results):
-    summary[path] = sum(res.values())
+    if isinstance(res, int):
+      summary[path] = res
+    else:
+      summary[path] = sum(res.values())
     if summarize:
       print(f"{SI(summary[path])}\t{path}")
 
cloud_files-5.8.2.dist-info/pbr.json DELETED
@@ -1 +0,0 @@
-{"git_version": "99528f8", "is_release": true}