cloud-files 4.27.0__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cloudfiles/interfaces.py CHANGED
@@ -4,14 +4,13 @@ from collections import defaultdict, namedtuple
 from datetime import datetime
 from io import BytesIO
 import json
-import os.path
+import os
 import posixpath
 import re
 
 import boto3
 import botocore
 import gevent.monkey
-from glob import glob
 import google.cloud.exceptions
 from google.cloud.storage import Batch, Client
 import requests
@@ -22,8 +21,8 @@ import fasteners
 
 from .compression import COMPRESSION_TYPES
 from .connectionpools import S3ConnectionPool, GCloudBucketPool, MemoryPool, MEMORY_DATA
-from .exceptions import MD5IntegrityError, CompressionError
-from .lib import mkdir, sip, md5, validate_s3_multipart_etag
+from .exceptions import MD5IntegrityError, CompressionError, AuthorizationError
+from .lib import mkdir, sip, md5, encode_crc32c_b64, validate_s3_multipart_etag
 from .secrets import (
   http_credentials,
   cave_credentials,
@@ -49,6 +48,7 @@ MEM_POOL = None
 
 S3_ACLS = {
   "tigerdata": "private",
+  "nokura": "public-read",
 }
 
 S3ConnectionPoolParams = namedtuple('S3ConnectionPoolParams', 'service bucket_name request_payer')
@@ -304,6 +304,22 @@ class FileInterface(StorageInterface):
 
     return self.io_with_lock(do_size, path, exclusive=False)
 
+  def subtree_size(self, prefix:str = "") -> tuple[int,int]:
+    total_bytes = 0
+    total_files = 0
+
+    subdir = self.get_path_to_file("")
+    if prefix:
+      subdir = os.path.join(subdir, os.path.dirname(prefix))
+
+    for root, dirs, files in os.walk(subdir):
+      for f in files:
+        path = os.path.join(root, f)
+        total_files += 1
+        total_bytes += os.path.getsize(path)
+
+    return (total_files, total_bytes)
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     def do_exists():
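
Note: each backend's new subtree_size returns a (total_files, total_bytes) tuple for everything under a prefix. A minimal standalone sketch of the file-backend logic above (the directory path is hypothetical):

    import os

    def subtree_size(subdir: str) -> tuple[int, int]:
      total_files, total_bytes = 0, 0
      for root, dirs, files in os.walk(subdir):
        for f in files:
          total_files += 1
          total_bytes += os.path.getsize(os.path.join(root, f))
      return (total_files, total_bytes)

    num_files, num_bytes = subtree_size("/tmp/data")  # hypothetical directory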
@@ -339,7 +355,7 @@ class FileInterface(StorageInterface):
     """
 
     layer_path = self.get_path_to_file("")
-    path = os.path.join(layer_path, prefix) + '*'
+    path = os.path.join(layer_path, prefix)
 
     filenames = []
 
@@ -348,17 +364,33 @@ class FileInterface(StorageInterface):
       remove += os.path.sep
 
     if flat:
-      for file_path in glob(path):
-        if not os.path.isfile(file_path):
+      if os.path.isdir(path):
+        list_path = path
+        list_prefix = ''
+        prepend_prefix = prefix
+        if prepend_prefix and prepend_prefix[-1] != os.path.sep:
+          prepend_prefix += os.path.sep
+      else:
+        list_path = os.path.dirname(path)
+        list_prefix = os.path.basename(prefix)
+        prepend_prefix = os.path.dirname(prefix)
+        if prepend_prefix != '':
+          prepend_prefix += os.path.sep
+
+      for fobj in os.scandir(list_path):
+        if list_prefix != '' and not fobj.name.startswith(list_prefix):
          continue
-        filename = file_path.replace(remove, '')
-        filenames.append(filename)
+
+        if fobj.is_dir():
+          filenames.append(f"{prepend_prefix}{fobj.name}{os.path.sep}")
+        else:
+          filenames.append(f"{prepend_prefix}{fobj.name}")
     else:
       subdir = os.path.join(layer_path, os.path.dirname(prefix))
       for root, dirs, files in os.walk(subdir):
-        files = [ os.path.join(root, f) for f in files ]
-        files = [ f.replace(remove, '') for f in files ]
-        files = [ f for f in files if f[:len(prefix)] == prefix ]
+        files = ( os.path.join(root, f) for f in files )
+        files = ( f.removeprefix(remove) for f in files )
+        files = ( f for f in files if f[:len(prefix)] == prefix )
 
         for filename in files:
           filenames.append(filename)
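
Note: the scandir-based flat listing now reports immediate subdirectories with a trailing separator instead of silently dropping them. A runnable sketch of just that convention:

    import os, tempfile

    root = tempfile.mkdtemp()
    os.makedirs(os.path.join(root, "build"))
    open(os.path.join(root, "info"), "w").close()

    # files by name, directories with a trailing os.path.sep,
    # mirroring the flat branch above
    entries = [
      e.name + os.path.sep if e.is_dir() else e.name
      for e in os.scandir(root)
    ]
    print(sorted(entries))  # ['build/', 'info'] on POSIX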
@@ -452,8 +484,60 @@ class MemoryInterface(StorageInterface):
       result = result[slice(start, end)]
     return (result, encoding, None, None)
 
+  def save_file(self, src, dest, resumable) -> tuple[bool,int]:
+    key = self.get_path_to_file(src)
+    with EXT_TEST_SEQUENCE_LOCK:
+      exts = list(EXT_TEST_SEQUENCE)
+      exts = [ x[0] for x in exts ]
+
+    path = key
+    true_ext = ''
+    for ext in exts:
+      pathext = key + ext
+      if pathext in self._data:
+        path = pathext
+        true_ext = ext
+        break
+
+    filepath = os.path.join(dest, os.path.basename(path))
+
+    mkdir(os.path.dirname(dest))
+    try:
+      with open(dest + true_ext, "wb") as f:
+        f.write(self._data[path])
+    except KeyError:
+      return (False, 0)
+
+    return (True, len(self._data[path]))
+
   def head(self, file_path):
-    raise NotImplementedError()
+    path = self.get_path_to_file(file_path)
+
+    data = None
+    encoding = ''
+
+    with EXT_TEST_SEQUENCE_LOCK:
+      for ext, enc in EXT_TEST_SEQUENCE:
+        pathext = path + ext
+        if pathext in self._data:
+          data = self._data[pathext]
+          encoding = enc
+          break
+
+    return {
+      "Cache-Control": None,
+      "Content-Length": len(data),
+      "Content-Type": None,
+      "ETag": None,
+      "Last-Modified": None,
+      "Content-Md5": None,
+      "Content-Encoding": encoding,
+      "Content-Disposition": None,
+      "Content-Language": None,
+      "Storage-Class": None,
+      "Request-Charged": None,
+      "Parts-Count": None,
+    }
 
   def size(self, file_path):
     path = self.get_path_to_file(file_path)
@@ -474,6 +558,14 @@ class MemoryInterface(StorageInterface):
 
     return None
 
+  def copy_file(self, src_path, dest_bucket, dest_key) -> tuple[bool,int]:
+    key = self.get_path_to_file(src_path)
+    with MEM_BUCKET_POOL_LOCK:
+      pool = MEM_POOL[MemoryPoolParams(dest_bucket)]
+      dest_bucket = pool.get_connection(None, None)
+    dest_bucket[dest_key] = self._data[key]
+    return (True, len(self._data[key]))
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     return path in self._data or any(( (path + ext in self._data) for ext in COMPRESSION_EXTENSIONS ))
@@ -505,18 +597,28 @@ class MemoryInterface(StorageInterface):
 
     Returns: iterator
     """
-    layer_path = self.get_path_to_file("")
-    path = os.path.join(layer_path, prefix) + '*'
+    layer_path = self.get_path_to_file("")
 
     remove = layer_path
     if len(remove) and remove[-1] != '/':
       remove += '/'
 
-    filenames = [ f.replace(remove, '') for f in self._data ]
-    filenames = [ f for f in filenames if f[:len(prefix)] == prefix ]
+    filenames = ( f.removeprefix(remove) for f in self._data )
+    filenames = ( f for f in filenames if f[:len(prefix)] == prefix )
 
     if flat:
-      filenames = [ f for f in filenames if '/' not in f.replace(prefix, '') ]
+      tmp = []
+      for f in filenames:
+        elems = f.removeprefix(prefix).split('/')
+        if len(elems) > 1 and elems[0] == '':
+          elems.pop(0)
+          elems[0] = f'/{elems[0]}'
+
+        if len(elems) > 1:
+          tmp.append(f"{prefix}{elems[0]}/")
+        else:
+          tmp.append(f"{prefix}{elems[0]}")
+      filenames = tmp
 
     def stripext(fname):
       (base, ext) = os.path.splitext(fname)
@@ -529,6 +631,23 @@ class MemoryInterface(StorageInterface):
     filenames.sort()
     return iter(filenames)
 
+  def subtree_size(self, prefix:str = "") -> tuple[int,int]:
+    layer_path = self.get_path_to_file("")
+
+    remove = layer_path
+    if len(remove) and remove[-1] != '/':
+      remove += '/'
+
+    total_bytes = 0
+    total_files = 0
+    for filename, binary in self._data.items():
+      f_prefix = filename.removeprefix(remove)[:len(prefix)]
+      if f_prefix == prefix:
+        total_bytes += len(binary)
+        total_files += 1
+
+    return (total_files, total_bytes)
+
 class GoogleCloudStorageInterface(StorageInterface):
   exists_batch_size = Batch._MAX_BATCH_SIZE
   delete_batch_size = Batch._MAX_BATCH_SIZE
@@ -576,7 +695,7 @@ class GoogleCloudStorageInterface(StorageInterface):
     blob.upload_from_string(content, content_type)
 
   @retry
-  def copy_file(self, src_path, dest_bucket, dest_key):
+  def copy_file(self, src_path, dest_bucket, dest_key) -> tuple[bool,int]:
     key = self.get_path_to_file(src_path)
     source_blob = self._bucket.blob( key )
     with GCS_BUCKET_POOL_LOCK:
@@ -584,13 +703,13 @@ class GoogleCloudStorageInterface(StorageInterface):
       dest_bucket = pool.get_connection(self._secrets, None)
 
     try:
-      self._bucket.copy_blob(
+      blob = self._bucket.copy_blob(
         source_blob, dest_bucket, dest_key
       )
     except google.api_core.exceptions.NotFound:
-      return False
+      return (False, 0)
 
-    return True
+    return (True, blob.size)
 
   @retry_if_not(google.cloud.exceptions.NotFound)
   def get_file(self, file_path, start=None, end=None, part_size=None):
@@ -616,6 +735,28 @@ class GoogleCloudStorageInterface(StorageInterface):
 
     return (content, blob.content_encoding, hash_value, hash_type)
 
+  @retry
+  def save_file(self, src, dest, resumable) -> tuple[bool, int]:
+    key = self.get_path_to_file(src)
+    blob = self._bucket.blob(key)
+    try:
+      mkdir(os.path.dirname(dest))
+      blob.download_to_filename(
+        filename=dest,
+        raw_download=True,
+        checksum=None
+      )
+    except google.cloud.exceptions.NotFound:
+      return (False, 0)
+
+    num_bytes = os.path.getsize(dest)
+
+    ext = FileInterface.get_extension(blob.content_encoding)
+    if not dest.endswith(ext):
+      os.rename(dest, dest + ext)
+
+    return (True, num_bytes)
+
   @retry_if_not(google.cloud.exceptions.NotFound)
   def head(self, file_path):
     key = self.get_path_to_file(file_path)
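
Note: save_file writes the blob bytes verbatim (raw_download=True, checksum disabled) and then appends the file extension implied by the blob's Content-Encoding, so a gzip-encoded object lands on disk as dest + ".gz". A sketch of that renaming rule, assuming a mapping like the one FileInterface.get_extension presumably implements:

    # assumed encoding-to-extension mapping, for illustration only
    ENCODING_EXT = { "": "", "gzip": ".gz", "br": ".br", "zstd": ".zstd", "xz": ".xz" }

    def final_filename(dest: str, content_encoding: str) -> str:
      ext = ENCODING_EXT.get(content_encoding or "", "")
      return dest if dest.endswith(ext) else dest + ext

    print(final_filename("/tmp/skeleton", "gzip"))  # /tmp/skeleton.gz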
@@ -690,6 +831,7 @@ class GoogleCloudStorageInterface(StorageInterface):
     except google.cloud.exceptions.NotFound:
       pass
 
+
   @retry
   def list_files(self, prefix, flat=False):
     """
@@ -703,14 +845,50 @@ class GoogleCloudStorageInterface(StorageInterface):
     path = posixpath.join(layer_path, prefix)
 
     delimiter = '/' if flat else None
-    for blob in self._bucket.list_blobs(prefix=path, delimiter=delimiter):
-      filename = blob.name.replace(layer_path, '')
-      if not filename:
-        continue
-      elif not flat and filename[-1] != '/':
-        yield filename
-      elif flat and '/' not in blob.name.replace(path, ''):
-        yield filename
+
+    blobs = self._bucket.list_blobs(
+      prefix=path,
+      delimiter=delimiter,
+      page_size=2500,
+      fields="items(name),nextPageToken",
+    )
+
+    for page in blobs.pages:
+      if page.prefixes:
+        yield from (
+          item.removeprefix(path)
+          for item in page.prefixes
+        )
+
+      for blob in page:
+        filename = blob.name.removeprefix(layer_path)
+        if not filename:
+          continue
+        elif not flat and filename[-1] != '/':
+          yield filename
+        elif flat and '/' not in blob.name.removeprefix(path):
+          yield filename
+
+
+  @retry
+  def subtree_size(self, prefix:str = "") -> tuple[int,int]:
+    layer_path = self.get_path_to_file("")
+    path = posixpath.join(layer_path, prefix)
+
+    blobs = self._bucket.list_blobs(
+      prefix=path,
+      page_size=5000,
+      fields="items(name,size),nextPageToken",
+    )
+
+    total_bytes = 0
+    total_files = 0
+    for page in blobs.pages:
+      for blob in page:
+        total_bytes += blob.size
+        total_files += 1
+
+    return (total_files, total_bytes)
 
   def release_connection(self):
     global GC_POOL
@@ -759,6 +937,8 @@ class HttpInterface(StorageInterface):
     key = self.get_path_to_file(file_path)
     headers = self.default_headers()
     with self.session.head(key, headers=headers) as resp:
+      if resp.status_code in (404, 403):
+        return None
       resp.raise_for_status()
       return resp.headers
 
@@ -766,6 +946,9 @@ class HttpInterface(StorageInterface):
     headers = self.head(file_path)
     return int(headers["Content-Length"])
 
+  def subtree_size(self, prefix:str = "") -> tuple[int,int]:
+    raise NotImplementedError()
+
   @retry
   def get_file(self, file_path, start=None, end=None, part_size=None):
     key = self.get_path_to_file(file_path)
@@ -776,24 +959,60 @@ class HttpInterface(StorageInterface):
       end = int(end - 1) if end is not None else ''
       headers["Range"] = f"bytes={start}-{end}"
 
-    resp = self.session.get(key, headers=headers)
-
-    if resp.status_code in (404, 403):
-      return (None, None, None, None)
-    resp.close()
-    resp.raise_for_status()
+    with self.session.get(key, headers=headers, stream=True) as resp:
+      if resp.status_code in (404, 403):
+        return (None, None, None, None)
+      resp.raise_for_status()
+      resp.raw.decode_content = False
+      content = resp.raw.read()
+      content_encoding = resp.headers.get('Content-Encoding', None)
 
     # Don't check MD5 for http because the etag can come in many
     # forms from either GCS, S3 or another service entirely. We
     # probably won't figure out how to decode it right.
     # etag = resp.headers.get('etag', None)
-    content_encoding = resp.headers.get('Content-Encoding', None)
-
-    # requests automatically decodes these
-    if content_encoding in (None, '', 'gzip', 'deflate', 'br'):
-      content_encoding = None
 
-    return (resp.content, content_encoding, None, None)
+    return (content, content_encoding, None, None)
+
+  @retry
+  def save_file(self, src, dest, resumable) -> tuple[bool, int]:
+    key = self.get_path_to_file(src)
+
+    headers = self.head(src)
+    content_encoding = headers.get('Content-Encoding', None)
+
+    try:
+      ext = FileInterface.get_extension(content_encoding)
+    except ValueError:
+      ext = ""
+
+    fulldest = dest + ext
+
+    partname = fulldest
+    if resumable:
+      partname += ".part"
+
+    downloaded_size = 0
+    if resumable and os.path.exists(partname):
+      downloaded_size = os.path.getsize(partname)
+
+    streamed_bytes = 0
+
+    range_headers = { "Range": f"bytes={downloaded_size}-" }
+    with self.session.get(key, headers=range_headers, stream=True) as resp:
+      if resp.status_code not in [200, 206]:
+        resp.raise_for_status()
+        return (False, 0)
+
+      with open(partname, 'ab') as f:
+        for chunk in resp.iter_content(chunk_size=int(10e6)):
+          f.write(chunk)
+          streamed_bytes += len(chunk)
+
+    if resumable:
+      os.rename(partname, fulldest)
+
+    return (True, streamed_bytes)
 
   @retry
   def exists(self, file_path):
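
Note: the HTTP save_file resumes interrupted downloads by sizing the existing .part file and requesting only the remaining bytes via a Range header. The same pattern in isolation (URL and paths hypothetical; assumes the server honors Range requests):

    import os
    import requests

    def resume_download(url: str, dest: str) -> int:
      partname = dest + ".part"
      offset = os.path.getsize(partname) if os.path.exists(partname) else 0
      headers = { "Range": f"bytes={offset}-" }  # request the remainder only
      with requests.get(url, headers=headers, stream=True) as resp:
        resp.raise_for_status()  # expect 206 Partial Content when resuming
        with open(partname, "ab") as f:
          for chunk in resp.iter_content(chunk_size=int(10e6)):
            f.write(chunk)
      os.rename(partname, dest)
      return os.path.getsize(dest)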
@@ -813,29 +1032,48 @@ class HttpInterface(StorageInterface):
     )
     if prefix and prefix[0] == '/':
       prefix = prefix[1:]
-    if prefix and prefix[-1] != '/':
-      prefix += '/'
 
     headers = self.default_headers()
 
-    @retry
+    @retry_if_not(AuthorizationError)
     def request(token):
       nonlocal headers
+      params = {}
+      if prefix:
+        params["prefix"] = prefix
+      if token is not None:
+        params["pageToken"] = token
+      if flat:
+        params["delimiter"] = '/'
+
       results = self.session.get(
         f"https://storage.googleapis.com/storage/v1/b/{bucket}/o",
-        params={ "prefix": prefix, "pageToken": token },
+        params=params,
         headers=headers,
       )
+      if results.status_code in [401,403]:
+        raise AuthorizationError(f"http {results.status_code}")
+
       results.raise_for_status()
       results.close()
       return results.json()
 
+    strip = posixpath.dirname(prefix)
+    if strip and strip[-1] != '/':
+      strip += '/'
+
     token = None
     while True:
       results = request(token)
 
-      for res in results["items"]:
-        yield res["name"].replace(prefix, "", 1)
+      if 'prefixes' in results:
+        yield from (
+          item.removeprefix(strip)
+          for item in results["prefixes"]
+        )
+
+      for res in results.get("items", []):
+        yield res["name"].removeprefix(strip)
 
       token = results.get("nextPageToken", None)
       if token is None:
@@ -887,13 +1125,15 @@ class HttpInterface(StorageInterface):
   def list_files(self, prefix, flat=False):
     if self._path.host == "https://storage.googleapis.com":
       yield from self._list_files_google(prefix, flat)
-
+      return
+
     url = posixpath.join(self._path.host, self._path.path, prefix)
     resp = requests.head(url)
 
     server = resp.headers.get("Server", "").lower()
     if 'apache' in server:
       yield from self._list_files_apache(prefix, flat)
+      return
     else:
       raise NotImplementedError()
 
@@ -963,7 +1203,7 @@ class S3Interface(StorageInterface):
     elif compress in ("xz", "lzma"):
       attrs['ContentEncoding'] = 'xz'
     elif compress in ("bzip2", "bz2"):
-      attrs['ContentEncoding'] = 'bz2'
+      attrs['ContentEncoding'] = 'bzip2'
     elif compress:
       raise ValueError("Compression type {} not supported.".format(compress))
 
@@ -972,10 +1212,17 @@ class S3Interface(StorageInterface):
     if storage_class:
       attrs['StorageClass'] = storage_class
 
-    multipart = hasattr(content, "read") and hasattr(content, "seek")
+    multipart = False
+    is_file_handle = hasattr(content, "read") and hasattr(content, "seek")
+
+    if is_file_handle:
+      content_length = os.fstat(content.fileno()).st_size
+    else:
+      content_length = len(content)
 
-    if not multipart and len(content) > int(self.composite_upload_threshold):
-      content = BytesIO(content)
+    if not multipart and content_length > int(self.composite_upload_threshold):
+      if not is_file_handle:
+        content = BytesIO(content)
       multipart = True
 
     # gevent monkey patching has a bad interaction with s3's use
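
Note: upload sizing now handles both bytes-like payloads and open file handles without reading the latter into memory. A small sketch; os.fstat only works for handles backed by a real file descriptor (a BytesIO raises io.UnsupportedOperation from fileno()):

    import os

    def payload_length(content) -> int:
      if hasattr(content, "read") and hasattr(content, "seek"):
        # a real file handle: stat the descriptor instead of reading it
        return os.fstat(content.fileno()).st_size
      return len(content)

    print(payload_length(b"hello"))  # 5
    with open("/etc/hostname", "rb") as f:  # hypothetical file
      print(payload_length(f))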
@@ -985,32 +1232,55 @@ class S3Interface(StorageInterface):
       multipart = False
       content = content.read()
 
+    # WMS 2025-07-05:
+    # Currently, boto3 does not properly support streaming smaller files.
+    # It uses an S3 API that requires a checksum up-front, but streaming
+    # checksums can only be provided at the end.
+    # https://github.com/boto/boto3/issues/3738
+    # https://github.com/boto/boto3/issues/4392
+    # https://docs.aws.amazon.com/sdkref/latest/guide/feature-dataintegrity.html
+    if not multipart and is_file_handle and content_length < int(self.composite_upload_threshold):
+      content = content.read()
+
     if multipart:
       self._conn.upload_fileobj(content, self._path.bucket, key, ExtraArgs=attrs)
     else:
+      if isinstance(content, str):
+        content = content.encode('utf8')
+
       attrs['Bucket'] = self._path.bucket
       attrs['Body'] = content
       attrs['Key'] = key
-      attrs['ContentMD5'] = md5(content)
+      attrs["ChecksumCRC32C"] = encode_crc32c_b64(content).decode('utf8')
       self._conn.put_object(**attrs)
 
   @retry
-  def copy_file(self, src_path, dest_bucket_name, dest_key):
+  def copy_file(self, src_path, dest_bucket_name, dest_key) -> tuple[bool,int]:
     key = self.get_path_to_file(src_path)
-    dest_bucket = self._get_bucket(dest_bucket_name)
+    s3client = self._get_bucket(dest_bucket_name)
     copy_source = {
       'Bucket': self._path.bucket,
       'Key': key,
     }
     try:
-      dest_bucket.copy(CopySource=copy_source, Bucket=dest_bucket_name, Key=dest_key)
+      response = s3client.copy_object(
+        CopySource=copy_source,
+        Bucket=dest_bucket_name,
+        Key=dest_key,
+        MetadataDirective='COPY' # Ensure metadata like Content-Encoding is copied
+      )
     except botocore.exceptions.ClientError as err:
       if err.response['Error']['Code'] in ('NoSuchKey', '404'):
-        return False
+        return (False, 0)
       else:
         raise
 
-    return True
+    try:
+      num_bytes = int(response["ResponseMetadata"]["HTTPHeaders"]["content-length"])
+    except KeyError:
+      num_bytes = 0
+
+    return (True, num_bytes)
 
   @retry
   def get_file(self, file_path, start=None, end=None, part_size=None):
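
Note: put_object now sends a ChecksumCRC32C instead of ContentMD5, in line with AWS's newer data-integrity checks; S3 expects the big-endian CRC32C of the payload, base64-encoded. A sketch of what encode_crc32c_b64 plausibly computes, assuming the google-crc32c package (the helper actually shipped in .lib may be implemented differently):

    import base64
    import struct

    import google_crc32c  # assumed dependency

    def encode_crc32c_b64(data: bytes) -> bytes:
      checksum = google_crc32c.value(data)  # 32-bit CRC32C (Castagnoli)
      return base64.b64encode(struct.pack(">I", checksum))

    print(encode_crc32c_b64(b"hello").decode("utf8"))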
@@ -1038,6 +1308,11 @@ class S3Interface(StorageInterface):
     if 'ContentEncoding' in resp:
       encoding = resp['ContentEncoding']
 
+    encoding = ",".join([
+      enc for enc in encoding.split(",")
+      if enc != "aws-chunked"
+    ])
+
     # s3 etags return hex digests but we need the base64 encoding
     # to make uniform comparisons.
     # example s3 etag: "31ee76261d87fed8cb9d4c465c48158c"
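
Note: objects uploaded by newer AWS SDKs can report a Content-Encoding such as "aws-chunked,gzip"; the transfer encoding is stripped so only real content codings remain. For example:

    encoding = "aws-chunked,gzip"
    encoding = ",".join([
      enc for enc in encoding.split(",")
      if enc != "aws-chunked"
    ])
    print(encoding)  # gzip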
@@ -1065,6 +1340,44 @@ class S3Interface(StorageInterface):
       else:
         raise
 
+  @retry
+  def save_file(self, src, dest, resumable) -> tuple[bool,int]:
+    key = self.get_path_to_file(src)
+    kwargs = self._additional_attrs.copy()
+
+    resp = self.head(src)
+
+    if resp is None:
+      return (False, 0)
+
+    mkdir(os.path.dirname(dest))
+
+    encoding = resp.get("Content-Encoding", "") or ""
+    encoding = ",".join([
+      enc for enc in encoding.split(",")
+      if enc != "aws-chunked"
+    ])
+    ext = FileInterface.get_extension(encoding)
+
+    if not dest.endswith(ext):
+      dest += ext
+
+    try:
+      self._conn.download_file(
+        Bucket=self._path.bucket,
+        Key=key,
+        Filename=dest,
+        **kwargs
+      )
+    except botocore.exceptions.ClientError as err:
+      if err.response['Error']['Code'] in ('NoSuchKey', '404'):
+        return (False, 0)
+      else:
+        raise
+
+    num_bytes = os.path.getsize(dest)
+    return (True, num_bytes)
+
   @retry
   def head(self, file_path):
     try:
@@ -1073,6 +1386,11 @@ class S3Interface(StorageInterface):
         Key=self.get_path_to_file(file_path),
         **self._additional_attrs,
       )
+
+      encoding = response.get("ContentEncoding", None)
+      if encoding == '':
+        encoding = None
+
       return {
         "Cache-Control": response.get("CacheControl", None),
         "Content-Length": response.get("ContentLength", None),
@@ -1080,7 +1398,7 @@ class S3Interface(StorageInterface):
         "ETag": response.get("ETag", None),
         "Last-Modified": response.get("LastModified", None),
         "Content-Md5": response["ResponseMetadata"]["HTTPHeaders"].get("content-md5", None),
-        "Content-Encoding": response.get("ContentEncoding", None),
+        "Content-Encoding": encoding,
         "Content-Disposition": response.get("ContentDisposition", None),
         "Content-Language": response.get("ContentLanguage", None),
         "Storage-Class": response.get("StorageClass", None),
@@ -1171,7 +1489,7 @@ class S3Interface(StorageInterface):
     path = posixpath.join(layer_path, prefix)
 
     @retry
-    def s3lst(continuation_token=None):
+    def s3lst(path, continuation_token=None):
      kwargs = {
        'Bucket': self._path.bucket,
        'Prefix': path,
@@ -1185,31 +1503,89 @@ class S3Interface(StorageInterface):
 
       return self._conn.list_objects_v2(**kwargs)
 
-    resp = s3lst()
+    resp = s3lst(path)
+    # the case where the prefix is something like "build", but "build" is a subdirectory
+    # so requery with "build/" to get the proper behavior
+    if (
+      flat
+      and path
+      and path[-1] != '/'
+      and 'Contents' not in resp
+      and len(resp.get("CommonPrefixes", [])) == 1
+    ):
+      path += '/'
+      resp = s3lst(path)
 
     def iterate(resp):
+      if 'CommonPrefixes' in resp.keys():
+        yield from [
+          item["Prefix"].removeprefix(layer_path)
+          for item in resp['CommonPrefixes']
+        ]
+
       if 'Contents' not in resp.keys():
         resp['Contents'] = []
 
       for item in resp['Contents']:
         key = item['Key']
-        filename = key.replace(layer_path, '')
+        filename = key.removeprefix(layer_path)
        if filename == '':
          continue
        elif not flat and filename[-1] != '/':
          yield filename
-        elif flat and '/' not in key.replace(path, ''):
+        elif flat and '/' not in key.removeprefix(path):
          yield filename
 
     for filename in iterate(resp):
       yield filename
 
     while resp['IsTruncated'] and resp['NextContinuationToken']:
-      resp = s3lst(resp['NextContinuationToken'])
+      resp = s3lst(path, resp['NextContinuationToken'])
 
       for filename in iterate(resp):
         yield filename
 
+  def subtree_size(self, prefix:str = "") -> tuple[int,int]:
+    layer_path = self.get_path_to_file("")
+    path = posixpath.join(layer_path, prefix)
+
+    @retry
+    def s3lst(path, continuation_token=None):
+      kwargs = {
+        'Bucket': self._path.bucket,
+        'Prefix': path,
+        **self._additional_attrs
+      }
+
+      if continuation_token:
+        kwargs['ContinuationToken'] = continuation_token
+
+      return self._conn.list_objects_v2(**kwargs)
+
+    resp = s3lst(path)
+
+    def iterate(resp):
+      if 'Contents' not in resp.keys():
+        resp['Contents'] = []
+
+      for item in resp['Contents']:
+        yield item.get('Size', 0)
+
+    total_bytes = 0
+    total_files = 0
+    for num_bytes in iterate(resp):
+      total_files += 1
+      total_bytes += num_bytes
+
+    while resp['IsTruncated'] and resp['NextContinuationToken']:
+      resp = s3lst(path, resp['NextContinuationToken'])
+
+      for num_bytes in iterate(resp):
+        total_files += 1
+        total_bytes += num_bytes
+
+    return (total_files, total_bytes)
+
   def release_connection(self):
     global S3_POOL
     service = self._path.alias or 's3'
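
Note: with a delimiter, list_objects_v2 reports immediate "subdirectories" as CommonPrefixes rather than Contents. A flat query for a bare prefix like "build" can therefore return a single CommonPrefix and no Contents, which is what triggers the requery with "build/" above. A sketch of that response shape (bucket name hypothetical):

    import boto3

    s3 = boto3.client("s3")
    resp = s3.list_objects_v2(
      Bucket="example-bucket",  # hypothetical
      Prefix="build",
      Delimiter="/",
    )
    # with keys like "build/a.json": no 'Contents', and
    # resp["CommonPrefixes"] == [{"Prefix": "build/"}]
    print(resp.get("CommonPrefixes", []), resp.get("Contents", []))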
@@ -1224,8 +1600,25 @@ class CaveInterface(HttpInterface):
   is, don't worry about it.
   see: https://github.com/CAVEconnectome
   """
-  def default_headers(self):
-    cred = cave_credentials()
+  def __init__(self, path, secrets=None, **kwargs):
+    super().__init__(path, secrets=secrets, **kwargs)
+
+    # secrets arrives as a named parameter, not via kwargs
+    if secrets is None:
+      secrets = {}
+
+    self._token = secrets.get('token', None)
+    if self._token is None:
+      server = self._path.host.replace("https://", "", 1)
+      server = server.replace("http://", "", 1)
+      self._token = cave_credentials(server)
+      if self._token is not None:
+        self._token = self._token.get('token', None)
+
+  def default_headers(self) -> dict:
+    if self._token is None:
+      return {}
+
     return {
-      "Authorization": f"Bearer {cred['token']}",
-    }
+      "Authorization": f"Bearer {self._token}",
+    }
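
Note: CaveInterface now resolves its bearer token once at construction, preferring an explicit secrets token and falling back to cave_credentials(server) for the host; requests without a token simply omit the Authorization header. The header logic in isolation:

    def default_headers(token):
      if token is None:
        return {}
      return { "Authorization": f"Bearer {token}" }

    print(default_headers(None))      # {}
    print(default_headers("abc123"))  # hypothetical token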