cloud-files 4.30.1__tar.gz → 5.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {cloud-files-4.30.1 → cloud_files-5.0.1}/.github/workflows/test-suite.yml +1 -1
  2. {cloud-files-4.30.1 → cloud_files-5.0.1}/ChangeLog +33 -0
  3. {cloud-files-4.30.1 → cloud_files-5.0.1}/PKG-INFO +4 -4
  4. {cloud-files-4.30.1 → cloud_files-5.0.1}/automated_test.py +5 -3
  5. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/PKG-INFO +4 -4
  6. cloud_files-5.0.1/cloud_files.egg-info/pbr.json +1 -0
  7. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/cloudfiles.py +60 -4
  8. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/exceptions.py +4 -0
  9. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/interfaces.py +282 -35
  10. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/lib.py +5 -0
  11. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/cloudfiles_cli.py +37 -15
  12. {cloud-files-4.30.1 → cloud_files-5.0.1}/setup.cfg +2 -2
  13. {cloud-files-4.30.1 → cloud_files-5.0.1}/setup.py +1 -1
  14. cloud-files-4.30.1/cloud_files.egg-info/pbr.json +0 -1
  15. {cloud-files-4.30.1 → cloud_files-5.0.1}/AUTHORS +0 -0
  16. {cloud-files-4.30.1 → cloud_files-5.0.1}/LICENSE +0 -0
  17. {cloud-files-4.30.1 → cloud_files-5.0.1}/MANIFEST.in +0 -0
  18. {cloud-files-4.30.1 → cloud_files-5.0.1}/README.md +0 -0
  19. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/SOURCES.txt +0 -0
  20. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/dependency_links.txt +0 -0
  21. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/entry_points.txt +0 -0
  22. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/not-zip-safe +0 -0
  23. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/requires.txt +0 -0
  24. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/top_level.txt +0 -0
  25. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/__init__.py +0 -0
  26. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/compression.py +0 -0
  27. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/connectionpools.py +0 -0
  28. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/gcs.py +0 -0
  29. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/paths.py +0 -0
  30. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/resumable_tools.py +0 -0
  31. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/scheduler.py +0 -0
  32. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/secrets.py +0 -0
  33. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/threaded_queue.py +0 -0
  34. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/typing.py +0 -0
  35. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/LICENSE +0 -0
  36. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/__init__.py +0 -0
  37. {cloud-files-4.30.1 → cloud_files-5.0.1}/requirements.txt +0 -0

{cloud-files-4.30.1 → cloud_files-5.0.1}/.github/workflows/test-suite.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

     steps:
     - uses: actions/checkout@v2

{cloud-files-4.30.1 → cloud_files-5.0.1}/ChangeLog
@@ -1,10 +1,43 @@
 CHANGES
 =======

+5.0.1
+-----
+
+* fix: prevent aws-chunked from populating in Content-Encoding (#109)
+* fix: add head implementation for MemoryInterface
+* fix+test: check that content encoding is transferred correctly
+
+5.0.0
+-----
+
+* feat: efficient saving to disk (#108)
+* install: set minimum version to py39
+* ci: drop py38, add py313
+* fix: strip 'aws-chunked' from s3 encodings
+* fix: add no\_sign\_request for s3 listing
+* fix: prefix logic for no-auth gcs
+* fix: list files google http
+* feat(cli): add no-auth flag to ls
+* fix: abort auth error in list files (http, google)
+* fix: make s3 listing consistent with file and mem
+* fix(list): memory and files interface list flat more consistently
+* test: make flat more consistent in list\_files
+* fix: replaceprefix -> removeprefix
+* fix: aws-chunked does not affect byte encoding
+* fix: harmonizing definition of flat across interfaces
+* feat: adding (broken) support for listing common prefixes
+* refactor: use same pattern for removeprefix
+* fix: make "flat" listing work for s3
+
 4.30.1
 ------

 * fix(gcs): don't double compress when uploading to gcs w/ composite
+
+4.30.0
+------
+
 * redesign: normalize cloudpaths so file:// isn't required

 4.29.0

{cloud-files-4.30.1 → cloud_files-5.0.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.30.1
+Version: 5.0.1
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -10,13 +10,13 @@ Classifier: Intended Audience :: Developers
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.7,<4.0
+Requires-Python: >=3.9,<4.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS

{cloud-files-4.30.1 → cloud_files-5.0.1}/automated_test.py
@@ -376,12 +376,12 @@ def test_list(s3, protocol):
   assert set(cf.list(prefix='nofolder/')) == set([])

   # Tests (1)
-  assert set(cf.list(prefix='', flat=True)) == set(['info1','info2','info5','info.txt'])
+  assert set(cf.list(prefix='', flat=True)) == set(['info1','info2','info5','info.txt', 'build/', 'level1/'])
   assert set(cf.list(prefix='inf', flat=True)) == set(['info1','info2','info5','info.txt'])
   # Tests (2)
-  assert set(cf.list(prefix='build', flat=True)) == set([])
+  assert set(cf.list(prefix='build', flat=True)) == set(['build/info3'])
   # Tests (3)
-  assert set(cf.list(prefix='level1/', flat=True)) == set([])
+  assert set(cf.list(prefix='level1/', flat=True)) == set(['level1/level2/'])
   assert set(cf.list(prefix='build/', flat=True)) == set(['build/info3'])
   # Tests (4)
   assert set(cf.list(prefix='build/inf', flat=True)) == set(['build/info3'])
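
Note on the new assertions: flat=True listings now include immediate subdirectories as prefixes with a trailing slash, and a bare prefix that names a directory (e.g. 'build') descends into it instead of returning nothing. A minimal sketch of the new semantics, assuming a local layout that mirrors the test fixture (the path is hypothetical):

    from cloudfiles import CloudFiles

    cf = CloudFiles("file:///tmp/flatdemo")  # hypothetical location
    cf.puts([ ("info1", b""), ("build/info3", b""), ("level1/level2/info4", b"") ])

    set(cf.list(flat=True))                    # {'info1', 'build/', 'level1/'}
    set(cf.list(prefix="build", flat=True))    # {'build/info3'}
    set(cf.list(prefix="level1/", flat=True))  # {'level1/level2/'}
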
@@ -670,11 +670,13 @@ def test_transfer_semantics(s3, compression, src_protocol, dest_protocol, allow_
   cff.transfer_to(cfm.cloudpath, allow_missing=allow_missing)
   assert sorted(list(cfm)) == sorted([ str(i) for i in range(N) ])
   assert [ f['content'] for f in cfm[:] ] == [ content ] * N
+  assert cfm.head("1")["Content-Encoding"] == cff.head("1")["Content-Encoding"]

   cfm.delete(list(cfm))

   cff.transfer_to(cfm.cloudpath, reencode='br', allow_missing=allow_missing)
   assert sorted(list(cfm)) == sorted([ str(i) for i in range(N) ])
   assert [ f['content'] for f in cfm[:] ] == [ content ] * N
+  assert 'br' in cfm.head("1")["Content-Encoding"]

   if dest_protocol == "mem":
     data = cfm._get_connection()._data

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.30.1
+Version: 5.0.1
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -10,13 +10,13 @@ Classifier: Intended Audience :: Developers
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.7,<4.0
+Requires-Python: >=3.9,<4.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS

cloud_files-5.0.1/cloud_files.egg-info/pbr.json (new file)
@@ -0,0 +1 @@
+{"git_version": "4c96852", "is_release": true}

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/cloudfiles.py
@@ -743,9 +743,12 @@ class CloudFiles:
       return True
     elif prefix[-1] == "/":
       return True
-
-    res = first(self.list(prefix=prefix))
-    return res is not None
+    try:
+      res = first(self.list(prefix=prefix))
+      return res is not None
+    except NotImplementedError as err:
+      res = CloudFile(self.cloudpath).size()
+      return res > 0

   def exists(
     self, paths:GetPathType,
@@ -1001,6 +1004,7 @@ class CloudFiles:
     content_type:Optional[str] = None,
     allow_missing:bool = False,
     progress:Optional[bool] = None,
+    resumable:bool = False,
   ) -> None:
     """
     Transfer all files from this CloudFiles storage
@@ -1035,6 +1039,11 @@ class CloudFiles:
       as '' (None), 'gzip', 'br', 'zstd'
     content_type: if provided, set the Content-Type header
       on the upload. This is necessary for e.g. file->cloud
+
+    resumable: for remote->file downloads, download to a .part
+      file and rename it when the download completes. If the
+      download does not complete, it can be resumed. Only
+      supported for https->file currently.
     """
     if isinstance(cf_dest, str):
       cf_dest = CloudFiles(
@@ -1046,7 +1055,7 @@ class CloudFiles:
       self, paths, block_size,
       reencode, content_type,
       allow_missing,
-      progress,
+      progress, resumable,
     )

   def transfer_from(
@@ -1058,6 +1067,7 @@ class CloudFiles:
     content_type:Optional[str] = None,
     allow_missing:bool = False,
     progress:Optional[bool] = None,
+    resumable:bool = False,
   ) -> None:
     """
     Transfer all files from the source CloudFiles storage
@@ -1092,6 +1102,10 @@ class CloudFiles:
       as '' (None), 'gzip', 'br', 'zstd'
     content_type: if provided, set the Content-Type header
       on the upload. This is necessary for e.g. file->cloud
+    resumable: for remote->file downloads, download to a .part
+      file and rename it when the download completes. If the
+      download does not complete, it can be resumed. Only
+      supported for https->file currently.
     """
     if isinstance(cf_src, str):
       cf_src = CloudFiles(
@@ -1122,6 +1136,16 @@ class CloudFiles:
         cf_src, self, paths, total,
         pbar, block_size, allow_missing
       )
+    elif (
+      cf_src.protocol != "file"
+      and self.protocol == "file"
+      and reencode is None
+    ):
+      self.__transfer_remote_to_file(
+        cf_src, self, paths, total,
+        pbar, block_size, content_type,
+        allow_missing, resumable,
+      )
     elif (
       cf_src.protocol == "file"
       and self.protocol != "file"
@@ -1237,6 +1261,38 @@ class CloudFiles:

       pbar.update(1)

+  def __transfer_remote_to_file(
+    self, cf_src, cf_dest, paths,
+    total, pbar, block_size, content_type,
+    allow_missing, resumable,
+  ):
+    def thunk_save(key):
+      with cf_src._get_connection() as conn:
+        if isinstance(key, dict):
+          dest_key = key.get("dest_path", key["path"])
+          src_key = key["path"]
+        else:
+          src_key = key
+          dest_key = key
+
+        dest_key = os.path.join(cf_dest._path.path, dest_key)
+        found = conn.save_file(src_key, dest_key, resumable=resumable)
+
+      if found == False and not allow_missing:
+        raise FileNotFoundError(src_key)
+
+      return int(found)
+
+    results = schedule_jobs(
+      fns=( partial(thunk_save, path) for path in paths ),
+      progress=pbar,
+      concurrency=self.num_threads,
+      total=totalfn(paths, total),
+      green=self.green,
+      count_return=True,
+    )
+    return len(results)
+
   def __transfer_file_to_remote(
     self, cf_src, cf_dest, paths,
     total, pbar, block_size, content_type,
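
Taken together, these hunks route remote->file transfers through each interface's new save_file method and thread the resumable flag through transfer_to/transfer_from. A usage sketch of the new option, assuming a hypothetical public https source and that paths defaults to all files:

    from cloudfiles import CloudFiles

    cf = CloudFiles("https://example.com/datasets/")  # hypothetical source
    # Interrupted transfers leave .part files that a re-run picks up
    # (https->file only, per the docstring above).
    cf.transfer_to("file:///tmp/datasets/", resumable=True)
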

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/exceptions.py
@@ -14,6 +14,10 @@ class CompressionError(Exception):
   """
   pass

+class AuthorizationError(Exception):
+  """Authorization Error"""
+  pass
+
 class UnsupportedCompressionType(Exception):
   """
   Raised when attempting to use a compression type which is unsupported

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/interfaces.py
@@ -11,7 +11,6 @@ import re
 import boto3
 import botocore
 import gevent.monkey
-from glob import glob
 import google.cloud.exceptions
 from google.cloud.storage import Batch, Client
 import requests
@@ -22,8 +21,8 @@ import fasteners

 from .compression import COMPRESSION_TYPES
 from .connectionpools import S3ConnectionPool, GCloudBucketPool, MemoryPool, MEMORY_DATA
-from .exceptions import MD5IntegrityError, CompressionError
-from .lib import mkdir, sip, md5, validate_s3_multipart_etag
+from .exceptions import MD5IntegrityError, CompressionError, AuthorizationError
+from .lib import mkdir, sip, md5, encode_crc32c_b64, validate_s3_multipart_etag
 from .secrets import (
   http_credentials,
   cave_credentials,
@@ -339,7 +338,7 @@ class FileInterface(StorageInterface):
     """

     layer_path = self.get_path_to_file("")
-    path = os.path.join(layer_path, prefix) + '*'
+    path = os.path.join(layer_path, prefix)

     filenames = []

@@ -348,17 +347,33 @@ class FileInterface(StorageInterface):
       remove += os.path.sep

     if flat:
-      for file_path in glob(path):
-        if not os.path.isfile(file_path):
+      if os.path.isdir(path):
+        list_path = path
+        list_prefix = ''
+        prepend_prefix = prefix
+        if prepend_prefix and prepend_prefix[-1] != os.path.sep:
+          prepend_prefix += os.path.sep
+      else:
+        list_path = os.path.dirname(path)
+        list_prefix = os.path.basename(prefix)
+        prepend_prefix = os.path.dirname(prefix)
+        if prepend_prefix != '':
+          prepend_prefix += os.path.sep
+
+      for fobj in os.scandir(list_path):
+        if list_prefix != '' and not fobj.name.startswith(list_prefix):
           continue
-        filename = file_path.replace(remove, '')
-        filenames.append(filename)
+
+        if fobj.is_dir():
+          filenames.append(f"{prepend_prefix}{fobj.name}{os.path.sep}")
+        else:
+          filenames.append(f"{prepend_prefix}{fobj.name}")
     else:
       subdir = os.path.join(layer_path, os.path.dirname(prefix))
       for root, dirs, files in os.walk(subdir):
-        files = [ os.path.join(root, f) for f in files ]
-        files = [ f.replace(remove, '') for f in files ]
-        files = [ f for f in files if f[:len(prefix)] == prefix ]
+        files = ( os.path.join(root, f) for f in files )
+        files = ( f.removeprefix(remove) for f in files )
+        files = ( f for f in files if f[:len(prefix)] == prefix )

         for filename in files:
           filenames.append(filename)
@@ -452,8 +467,60 @@ class MemoryInterface(StorageInterface):
       result = result[slice(start, end)]
     return (result, encoding, None, None)

+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+    with EXT_TEST_SEQUENCE_LOCK:
+      exts = list(EXT_TEST_SEQUENCE)
+      exts = [ x[0] for x in exts ]
+
+    path = key
+    true_ext = ''
+    for ext in exts:
+      pathext = key + ext
+      if pathext in self._data:
+        path = pathext
+        true_ext = ext
+        break
+
+    filepath = os.path.join(dest, os.path.basename(path))
+
+    mkdir(os.path.dirname(dest))
+    try:
+      with open(dest + true_ext, "wb") as f:
+        f.write(self._data[path])
+    except KeyError:
+      return False
+
+    return True
+
   def head(self, file_path):
-    raise NotImplementedError()
+    path = self.get_path_to_file(file_path)
+
+    data = None
+    encoding = ''
+
+    with EXT_TEST_SEQUENCE_LOCK:
+      for ext, enc in EXT_TEST_SEQUENCE:
+        pathext = path + ext
+        if pathext in self._data:
+          data = self._data[pathext]
+          encoding = enc
+          break
+
+    return {
+      "Cache-Control": None,
+      "Content-Length": len(data),
+      "Content-Type": None,
+      "ETag": None,
+      "Last-Modified": None,
+      "Content-Md5": None,
+      "Content-Encoding": encoding,
+      "Content-Disposition": None,
+      "Content-Language": None,
+      "Storage-Class": None,
+      "Request-Charged": None,
+      "Parts-Count": None,
+    }

   def size(self, file_path):
     path = self.get_path_to_file(file_path)
@@ -520,11 +587,22 @@ class MemoryInterface(StorageInterface):
     if len(remove) and remove[-1] != '/':
       remove += '/'

-    filenames = [ f.replace(remove, '') for f in self._data ]
-    filenames = [ f for f in filenames if f[:len(prefix)] == prefix ]
+    filenames = ( f.removeprefix(remove) for f in self._data )
+    filenames = ( f for f in filenames if f[:len(prefix)] == prefix )

     if flat:
-      filenames = [ f for f in filenames if '/' not in f.replace(prefix, '') ]
+      tmp = []
+      for f in filenames:
+        elems = f.removeprefix(prefix).split('/')
+        if len(elems) > 1 and elems[0] == '':
+          elems.pop(0)
+          elems[0] = f'/{elems[0]}'
+
+        if len(elems) > 1:
+          tmp.append(f"{prefix}{elems[0]}/")
+        else:
+          tmp.append(f"{prefix}{elems[0]}")
+      filenames = tmp

     def stripext(fname):
       (base, ext) = os.path.splitext(fname)
@@ -624,6 +702,25 @@ class GoogleCloudStorageInterface(StorageInterface):

     return (content, blob.content_encoding, hash_value, hash_type)

+  @retry
+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+    blob = self._bucket.blob(key)
+    try:
+      blob.download_to_filename(
+        filename=dest,
+        raw_download=True,
+        checksum=None
+      )
+    except google.cloud.exceptions.NotFound:
+      return False
+
+    ext = FileInterface.get_extension(blob.content_encoding)
+    if not dest.endswith(ext):
+      os.rename(dest, dest + ext)
+
+    return True
+
   @retry_if_not(google.cloud.exceptions.NotFound)
   def head(self, file_path):
     key = self.get_path_to_file(file_path)
@@ -711,13 +808,24 @@ class GoogleCloudStorageInterface(StorageInterface):
     path = posixpath.join(layer_path, prefix)

     delimiter = '/' if flat else None
-    for blob in self._bucket.list_blobs(prefix=path, delimiter=delimiter):
-      filename = blob.name.replace(layer_path, '')
+    blobs = self._bucket.list_blobs(
+      prefix=path,
+      delimiter=delimiter,
+    )
+
+    if blobs.prefixes:
+      yield from (
+        item.removeprefix(path)
+        for item in blobs.prefixes
+      )
+
+    for blob in blobs:
+      filename = blob.name.removeprefix(layer_path)
       if not filename:
         continue
       elif not flat and filename[-1] != '/':
         yield filename
-      elif flat and '/' not in blob.name.replace(path, ''):
+      elif flat and '/' not in blob.name.removeprefix(path):
         yield filename

   def release_connection(self):
@@ -803,6 +911,43 @@ class HttpInterface(StorageInterface):

     return (resp.content, content_encoding, None, None)

+  @retry
+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+
+    headers = self.head(src)
+    content_encoding = headers.get('Content-Encoding', None)
+
+    try:
+      ext = FileInterface.get_extension(content_encoding)
+    except ValueError:
+      ext = ""
+
+    fulldest = dest + ext
+
+    partname = fulldest
+    if resumable:
+      partname += ".part"
+
+    downloaded_size = 0
+    if resumable and os.path.exists(partname):
+      downloaded_size = os.path.getsize(partname)
+
+    range_headers = { "Range": f"bytes={downloaded_size}-" }
+    with self.session.get(key, headers=range_headers, stream=True) as resp:
+      if resp.status_code not in [200, 206]:
+        resp.raise_for_status()
+        return False
+
+      with open(partname, 'ab') as f:
+        for chunk in resp.iter_content(chunk_size=int(10e6)):
+          f.write(chunk)
+
+    if resumable:
+      os.rename(partname, fulldest)
+
+    return True
+
   @retry
   def exists(self, file_path):
     key = self.get_path_to_file(file_path)
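
The resumable branch above is a standard HTTP Range resume: append to the .part file starting from its current size, then rename once the download completes. The same pattern in isolation (URL and filenames are invented; assumes the server honors Range requests):

    import os
    import requests

    url = "https://example.com/data.bin"  # hypothetical
    part = "data.bin.part"

    # resume from wherever the previous attempt stopped
    offset = os.path.getsize(part) if os.path.exists(part) else 0
    with requests.get(url, headers={ "Range": f"bytes={offset}-" }, stream=True) as resp:
      resp.raise_for_status()  # expect 200 (full) or 206 (partial content)
      with open(part, "ab") as f:
        for chunk in resp.iter_content(chunk_size=int(10e6)):
          f.write(chunk)

    os.rename(part, "data.bin")
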
@@ -821,29 +966,49 @@ class HttpInterface(StorageInterface):
     )
     if prefix and prefix[0] == '/':
       prefix = prefix[1:]
-    if prefix and prefix[-1] != '/':
-      prefix += '/'

     headers = self.default_headers()

-    @retry
+    @retry_if_not(AuthorizationError)
     def request(token):
       nonlocal headers
+      params = {}
+      if prefix:
+        params["prefix"] = prefix
+      if token is not None:
+        params["pageToken"] = token
+      if flat:
+        params["delimiter"] = '/'
+
       results = self.session.get(
         f"https://storage.googleapis.com/storage/v1/b/{bucket}/o",
-        params={ "prefix": prefix, "pageToken": token },
+        params=params,
         headers=headers,
       )
+      if results.status_code in [401,403]:
+        raise AuthorizationError(f"http {results.status_code}")
+
       results.raise_for_status()
       results.close()
       return results.json()

+    strip = posixpath.dirname(prefix)
+    if strip and strip[-1] != '/':
+      strip += '/'
+
     token = None
     while True:
       results = request(token)

-      for res in results["items"]:
-        yield res["name"].replace(prefix, "", 1)
+      if 'prefixes' in results:
+        yield from (
+          item.removeprefix(strip)
+          for item in results["prefixes"]
+        )
+
+      for res in results.get("items", []):
+        print(res["name"])
+        yield res["name"].removeprefix(strip)

       token = results.get("nextPageToken", None)
       if token is None:
@@ -895,13 +1060,15 @@ class HttpInterface(StorageInterface):
   def list_files(self, prefix, flat=False):
     if self._path.host == "https://storage.googleapis.com":
       yield from self._list_files_google(prefix, flat)
-
+      return
+
     url = posixpath.join(self._path.host, self._path.path, prefix)
     resp = requests.head(url)

     server = resp.headers.get("Server", "").lower()
     if 'apache' in server:
       yield from self._list_files_apache(prefix, flat)
+      return
     else:
       raise NotImplementedError()
@@ -971,7 +1138,7 @@ class S3Interface(StorageInterface):
     elif compress in ("xz", "lzma"):
       attrs['ContentEncoding'] = 'xz'
     elif compress in ("bzip2", "bz2"):
-      attrs['ContentEncoding'] = 'bz2'
+      attrs['ContentEncoding'] = 'bzip2'
     elif compress:
       raise ValueError("Compression type {} not supported.".format(compress))

@@ -995,23 +1162,39 @@ class S3Interface(StorageInterface):

     if multipart:
       self._conn.upload_fileobj(content, self._path.bucket, key, ExtraArgs=attrs)
+      # upload_fileobj will add 'aws-chunked' to the ContentEncoding,
+      # which after it finishes uploading is useless and messes up our
+      # software. Therefore, edit the metadata and replace it (but this incurs
+      # 2x class-A...)
+      self._conn.copy_object(
+        Bucket=self._path.bucket,
+        Key=key,
+        CopySource={'Bucket': self._path.bucket, 'Key': key},
+        MetadataDirective="REPLACE",
+        **attrs
+      )
     else:
       attrs['Bucket'] = self._path.bucket
       attrs['Body'] = content
       attrs['Key'] = key
-      attrs['ContentMD5'] = md5(content)
+      attrs["ChecksumCRC32C"] = str(encode_crc32c_b64(content))
       self._conn.put_object(**attrs)

   @retry
   def copy_file(self, src_path, dest_bucket_name, dest_key):
     key = self.get_path_to_file(src_path)
-    dest_bucket = self._get_bucket(dest_bucket_name)
+    s3client = self._get_bucket(dest_bucket_name)
     copy_source = {
       'Bucket': self._path.bucket,
       'Key': key,
     }
     try:
-      dest_bucket.copy(CopySource=copy_source, Bucket=dest_bucket_name, Key=dest_key)
+      s3client.copy_object(
+        CopySource=copy_source,
+        Bucket=dest_bucket_name,
+        Key=dest_key,
+        MetadataDirective='COPY' # Ensure metadata like Content-Encoding is copied
+      )
     except botocore.exceptions.ClientError as err:
       if err.response['Error']['Code'] in ('NoSuchKey', '404'):
         return False
@@ -1046,6 +1229,11 @@ class S3Interface(StorageInterface):
     if 'ContentEncoding' in resp:
       encoding = resp['ContentEncoding']

+    encoding = ",".join([
+      enc for enc in encoding.split(",")
+      if enc != "aws-chunked"
+    ])
+
     # s3 etags return hex digests but we need the base64 encoding
     # to make uniform comparisons.
     # example s3 etag: "31ee76261d87fed8cb9d4c465c48158c"
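
This comma-splitting filter (repeated in save_file and head below) drops only the transport-level token while preserving any real encodings, e.g.:

    encoding = "aws-chunked,gzip"
    encoding = ",".join(
      enc for enc in encoding.split(",")
      if enc != "aws-chunked"
    )
    # encoding == "gzip"
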
@@ -1073,6 +1261,43 @@ class S3Interface(StorageInterface):
     else:
       raise

+  @retry
+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+    kwargs = self._additional_attrs.copy()
+
+    resp = self.head(src)
+
+    if resp is None:
+      return False
+
+    mkdir(os.path.dirname(dest))
+
+    encoding = resp.get("Content-Encoding", "") or ""
+    encoding = ",".join([
+      enc for enc in encoding.split(",")
+      if enc != "aws-chunked"
+    ])
+    ext = FileInterface.get_extension(encoding)
+
+    if not dest.endswith(ext):
+      dest += ext
+
+    try:
+      self._conn.download_file(
+        Bucket=self._path.bucket,
+        Key=key,
+        Filename=dest,
+        **kwargs
+      )
+    except botocore.exceptions.ClientError as err:
+      if err.response['Error']['Code'] in ('NoSuchKey', '404'):
+        return False
+      else:
+        raise
+
+    return True
+
   @retry
   def head(self, file_path):
     try:
@@ -1081,6 +1306,11 @@ class S3Interface(StorageInterface):
         Key=self.get_path_to_file(file_path),
         **self._additional_attrs,
       )
+
+      encoding = response.get("ContentEncoding", None)
+      if encoding == '':
+        encoding = None
+
       return {
         "Cache-Control": response.get("CacheControl", None),
         "Content-Length": response.get("ContentLength", None),
@@ -1088,7 +1318,7 @@ class S3Interface(StorageInterface):
         "ETag": response.get("ETag", None),
         "Last-Modified": response.get("LastModified", None),
         "Content-Md5": response["ResponseMetadata"]["HTTPHeaders"].get("content-md5", None),
-        "Content-Encoding": response.get("ContentEncoding", None),
+        "Content-Encoding": encoding,
         "Content-Disposition": response.get("ContentDisposition", None),
         "Content-Language": response.get("ContentLanguage", None),
         "Storage-Class": response.get("StorageClass", None),
@@ -1179,7 +1409,7 @@ class S3Interface(StorageInterface):
     path = posixpath.join(layer_path, prefix)

     @retry
-    def s3lst(continuation_token=None):
+    def s3lst(path, continuation_token=None):
       kwargs = {
         'Bucket': self._path.bucket,
         'Prefix': path,
@@ -1193,27 +1423,44 @@ class S3Interface(StorageInterface):

       return self._conn.list_objects_v2(**kwargs)

-    resp = s3lst()
+    resp = s3lst(path)
+    # the case where the prefix is something like "build", but "build" is a subdirectory
+    # so requery with "build/" to get the proper behavior
+    if (
+      flat
+      and path
+      and path[-1] != '/'
+      and 'Contents' not in resp
+      and len(resp.get("CommonPrefixes", [])) == 1
+    ):
+      path += '/'
+      resp = s3lst(path)

     def iterate(resp):
+      if 'CommonPrefixes' in resp.keys():
+        yield from [
+          item["Prefix"].removeprefix(layer_path)
+          for item in resp['CommonPrefixes']
+        ]
+
       if 'Contents' not in resp.keys():
         resp['Contents'] = []

       for item in resp['Contents']:
         key = item['Key']
-        filename = key.replace(layer_path, '')
+        filename = key.removeprefix(layer_path)
         if filename == '':
           continue
         elif not flat and filename[-1] != '/':
           yield filename
-        elif flat and '/' not in key.replace(path, ''):
+        elif flat and '/' not in key.removeprefix(path):
           yield filename

     for filename in iterate(resp):
       yield filename

     while resp['IsTruncated'] and resp['NextContinuationToken']:
-      resp = s3lst(resp['NextContinuationToken'])
+      resp = s3lst(path, resp['NextContinuationToken'])

       for filename in iterate(resp):
         yield filename

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/lib.py
@@ -153,6 +153,11 @@ def decode_crc32c_b64(b64digest):
   # !I means network order (big endian) and unsigned int
   return struct.unpack("!I", base64.b64decode(b64digest))[0]

+def encode_crc32c_b64(binary):
+  val = crc32c(binary)
+  val = val.to_bytes(4, 'big')
+  return base64.b64encode(val)
+
 def crc32c(binary):
   """
   Computes the crc32c of a binary string
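
The new encode_crc32c_b64 is the inverse of decode_crc32c_b64 directly above it: crc32c -> 4 big-endian bytes -> base64 (note it returns bytes, per base64.b64encode). A quick round trip for illustration:

    from cloudfiles.lib import crc32c, encode_crc32c_b64, decode_crc32c_b64

    payload = b"hello world"
    digest = encode_crc32c_b64(payload)  # base64-encoded bytes
    assert decode_crc32c_b64(digest) == crc32c(payload)
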

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/cloudfiles_cli.py
@@ -83,14 +83,19 @@ def license():
     print(f.read())

 @main.command()
-@click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.')
-@click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.')
-@click.option('-e','--expr',is_flag=True, default=False, help='Use a limited regexp language (e.g. [abc123]\{3\}) to generate prefixes.')
+@click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.', show_default=True)
+@click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.', show_default=True)
+@click.option('-e','--expr',is_flag=True, default=False, help='Use a limited regexp language (e.g. [abc123]\{3\}) to generate prefixes.', show_default=True)
+@click.option('--no-auth', is_flag=True, default=False, help='Uses the http API for read-only operations.', show_default=True)
 @click.argument("cloudpath")
-def ls(shortpath, flat, expr, cloudpath):
+def ls(shortpath, flat, expr, cloudpath, no_auth):
   """Recursively lists the contents of a directory."""
   cloudpath = normalize_path(cloudpath)

+  no_sign_request = no_auth # only affects s3
+  if no_auth and 's3://' not in cloudpath:
+    cloudpath = cloudfiles.paths.to_https_protocol(cloudpath)
+
   _, flt, prefix = get_mfp(cloudpath, True)
   epath = extract(cloudpath)
   if len(epath.path) > 0:
100
105
 
101
106
  flat = flat or flt
102
107
 
103
- cf = CloudFiles(cloudpath)
108
+ cf = CloudFiles(cloudpath, no_sign_request=no_sign_request)
104
109
  iterables = []
105
110
  if expr:
106
111
  # TODO: make this a reality using a parser
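
The --no-auth flag combines two existing mechanisms: s3 paths get anonymous (unsigned) requests, while other protocols are rewritten to their public https endpoints via to_https_protocol. The library-level equivalent, assuming a hypothetical public bucket:

    from cloudfiles import CloudFiles

    cf = CloudFiles("s3://some-public-bucket/", no_sign_request=True)
    print(list(cf.list(flat=True)))
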
@@ -173,11 +178,13 @@ def get_mfp(path, recursive):
 @click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
 @click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
 @click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.option('--resumable', is_flag=True, default=False, help="http->file transfers will download to .part files while they are in progress.", show_default=True)
 @click.pass_context
 def cp(
   ctx, source, destination,
   recursive, compression, progress,
   block_size, part_bytes, no_sign_request,
+  resumable,
 ):
   """
   Copy one or more files from a source to destination.
@@ -194,13 +201,15 @@ def cp(
     _cp_single(
       ctx, src, destination, recursive,
       compression, progress, block_size,
-      part_bytes, no_sign_request
+      part_bytes, no_sign_request,
+      resumable,
     )

 def _cp_single(
   ctx, source, destination, recursive,
   compression, progress, block_size,
-  part_bytes, no_sign_request
+  part_bytes, no_sign_request,
+  resumable,
 ):
   use_stdin = (source == '-')
   use_stdout = (destination == '-')
@@ -210,8 +219,8 @@ def _cp_single(

   nsrc = normalize_path(source)
   ndest = normalize_path(destination)
-
-  issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
+
+  issrcdir = (use_stdin == False) and (ispathdir(source) or CloudFiles(nsrc).isdir())
   isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())

   recursive = recursive and issrcdir
@@ -267,7 +276,11 @@ def _cp_single(

   if not isinstance(xferpaths, str):
     if parallel == 1:
-      _cp(srcpath, destpath, compression, progress, block_size, part_bytes, no_sign_request, xferpaths)
+      _cp(
+        srcpath, destpath, compression,
+        progress, block_size, part_bytes,
+        no_sign_request, resumable, xferpaths
+      )
       return

     total = None
@@ -277,9 +290,12 @@ def _cp_single(
       pass

     if use_stdout:
-      fn = partial(_cp_stdout, no_sign_request, srcpath)
+      fn = partial(_cp_stdout, srcpath, no_sign_request)
     else:
-      fn = partial(_cp, srcpath, destpath, compression, False, block_size, part_bytes, no_sign_request)
+      fn = partial(
+        _cp, srcpath, destpath, compression, False,
+        block_size, part_bytes, no_sign_request, resumable
+      )

     with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
       with pathos.pools.ProcessPool(parallel) as executor:
@@ -309,14 +325,20 @@ def _cp_single(
     cfsrc.transfer_to(cfdest, paths=[{
       "path": xferpaths,
       "dest_path": new_path,
-    }], reencode=compression)
+    }], reencode=compression, resumable=resumable)

-def _cp(src, dst, compression, progress, block_size, part_bytes, no_sign_request, paths):
+def _cp(
+  src, dst, compression, progress,
+  block_size, part_bytes,
+  no_sign_request, resumable,
+  paths
+):
   cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
   cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
   cfsrc.transfer_to(
     cfdest, paths=paths,
-    reencode=compression, block_size=block_size
+    reencode=compression, block_size=block_size,
+    resumable=resumable,
   )

 def _cp_stdout(src, no_sign_request, paths):

{cloud-files-4.30.1 → cloud_files-5.0.1}/setup.cfg
@@ -13,11 +13,11 @@ classifier =
   Development Status :: 4 - Beta
   License :: OSI Approved :: BSD License
   Programming Language :: Python :: 3
-  Programming Language :: Python :: 3.7
-  Programming Language :: Python :: 3.8
   Programming Language :: Python :: 3.9
   Programming Language :: Python :: 3.10
   Programming Language :: Python :: 3.11
+  Programming Language :: Python :: 3.12
+  Programming Language :: Python :: 3.13
   Topic :: Software Development :: Libraries :: Python Modules

 [global]

{cloud-files-4.30.1 → cloud_files-5.0.1}/setup.py
@@ -4,7 +4,7 @@ import sys

 setuptools.setup(
   setup_requires=['pbr'],
-  python_requires=">=3.7,<4.0",
+  python_requires=">=3.9,<4.0",
   include_package_data=True,
   entry_points={
     "console_scripts": [

cloud-files-4.30.1/cloud_files.egg-info/pbr.json (deleted)
@@ -1 +0,0 @@
-{"git_version": "ce90720", "is_release": true}