cloud-files 5.9.0__tar.gz → 6.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {cloud_files-5.9.0 → cloud_files-6.1.0}/ChangeLog +11 -0
  2. {cloud_files-5.9.0 → cloud_files-6.1.0}/PKG-INFO +1 -1
  3. {cloud_files-5.9.0 → cloud_files-6.1.0}/automated_test.py +18 -0
  4. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/PKG-INFO +1 -1
  5. cloud_files-6.1.0/cloud_files.egg-info/pbr.json +1 -0
  6. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/cloudfiles.py +17 -7
  7. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/interfaces.py +28 -17
  8. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/cloudfiles_cli.py +17 -5
  9. cloud_files-5.9.0/cloud_files.egg-info/pbr.json +0 -1
  10. {cloud_files-5.9.0 → cloud_files-6.1.0}/.github/workflows/test-suite.yml +0 -0
  11. {cloud_files-5.9.0 → cloud_files-6.1.0}/AUTHORS +0 -0
  12. {cloud_files-5.9.0 → cloud_files-6.1.0}/LICENSE +0 -0
  13. {cloud_files-5.9.0 → cloud_files-6.1.0}/MANIFEST.in +0 -0
  14. {cloud_files-5.9.0 → cloud_files-6.1.0}/README.md +0 -0
  15. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/SOURCES.txt +0 -0
  16. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/dependency_links.txt +0 -0
  17. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/entry_points.txt +0 -0
  18. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/not-zip-safe +0 -0
  19. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/requires.txt +0 -0
  20. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/top_level.txt +0 -0
  21. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/__init__.py +0 -0
  22. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/compression.py +0 -0
  23. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/connectionpools.py +0 -0
  24. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/exceptions.py +0 -0
  25. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/gcs.py +0 -0
  26. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/lib.py +0 -0
  27. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/monitoring.py +0 -0
  28. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/paths.py +0 -0
  29. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/resumable_tools.py +0 -0
  30. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/scheduler.py +0 -0
  31. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/secrets.py +0 -0
  32. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/test.py +0 -0
  33. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/threaded_queue.py +0 -0
  34. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/typing.py +0 -0
  35. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/LICENSE +0 -0
  36. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/__init__.py +0 -0
  37. {cloud_files-5.9.0 → cloud_files-6.1.0}/requirements.txt +0 -0
  38. {cloud_files-5.9.0 → cloud_files-6.1.0}/setup.cfg +0 -0
  39. {cloud_files-5.9.0 → cloud_files-6.1.0}/setup.py +0 -0
{cloud_files-5.9.0 → cloud_files-6.1.0}/ChangeLog
@@ -1,6 +1,17 @@
  CHANGES
  =======

+ 6.1.0
+ -----
+
+ * test: check that raw objects work for mem
+ * feat: allow for storing python objects in mem to avoid serialization cost
+
+ 6.0.0
+ -----
+
+ * feat: add file counts to du as -N flag
+
  5.9.0
  -----

{cloud_files-5.9.0 → cloud_files-6.1.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloud-files
- Version: 5.9.0
+ Version: 6.1.0
  Summary: Fast access to cloud storage and local FS.
  Home-page: https://github.com/seung-lab/cloud-files/
  Author: William Silversmith
{cloud_files-5.9.0 → cloud_files-6.1.0}/automated_test.py
@@ -128,6 +128,24 @@ def test_read_write(s3, protocol, num_threads, green):
    if protocol == 'file':
      rmtree(url)

+ def test_read_write_py_objects_mem():
+   from cloudfiles import CloudFiles, CloudFile, exceptions
+   url = compute_url("mem", "rw")
+
+   cf = CloudFiles(url)
+
+   content = set([1,2,3])
+   cf.put('my_set', content, compress=None, raw=True, cache_control='no-cache')
+   cf['my_set2'] = content
+
+   f = CloudFile(cf.join(url, "my_set"))
+   assert cf.get('my_set') == content
+   assert cf['my_set2'] == content
+   assert f.get() == content
+   assert cf.get('nonexistentfile') is None
+
+   cf.delete(["my_set", "my_set2"])
+
  @pytest.mark.parametrize("protocol", ('mem', 'file', 's3'))#'gs'))
  def test_get_json_order(s3, protocol):
    from cloudfiles import CloudFiles
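The new test doubles as documentation for the 6.1.0 feature: with the `mem://` protocol, `put(..., raw=True)` stores the Python object itself and `get` hands it back with no serialization round trip. A minimal usage sketch, assuming a `mem://bucket/dir` style URL (`compute_url` above is a helper internal to the test suite):

```python
# Minimal sketch of the 6.1.0 mem:// feature; the bucket path is hypothetical.
from cloudfiles import CloudFiles

cf = CloudFiles("mem://bucket/demo")

obj = {1, 2, 3}  # a set is neither JSON- nor bytes-serializable
cf.put("state", obj, compress=None, raw=True)  # stored as-is, no encoding cost

assert cf.get("state") == obj  # returned as the original object
cf.delete("state")
```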
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloud-files
- Version: 5.9.0
+ Version: 6.1.0
  Summary: Fast access to cloud storage and local FS.
  Home-page: https://github.com/seung-lab/cloud-files/
  Author: William Silversmith
cloud_files-6.1.0/cloud_files.egg-info/pbr.json
@@ -0,0 +1 @@
+ {"git_version": "411220d", "is_release": true}
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/cloudfiles.py
@@ -759,10 +759,14 @@ class CloudFiles:

    def put(
      self,
-     path:str, content:Union[BinaryIO,bytes],
-     content_type:str = None, compress:CompressType = None,
-     compression_level:Optional[int] = None, cache_control:Optional[str] = None,
-     raw:bool = False, storage_class:Optional[str] = None
+     path:str,
+     content:Union[BinaryIO,bytes],
+     content_type:str = None,
+     compress:CompressType = None,
+     compression_level:Optional[int] = None,
+     cache_control:Optional[str] = None,
+     raw:bool = False,
+     storage_class:Optional[str] = None
    ) -> int:
      """
      Write a single file.
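The `put` change is purely cosmetic, one parameter per line; behavior and defaults are unchanged. For reference, a sketch of a typical call, with a hypothetical local path:

```python
# Typical put() call against the reflowed signature; the path is hypothetical.
# put() returns the number of bytes written.
from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/cf-demo")
nbytes = cf.put(
  "hello.json",
  b'{"greeting": "hello"}',
  content_type="application/json",
  compress="gzip",
  cache_control="no-cache",
)
```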
@@ -1008,10 +1012,11 @@ class CloudFiles:
        return results
      return first(results.values())

-   def subtree_size(self, prefix:GetPathType = "") -> int:
+   def subtree_size(self, prefix:GetPathType = "") -> dict[str,int]:
      """High performance size calculation for directory trees."""
      prefix, return_multiple = toiter(prefix, is_iter=True)
      total_bytes = 0
+     total_files = 0

      total = totalfn(prefix, None)

@@ -1019,11 +1024,13 @@

      def size_thunk(prefix):
        nonlocal total_bytes
+       nonlocal total_files
        nonlocal lock

        with self._get_connection() as conn:
-         subtree_bytes = conn.subtree_size(prefix)
+         subtree_files, subtree_bytes = conn.subtree_size(prefix)
        with lock:
+         total_files += subtree_files
          total_bytes += subtree_bytes

      schedule_jobs(
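`subtree_size` fans prefixes out to worker threads via `schedule_jobs`, so the two running totals are guarded by a shared lock, and `nonlocal` lets the thunk rebind the enclosing counters. A standalone sketch of the same pattern, where `sizes_for` is a hypothetical stand-in for the per-prefix storage query:

```python
# Standalone sketch of the lock-guarded accumulation pattern used by
# subtree_size; sizes_for(prefix) is a hypothetical callback returning
# a (files, bytes) pair for one prefix.
import threading

def count_tree(prefixes, sizes_for):
  total_files, total_bytes = 0, 0
  lock = threading.Lock()

  def size_thunk(prefix):
    nonlocal total_files, total_bytes
    files, nbytes = sizes_for(prefix)
    with lock:  # both counters update atomically with respect to each other
      total_files += files
      total_bytes += nbytes

  threads = [threading.Thread(target=size_thunk, args=(p,)) for p in prefixes]
  for t in threads: t.start()
  for t in threads: t.join()
  return (total_files, total_bytes)
```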
@@ -1034,7 +1041,10 @@ class CloudFiles:
        total=total,
      )

-     return total_bytes
+     return {
+       "N": total_files,
+       "num_bytes": total_bytes,
+     }

    @parallelize(desc="Delete")
    def delete(
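Note the breaking change from 6.0.0: `subtree_size` now returns a dict rather than a bare byte count, so callers must read the `N` and `num_bytes` keys. A minimal sketch, with a hypothetical local path:

```python
# 6.x callers read the two keys instead of treating the result as an int;
# the path is hypothetical.
from cloudfiles import CloudFiles

stats = CloudFiles("file:///tmp/cf-demo").subtree_size()
print(f"{stats['N']} files, {stats['num_bytes']} bytes")
```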
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/interfaces.py
@@ -304,18 +304,21 @@ class FileInterface(StorageInterface):

      return self.io_with_lock(do_size, path, exclusive=False)

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      total_bytes = 0
+     total_files = 0

      subdir = self.get_path_to_file("")
      if prefix:
        subdir = os.path.join(subdir, os.path.dirname(prefix))

      for root, dirs, files in os.walk(subdir):
-       files = ( os.path.join(root, f) for f in files )
-       total_bytes += sum(( os.path.getsize(f) for f in files ))
+       for f in files:
+         path = os.path.join(root, f)
+         total_files += 1
+         total_bytes += os.path.getsize(path)

-     return total_bytes
+     return (total_files, total_bytes)

    def exists(self, file_path):
      path = self.get_path_to_file(file_path)
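The old generator pair could only produce a byte total; counting files would have required a second pass, so the rewrite walks each file once and tallies both. The same logic as a standalone function, for illustration:

```python
# Standalone equivalent of the new FileInterface logic: one os.walk pass
# yields both a file count and a byte total for a directory tree.
import os

def du_tree(subdir):
  total_files, total_bytes = 0, 0
  for root, dirs, files in os.walk(subdir):
    for f in files:
      total_files += 1
      total_bytes += os.path.getsize(os.path.join(root, f))
  return (total_files, total_bytes)
```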
@@ -443,11 +446,12 @@ class MemoryInterface(StorageInterface):
      elif compress:
        raise ValueError("Compression type {} not supported.".format(compress))

-     if content \
-       and content_type \
-       and re.search('json|te?xt', content_type) \
-       and type(content) is str:
-
+     if (
+       isinstance(content, str)
+       and len(content) > 0
+       and content_type
+       and re.search('json|te?xt', content_type)
+     ):
        content = content.encode('utf-8')

      if hasattr(content, "read") and hasattr(content, "seek"):
@@ -477,7 +481,7 @@ class MemoryInterface(StorageInterface):
        encoding = None

      result = self._data.get(path, None)
-     if result:
+     if isinstance(result, (bytes, bytearray, str)):
        result = result[slice(start, end)]
      return (result, encoding, None, None)

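The guard matters now that `_data` can hold raw Python objects: byte-range slicing only makes sense for bytes-like values, so anything else is returned untouched. A small illustration of the behavior the check preserves:

```python
# Why the isinstance guard is needed once raw Python objects live in _data:
# a set supports neither slicing nor byte ranges, so it must pass through.
data = {"blob": b"0123456789", "obj": {1, 2, 3}}

def fetch(key, start=None, end=None):
  result = data.get(key, None)
  if isinstance(result, (bytes, bytearray, str)):
    result = result[slice(start, end)]
  return result

assert fetch("blob", 2, 5) == b"234"
assert fetch("obj") == {1, 2, 3}  # returned as-is; slicing would raise TypeError
```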
@@ -628,7 +632,7 @@
      filenames.sort()
      return iter(filenames)

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      layer_path = self.get_path_to_file("")

      remove = layer_path
@@ -636,12 +640,14 @@
        remove += '/'

      total_bytes = 0
+     total_files = 0
      for filename, binary in self._data.items():
        f_prefix = filename.removeprefix(remove)[:len(prefix)]
        if f_prefix == prefix:
          total_bytes += len(binary)
+         total_files += 1

-     return total_bytes
+     return (total_files, total_bytes)

  class GoogleCloudStorageInterface(StorageInterface):
    exists_batch_size = Batch._MAX_BATCH_SIZE
@@ -866,7 +872,7 @@ class GoogleCloudStorageInterface(StorageInterface):


    @retry
-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      layer_path = self.get_path_to_file("")
      path = posixpath.join(layer_path, prefix)

@@ -877,11 +883,13 @@ class GoogleCloudStorageInterface(StorageInterface):
    )

    total_bytes = 0
+   total_files = 0
    for page in blobs.pages:
      for blob in page:
        total_bytes += blob.size
+       total_files += 1

-   return total_bytes
+   return (total_files, total_bytes)

    def release_connection(self):
      global GC_POOL
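The GCS tally rides the client's page iterator rather than materializing all blobs at once. A sketch of the same count done directly against the google-cloud-storage client, with a hypothetical bucket and prefix:

```python
# Sketch of the same tally with the google-cloud-storage client directly;
# the bucket name and prefix are hypothetical.
from google.cloud import storage

client = storage.Client()
blobs = client.list_blobs("my-bucket", prefix="some/dir/")

total_files, total_bytes = 0, 0
for page in blobs.pages:  # iterate page by page, as the interface does
  for blob in page:
    total_bytes += blob.size
    total_files += 1
print(total_files, total_bytes)
```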
@@ -939,7 +947,7 @@ class HttpInterface(StorageInterface):
      headers = self.head(file_path)
      return int(headers["Content-Length"])

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      raise NotImplementedError()

    @retry
@@ -1538,7 +1546,7 @@ class S3Interface(StorageInterface):
      for filename in iterate(resp):
        yield filename

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      layer_path = self.get_path_to_file("")
      path = posixpath.join(layer_path, prefix)

@@ -1565,16 +1573,19 @@
        yield item.get('Size', 0)

    total_bytes = 0
+   total_files = 0
    for num_bytes in iterate(resp):
+     total_files += 1
      total_bytes += num_bytes

    while resp['IsTruncated'] and resp['NextContinuationToken']:
      resp = s3lst(path, resp['NextContinuationToken'])

      for num_bytes in iterate(resp):
+       total_files += 1
        total_bytes += num_bytes

-   return total_bytes
+   return (total_files, total_bytes)

    def release_connection(self):
      global S3_POOL
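The S3 branch follows `NextContinuationToken` by hand. For comparison, boto3's built-in paginator expresses the same tally, with a hypothetical bucket and prefix:

```python
# The same tally via boto3's paginator, which follows NextContinuationToken
# internally; the bucket name and prefix are hypothetical.
import boto3

s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")

total_files, total_bytes = 0, 0
for page in paginator.paginate(Bucket="my-bucket", Prefix="some/dir/"):
  for item in page.get("Contents", []):
    total_files += 1
    total_bytes += item.get("Size", 0)
print(total_files, total_bytes)
```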
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/cloudfiles_cli.py
@@ -802,9 +802,13 @@ def __rm(cloudpath, progress, paths):
  @click.option('-c', '--grand-total', is_flag=True, default=False, help="Sum a grand total of all inputs.")
  @click.option('-s', '--summarize', is_flag=True, default=False, help="Sum a total for each input argument.")
  @click.option('-h', '--human-readable', is_flag=True, default=False, help='"Human-readable" output. Use unit suffixes: Bytes, KiB, MiB, GiB, TiB, PiB, and EiB.')
- def du(paths, grand_total, summarize, human_readable):
+ @click.option('-N', '--count-files', is_flag=True, default=False, help='Also report the number of files.')
+ def du(paths, grand_total, summarize, human_readable, count_files):
    """Display disk usage statistics."""
    results = []
+
+   list_data = False
+
    for path in paths:
      npath = normalize_path(path)
      if ispathdir(path):
@@ -812,6 +816,7 @@ def du(paths, grand_total, summarize, human_readable):
        if summarize:
          results.append(cf.subtree_size())
        else:
+         list_data = True
          results.append(cf.size(cf.list()))
      else:
        cf = CloudFiles(os.path.dirname(npath))
@@ -841,11 +846,15 @@
      return f"{(val / 2**60):.2f} EiB"

    summary = {}
+   num_files = 0
    for path, res in zip(paths, results):
-     if isinstance(res, int):
-       summary[path] = res
-     else:
+     if list_data:
        summary[path] = sum(res.values())
+       num_files += len(res)
+     else:
+       summary[path] = res["num_bytes"]
+       num_files += res["N"]
+
      if summarize:
        print(f"{SI(summary[path])}\t{path}")

@@ -855,7 +864,10 @@
        print(f"{SI(size)}\t{pth}")

    if grand_total:
-     print(f"{SI(sum(summary.values()))}\ttotal")
+     print(f"{SI(sum(summary.values()))}\tbytes total")
+
+   if count_files:
+     print(f"{num_files}\tfiles total")

  @main.command()
  @click.argument('paths', nargs=-1)
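With the new `-N/--count-files` flag from 6.0.0, a run such as `cloudfiles du -s -N -c gs://bucket/dir` prints a per-path byte total, a grand-total line, and a final `files total` line. Note that the grand-total label changed from `total` to `bytes total`, so scripts that grep for the old string will need updating.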
cloud_files-5.9.0/cloud_files.egg-info/pbr.json
@@ -1 +0,0 @@
- {"git_version": "623052c", "is_release": true}