cloud-files 5.9.0__tar.gz → 6.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {cloud_files-5.9.0 → cloud_files-6.1.0}/ChangeLog +11 -0
  2. {cloud_files-5.9.0 → cloud_files-6.1.0}/PKG-INFO +1 -1
  3. {cloud_files-5.9.0 → cloud_files-6.1.0}/automated_test.py +18 -0
  4. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/PKG-INFO +1 -1
  5. cloud_files-6.1.0/cloud_files.egg-info/pbr.json +1 -0
  6. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/cloudfiles.py +17 -7
  7. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/interfaces.py +28 -17
  8. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/cloudfiles_cli.py +17 -5
  9. cloud_files-5.9.0/cloud_files.egg-info/pbr.json +0 -1
  10. {cloud_files-5.9.0 → cloud_files-6.1.0}/.github/workflows/test-suite.yml +0 -0
  11. {cloud_files-5.9.0 → cloud_files-6.1.0}/AUTHORS +0 -0
  12. {cloud_files-5.9.0 → cloud_files-6.1.0}/LICENSE +0 -0
  13. {cloud_files-5.9.0 → cloud_files-6.1.0}/MANIFEST.in +0 -0
  14. {cloud_files-5.9.0 → cloud_files-6.1.0}/README.md +0 -0
  15. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/SOURCES.txt +0 -0
  16. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/dependency_links.txt +0 -0
  17. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/entry_points.txt +0 -0
  18. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/not-zip-safe +0 -0
  19. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/requires.txt +0 -0
  20. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/top_level.txt +0 -0
  21. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/__init__.py +0 -0
  22. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/compression.py +0 -0
  23. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/connectionpools.py +0 -0
  24. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/exceptions.py +0 -0
  25. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/gcs.py +0 -0
  26. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/lib.py +0 -0
  27. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/monitoring.py +0 -0
  28. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/paths.py +0 -0
  29. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/resumable_tools.py +0 -0
  30. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/scheduler.py +0 -0
  31. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/secrets.py +0 -0
  32. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/test.py +0 -0
  33. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/threaded_queue.py +0 -0
  34. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/typing.py +0 -0
  35. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/LICENSE +0 -0
  36. {cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/__init__.py +0 -0
  37. {cloud_files-5.9.0 → cloud_files-6.1.0}/requirements.txt +0 -0
  38. {cloud_files-5.9.0 → cloud_files-6.1.0}/setup.cfg +0 -0
  39. {cloud_files-5.9.0 → cloud_files-6.1.0}/setup.py +0 -0
{cloud_files-5.9.0 → cloud_files-6.1.0}/ChangeLog
@@ -1,6 +1,17 @@
  CHANGES
  =======

+ 6.1.0
+ -----
+
+ * test: check that raw objects work for mem
+ * feat: allow for storing python objects in mem to avoid serialization cost
+
+ 6.0.0
+ -----
+
+ * feat: add file counts to du as -N flag
+
  5.9.0
  -----

{cloud_files-5.9.0 → cloud_files-6.1.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloud-files
- Version: 5.9.0
+ Version: 6.1.0
  Summary: Fast access to cloud storage and local FS.
  Home-page: https://github.com/seung-lab/cloud-files/
  Author: William Silversmith
{cloud_files-5.9.0 → cloud_files-6.1.0}/automated_test.py
@@ -128,6 +128,24 @@ def test_read_write(s3, protocol, num_threads, green):
    if protocol == 'file':
      rmtree(url)

+ def test_read_write_py_objects_mem():
+   from cloudfiles import CloudFiles, CloudFile, exceptions
+   url = compute_url("mem", "rw")
+
+   cf = CloudFiles(url)
+
+   content = set([1,2,3])
+   cf.put('my_set', content, compress=None, raw=True, cache_control='no-cache')
+   cf['my_set2'] = content
+
+   f = CloudFile(cf.join(url, "my_set"))
+   assert cf.get('my_set') == content
+   assert cf['my_set2'] == content
+   assert f.get() == content
+   assert cf.get('nonexistentfile') is None
+
+   cf.delete(["my_set", "my_set2"])
+
  @pytest.mark.parametrize("protocol", ('mem', 'file', 's3'))#'gs'))
  def test_get_json_order(s3, protocol):
    from cloudfiles import CloudFiles
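The new test doubles as documentation for the 6.1.0 feature: with the `mem://` protocol, `put(..., raw=True)` stores the Python object itself and `get` hands it back with no serialization round trip. A minimal usage sketch, assuming a `mem://bucket/dir` style URL (`compute_url` above is a helper internal to the test suite):

```python
# Minimal sketch of the 6.1.0 mem:// feature; the bucket path is hypothetical.
from cloudfiles import CloudFiles

cf = CloudFiles("mem://bucket/demo")

obj = {1, 2, 3}  # a set is neither JSON- nor bytes-serializable
cf.put("state", obj, compress=None, raw=True)  # stored as-is, no encoding cost

assert cf.get("state") == obj  # returned as the original object
cf.delete("state")
```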
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloud_files.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloud-files
- Version: 5.9.0
+ Version: 6.1.0
  Summary: Fast access to cloud storage and local FS.
  Home-page: https://github.com/seung-lab/cloud-files/
  Author: William Silversmith
cloud_files-6.1.0/cloud_files.egg-info/pbr.json
@@ -0,0 +1 @@
+ {"git_version": "411220d", "is_release": true}
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/cloudfiles.py
@@ -759,10 +759,14 @@ class CloudFiles:

    def put(
      self,
-     path:str, content:Union[BinaryIO,bytes],
-     content_type:str = None, compress:CompressType = None,
-     compression_level:Optional[int] = None, cache_control:Optional[str] = None,
-     raw:bool = False, storage_class:Optional[str] = None
+     path:str,
+     content:Union[BinaryIO,bytes],
+     content_type:str = None,
+     compress:CompressType = None,
+     compression_level:Optional[int] = None,
+     cache_control:Optional[str] = None,
+     raw:bool = False,
+     storage_class:Optional[str] = None
    ) -> int:
      """
      Write a single file.
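The `put` change is purely cosmetic, one parameter per line; behavior and defaults are unchanged. For reference, a sketch of a typical call, with a hypothetical local path:

```python
# Typical put() call against the reflowed signature; the path is hypothetical.
# put() returns the number of bytes written.
from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/cf-demo")
nbytes = cf.put(
  "hello.json",
  b'{"greeting": "hello"}',
  content_type="application/json",
  compress="gzip",
  cache_control="no-cache",
)
```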
@@ -1008,10 +1012,11 @@ class CloudFiles:
        return results
      return first(results.values())

-   def subtree_size(self, prefix:GetPathType = "") -> int:
+   def subtree_size(self, prefix:GetPathType = "") -> dict[str,int]:
      """High performance size calculation for directory trees."""
      prefix, return_multiple = toiter(prefix, is_iter=True)
      total_bytes = 0
+     total_files = 0

      total = totalfn(prefix, None)

@@ -1019,11 +1024,13 @@

      def size_thunk(prefix):
        nonlocal total_bytes
+       nonlocal total_files
        nonlocal lock

        with self._get_connection() as conn:
-         subtree_bytes = conn.subtree_size(prefix)
+         subtree_files, subtree_bytes = conn.subtree_size(prefix)
        with lock:
+         total_files += subtree_files
          total_bytes += subtree_bytes

      schedule_jobs(
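`subtree_size` fans prefixes out to worker threads via `schedule_jobs`, so the two running totals are guarded by a shared lock, and `nonlocal` lets the thunk rebind the enclosing counters. A standalone sketch of the same pattern, where `sizes_for` is a hypothetical stand-in for the per-prefix storage query:

```python
# Standalone sketch of the lock-guarded accumulation pattern used by
# subtree_size; sizes_for(prefix) is a hypothetical callback returning
# a (files, bytes) pair for one prefix.
import threading

def count_tree(prefixes, sizes_for):
  total_files, total_bytes = 0, 0
  lock = threading.Lock()

  def size_thunk(prefix):
    nonlocal total_files, total_bytes
    files, nbytes = sizes_for(prefix)
    with lock:  # both counters update atomically with respect to each other
      total_files += files
      total_bytes += nbytes

  threads = [threading.Thread(target=size_thunk, args=(p,)) for p in prefixes]
  for t in threads: t.start()
  for t in threads: t.join()
  return (total_files, total_bytes)
```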
@@ -1034,7 +1041,10 @@ class CloudFiles:
        total=total,
      )

-     return total_bytes
+     return {
+       "N": total_files,
+       "num_bytes": total_bytes,
+     }

    @parallelize(desc="Delete")
    def delete(
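Note the breaking change from 6.0.0: `subtree_size` now returns a dict rather than a bare byte count, so callers must read the `N` and `num_bytes` keys. A minimal sketch, with a hypothetical local path:

```python
# 6.x callers read the two keys instead of treating the result as an int;
# the path is hypothetical.
from cloudfiles import CloudFiles

stats = CloudFiles("file:///tmp/cf-demo").subtree_size()
print(f"{stats['N']} files, {stats['num_bytes']} bytes")
```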
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles/interfaces.py
@@ -304,18 +304,21 @@ class FileInterface(StorageInterface):

      return self.io_with_lock(do_size, path, exclusive=False)

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      total_bytes = 0
+     total_files = 0

      subdir = self.get_path_to_file("")
      if prefix:
        subdir = os.path.join(subdir, os.path.dirname(prefix))

      for root, dirs, files in os.walk(subdir):
-       files = ( os.path.join(root, f) for f in files )
-       total_bytes += sum(( os.path.getsize(f) for f in files ))
+       for f in files:
+         path = os.path.join(root, f)
+         total_files += 1
+         total_bytes += os.path.getsize(path)

-     return total_bytes
+     return (total_files, total_bytes)

    def exists(self, file_path):
      path = self.get_path_to_file(file_path)
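The old generator pair could only produce a byte total; counting files would have required a second pass, so the rewrite walks each file once and tallies both. The same logic as a standalone function, for illustration:

```python
# Standalone equivalent of the new FileInterface logic: one os.walk pass
# yields both a file count and a byte total for a directory tree.
import os

def du_tree(subdir):
  total_files, total_bytes = 0, 0
  for root, dirs, files in os.walk(subdir):
    for f in files:
      total_files += 1
      total_bytes += os.path.getsize(os.path.join(root, f))
  return (total_files, total_bytes)
```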
@@ -443,11 +446,12 @@ class MemoryInterface(StorageInterface):
      elif compress:
        raise ValueError("Compression type {} not supported.".format(compress))

-     if content \
-       and content_type \
-       and re.search('json|te?xt', content_type) \
-       and type(content) is str:
-
+     if (
+       isinstance(content, str)
+       and len(content) > 0
+       and content_type
+       and re.search('json|te?xt', content_type)
+     ):
        content = content.encode('utf-8')

      if hasattr(content, "read") and hasattr(content, "seek"):
@@ -477,7 +481,7 @@ class MemoryInterface(StorageInterface):
        encoding = None

      result = self._data.get(path, None)
-     if result:
+     if isinstance(result, (bytes, bytearray, str)):
        result = result[slice(start, end)]
      return (result, encoding, None, None)

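The guard matters now that `_data` can hold raw Python objects: byte-range slicing only makes sense for bytes-like values, so anything else is returned untouched. A small illustration of the behavior the check preserves:

```python
# Why the isinstance guard is needed once raw Python objects live in _data:
# a set supports neither slicing nor byte ranges, so it must pass through.
data = {"blob": b"0123456789", "obj": {1, 2, 3}}

def fetch(key, start=None, end=None):
  result = data.get(key, None)
  if isinstance(result, (bytes, bytearray, str)):
    result = result[slice(start, end)]
  return result

assert fetch("blob", 2, 5) == b"234"
assert fetch("obj") == {1, 2, 3}  # returned as-is; slicing would raise TypeError
```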
@@ -628,7 +632,7 @@
      filenames.sort()
      return iter(filenames)

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      layer_path = self.get_path_to_file("")

      remove = layer_path
@@ -636,12 +640,14 @@
        remove += '/'

      total_bytes = 0
+     total_files = 0
      for filename, binary in self._data.items():
        f_prefix = filename.removeprefix(remove)[:len(prefix)]
        if f_prefix == prefix:
          total_bytes += len(binary)
+         total_files += 1

-     return total_bytes
+     return (total_files, total_bytes)

  class GoogleCloudStorageInterface(StorageInterface):
    exists_batch_size = Batch._MAX_BATCH_SIZE
@@ -866,7 +872,7 @@ class GoogleCloudStorageInterface(StorageInterface):


    @retry
-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      layer_path = self.get_path_to_file("")
      path = posixpath.join(layer_path, prefix)

@@ -877,11 +883,13 @@ class GoogleCloudStorageInterface(StorageInterface):
    )

    total_bytes = 0
+   total_files = 0
    for page in blobs.pages:
      for blob in page:
        total_bytes += blob.size
+       total_files += 1

-   return total_bytes
+   return (total_files, total_bytes)

    def release_connection(self):
      global GC_POOL
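The GCS tally rides the client's page iterator rather than materializing all blobs at once. A sketch of the same count done directly against the google-cloud-storage client, with a hypothetical bucket and prefix:

```python
# Sketch of the same tally with the google-cloud-storage client directly;
# the bucket name and prefix are hypothetical.
from google.cloud import storage

client = storage.Client()
blobs = client.list_blobs("my-bucket", prefix="some/dir/")

total_files, total_bytes = 0, 0
for page in blobs.pages:  # iterate page by page, as the interface does
  for blob in page:
    total_bytes += blob.size
    total_files += 1
print(total_files, total_bytes)
```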
@@ -939,7 +947,7 @@ class HttpInterface(StorageInterface):
      headers = self.head(file_path)
      return int(headers["Content-Length"])

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      raise NotImplementedError()

    @retry
@@ -1538,7 +1546,7 @@ class S3Interface(StorageInterface):
      for filename in iterate(resp):
        yield filename

-   def subtree_size(self, prefix:str = "") -> int:
+   def subtree_size(self, prefix:str = "") -> tuple[int,int]:
      layer_path = self.get_path_to_file("")
      path = posixpath.join(layer_path, prefix)

@@ -1565,16 +1573,19 @@
        yield item.get('Size', 0)

    total_bytes = 0
+   total_files = 0
    for num_bytes in iterate(resp):
+     total_files += 1
      total_bytes += num_bytes

    while resp['IsTruncated'] and resp['NextContinuationToken']:
      resp = s3lst(path, resp['NextContinuationToken'])

      for num_bytes in iterate(resp):
+       total_files += 1
        total_bytes += num_bytes

-   return total_bytes
+   return (total_files, total_bytes)

    def release_connection(self):
      global S3_POOL
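The S3 branch follows `NextContinuationToken` by hand. For comparison, boto3's built-in paginator expresses the same tally, with a hypothetical bucket and prefix:

```python
# The same tally via boto3's paginator, which follows NextContinuationToken
# internally; the bucket name and prefix are hypothetical.
import boto3

s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")

total_files, total_bytes = 0, 0
for page in paginator.paginate(Bucket="my-bucket", Prefix="some/dir/"):
  for item in page.get("Contents", []):
    total_files += 1
    total_bytes += item.get("Size", 0)
print(total_files, total_bytes)
```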
{cloud_files-5.9.0 → cloud_files-6.1.0}/cloudfiles_cli/cloudfiles_cli.py
@@ -802,9 +802,13 @@ def __rm(cloudpath, progress, paths):
  @click.option('-c', '--grand-total', is_flag=True, default=False, help="Sum a grand total of all inputs.")
  @click.option('-s', '--summarize', is_flag=True, default=False, help="Sum a total for each input argument.")
  @click.option('-h', '--human-readable', is_flag=True, default=False, help='"Human-readable" output. Use unit suffixes: Bytes, KiB, MiB, GiB, TiB, PiB, and EiB.')
- def du(paths, grand_total, summarize, human_readable):
+ @click.option('-N', '--count-files', is_flag=True, default=False, help='Also report the number of files.')
+ def du(paths, grand_total, summarize, human_readable, count_files):
    """Display disk usage statistics."""
    results = []
+
+   list_data = False
+
    for path in paths:
      npath = normalize_path(path)
      if ispathdir(path):
@@ -812,6 +816,7 @@ def du(paths, grand_total, summarize, human_readable):
        if summarize:
          results.append(cf.subtree_size())
        else:
+         list_data = True
          results.append(cf.size(cf.list()))
      else:
        cf = CloudFiles(os.path.dirname(npath))
@@ -841,11 +846,15 @@
      return f"{(val / 2**60):.2f} EiB"

    summary = {}
+   num_files = 0
    for path, res in zip(paths, results):
-     if isinstance(res, int):
-       summary[path] = res
-     else:
+     if list_data:
        summary[path] = sum(res.values())
+       num_files += len(res)
+     else:
+       summary[path] = res["num_bytes"]
+       num_files += res["N"]
+
      if summarize:
        print(f"{SI(summary[path])}\t{path}")

@@ -855,7 +864,10 @@
        print(f"{SI(size)}\t{pth}")

    if grand_total:
-     print(f"{SI(sum(summary.values()))}\ttotal")
+     print(f"{SI(sum(summary.values()))}\tbytes total")
+
+   if count_files:
+     print(f"{num_files}\tfiles total")

  @main.command()
  @click.argument('paths', nargs=-1)
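With the new `-N/--count-files` flag from 6.0.0, a run such as `cloudfiles du -s -N -c gs://bucket/dir` prints a per-path byte total, a grand-total line, and a final `files total` line. Note that the grand-total label changed from `total` to `bytes total`, so scripts that grep for the old string will need updating.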
cloud_files-5.9.0/cloud_files.egg-info/pbr.json
@@ -1 +0,0 @@
- {"git_version": "623052c", "is_release": true}