cloud-files 4.23.0__tar.gz → 4.24.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. {cloud-files-4.23.0 → cloud-files-4.24.1}/ChangeLog +15 -0
  2. {cloud-files-4.23.0 → cloud-files-4.24.1}/PKG-INFO +1 -1
  3. {cloud-files-4.23.0 → cloud-files-4.24.1}/automated_test.py +17 -4
  4. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloud_files.egg-info/PKG-INFO +1 -1
  5. cloud-files-4.24.1/cloud_files.egg-info/pbr.json +1 -0
  6. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/cloudfiles.py +56 -14
  7. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/interfaces.py +47 -17
  8. cloud-files-4.23.0/cloud_files.egg-info/pbr.json +0 -1
  9. {cloud-files-4.23.0 → cloud-files-4.24.1}/.github/workflows/test-suite.yml +0 -0
  10. {cloud-files-4.23.0 → cloud-files-4.24.1}/AUTHORS +0 -0
  11. {cloud-files-4.23.0 → cloud-files-4.24.1}/LICENSE +0 -0
  12. {cloud-files-4.23.0 → cloud-files-4.24.1}/MANIFEST.in +0 -0
  13. {cloud-files-4.23.0 → cloud-files-4.24.1}/README.md +0 -0
  14. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloud_files.egg-info/SOURCES.txt +0 -0
  15. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloud_files.egg-info/dependency_links.txt +0 -0
  16. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloud_files.egg-info/entry_points.txt +0 -0
  17. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloud_files.egg-info/not-zip-safe +0 -0
  18. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloud_files.egg-info/requires.txt +0 -0
  19. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloud_files.egg-info/top_level.txt +0 -0
  20. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/__init__.py +0 -0
  21. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/compression.py +0 -0
  22. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/connectionpools.py +0 -0
  23. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/exceptions.py +0 -0
  24. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/gcs.py +0 -0
  25. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/lib.py +0 -0
  26. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/paths.py +0 -0
  27. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/resumable_tools.py +0 -0
  28. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/scheduler.py +0 -0
  29. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/secrets.py +0 -0
  30. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/threaded_queue.py +0 -0
  31. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles/typing.py +0 -0
  32. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles_cli/LICENSE +0 -0
  33. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles_cli/__init__.py +0 -0
  34. {cloud-files-4.23.0 → cloud-files-4.24.1}/cloudfiles_cli/cloudfiles_cli.py +0 -0
  35. {cloud-files-4.23.0 → cloud-files-4.24.1}/requirements.txt +0 -0
  36. {cloud-files-4.23.0 → cloud-files-4.24.1}/setup.cfg +0 -0
  37. {cloud-files-4.23.0 → cloud-files-4.24.1}/setup.py +0 -0
@@ -1,6 +1,21 @@
1
1
  CHANGES
2
2
  =======
3
3
 
4
+ 4.24.1
5
+ ------
6
+
7
+ * fix: close unused connections for HTTP interface
8
+
9
+ 4.24.0
10
+ ------
11
+
12
+ * fix: check for 404 and use proper key variable
13
+ * test: modify dne test
14
+ * docs: update authors, changelog
15
+ * feat: add support for allow\_missing to interfaces
16
+ * test: check to see if allow\_missing works
17
+ * feat: add allow\_missing to transfer\_to/from
18
+
4
19
  4.23.0
5
20
  ------
6
21
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cloud-files
3
- Version: 4.23.0
3
+ Version: 4.24.1
4
4
  Summary: Fast access to cloud storage and local FS.
5
5
  Home-page: https://github.com/seung-lab/cloud-files/
6
6
  Author: William Silversmith
@@ -198,6 +198,7 @@ def test_http_read(secrets):
198
198
  cf = CloudFiles("https://storage.googleapis.com/seunglab-test/test_v0/black/", secrets=secrets)
199
199
 
200
200
  try:
201
+ head = cf.head('info')
201
202
  info = cf.get_json('info')
202
203
  except requests.exceptions.HTTPError:
203
204
  return
@@ -613,10 +614,11 @@ def test_s3_custom_endpoint_path():
613
614
  assert extract.path == 'world'
614
615
  assert extract.host == 'https://s3-hpcrc.rc.princeton.edu'
615
616
 
617
+ @pytest.mark.parametrize('allow_missing', [False, True])
616
618
  @pytest.mark.parametrize('compression', (None, 'gzip', 'br', 'zstd', 'xz', 'bz2'))
617
619
  @pytest.mark.parametrize('src_protocol', ['mem', 'file', 's3'])
618
620
  @pytest.mark.parametrize('dest_protocol', ['mem', 'file', 's3'])
619
- def test_transfer_semantics(s3, compression, src_protocol, dest_protocol):
621
+ def test_transfer_semantics(s3, compression, src_protocol, dest_protocol, allow_missing):
620
622
  from cloudfiles import CloudFiles, exceptions
621
623
 
622
624
  if src_protocol == "file":
@@ -650,18 +652,18 @@ def test_transfer_semantics(s3, compression, src_protocol, dest_protocol):
650
652
  cfm.delete(list(cfm))
651
653
  assert list(cfm) == []
652
654
 
653
- cfm.transfer_from(f'{src_protocol}://' + path)
655
+ cfm.transfer_from(f'{src_protocol}://' + path, allow_missing=allow_missing)
654
656
  assert sorted(list(cfm)) == sorted([ str(i) for i in range(N) ])
655
657
  assert [ f['content'] for f in cfm[:] ] == [ content ] * N
656
658
 
657
659
  cfm.delete(list(cfm))
658
660
 
659
- cff.transfer_to(cfm.cloudpath)
661
+ cff.transfer_to(cfm.cloudpath, allow_missing=allow_missing)
660
662
  assert sorted(list(cfm)) == sorted([ str(i) for i in range(N) ])
661
663
  assert [ f['content'] for f in cfm[:] ] == [ content ] * N
662
664
  cfm.delete(list(cfm))
663
665
 
664
- cff.transfer_to(cfm.cloudpath, reencode='br')
666
+ cff.transfer_to(cfm.cloudpath, reencode='br', allow_missing=allow_missing)
665
667
  assert sorted(list(cfm)) == sorted([ str(i) for i in range(N) ])
666
668
  assert [ f['content'] for f in cfm[:] ] == [ content ] * N
667
669
 
@@ -670,6 +672,17 @@ def test_transfer_semantics(s3, compression, src_protocol, dest_protocol):
670
672
  data = [ os.path.splitext(d)[1] for d in data.keys() ]
671
673
  assert all([ ext == '.br' for ext in data ])
672
674
 
675
+ if not allow_missing:
676
+ try:
677
+ cff.transfer_to(cfm.cloudpath, paths=["dne"], allow_missing=False)
678
+ assert False
679
+ except FileNotFoundError:
680
+ pass
681
+ else:
682
+ cff.transfer_to(cfm.cloudpath, paths=["dne"], allow_missing=True)
683
+ assert not cfm.exists("dne") or cfm.get("dne") == b''
684
+
685
+
673
686
  cfm.delete(list(cfm))
674
687
  cff.delete(list(cff))
675
688
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cloud-files
3
- Version: 4.23.0
3
+ Version: 4.24.1
4
4
  Summary: Fast access to cloud storage and local FS.
5
5
  Home-page: https://github.com/seung-lab/cloud-files/
6
6
  Author: William Silversmith
@@ -0,0 +1 @@
1
+ {"git_version": "560597e", "is_release": true}
@@ -951,6 +951,7 @@ class CloudFiles:
951
951
  block_size:int = 64,
952
952
  reencode:Optional[str] = None,
953
953
  content_type:Optional[str] = None,
954
+ allow_missing:bool = False,
954
955
  ) -> None:
955
956
  """
956
957
  Transfer all files from this CloudFiles storage
@@ -992,7 +993,11 @@ class CloudFiles:
992
993
  green=self.green, num_threads=self.num_threads,
993
994
  )
994
995
 
995
- return cf_dest.transfer_from(self, paths, block_size, reencode, content_type)
996
+ return cf_dest.transfer_from(
997
+ self, paths, block_size,
998
+ reencode, content_type,
999
+ allow_missing,
1000
+ )
996
1001
 
997
1002
  def transfer_from(
998
1003
  self,
@@ -1001,6 +1006,7 @@ class CloudFiles:
1001
1006
  block_size:int = 64,
1002
1007
  reencode:Optional[str] = None,
1003
1008
  content_type:Optional[str] = None,
1009
+ allow_missing:bool = False,
1004
1010
  ) -> None:
1005
1011
  """
1006
1012
  Transfer all files from the source CloudFiles storage
@@ -1053,7 +1059,10 @@ class CloudFiles:
1053
1059
  and self.protocol == "file"
1054
1060
  and reencode is None
1055
1061
  ):
1056
- self.__transfer_file_to_file(cf_src, self, paths, total, pbar, block_size)
1062
+ self.__transfer_file_to_file(
1063
+ cf_src, self, paths, total,
1064
+ pbar, block_size, allow_missing
1065
+ )
1057
1066
  elif (
1058
1067
  cf_src.protocol == "file"
1059
1068
  and self.protocol != "file"
@@ -1061,7 +1070,8 @@ class CloudFiles:
1061
1070
  ):
1062
1071
  self.__transfer_file_to_remote(
1063
1072
  cf_src, self, paths, total,
1064
- pbar, block_size, content_type
1073
+ pbar, block_size, content_type,
1074
+ allow_missing,
1065
1075
  )
1066
1076
  elif (
1067
1077
  (
@@ -1076,19 +1086,22 @@ class CloudFiles:
1076
1086
  ):
1077
1087
  self.__transfer_cloud_internal(
1078
1088
  cf_src, self, paths,
1079
- total, pbar, block_size
1089
+ total, pbar, block_size,
1090
+ allow_missing,
1080
1091
  )
1081
1092
  else:
1082
1093
  self.__transfer_general(
1083
1094
  cf_src, self, paths, total,
1084
1095
  pbar, block_size,
1085
- reencode, content_type
1096
+ reencode, content_type,
1097
+ allow_missing,
1086
1098
  )
1087
1099
 
1088
1100
  def __transfer_general(
1089
1101
  self, cf_src, cf_dest, paths,
1090
1102
  total, pbar, block_size,
1091
- reencode, content_type
1103
+ reencode, content_type,
1104
+ allow_missing
1092
1105
  ):
1093
1106
  """
1094
1107
  Downloads the file into RAM, transforms
@@ -1107,7 +1120,13 @@ class CloudFiles:
1107
1120
  if reencode is not None:
1108
1121
  downloaded = compression.transcode(downloaded, reencode, in_place=True)
1109
1122
  def renameiter():
1123
+ nonlocal allow_missing
1110
1124
  for item in downloaded:
1125
+ if item["content"] is None:
1126
+ if allow_missing:
1127
+ item["content"] = b""
1128
+ else:
1129
+ raise FileNotFoundError(f"{item['path']}")
1111
1130
  if (
1112
1131
  item["tags"] is not None
1113
1132
  and "dest_path" in item["tags"]
@@ -1126,7 +1145,7 @@ class CloudFiles:
1126
1145
 
1127
1146
  def __transfer_file_to_file(
1128
1147
  self, cf_src, cf_dest, paths,
1129
- total, pbar, block_size
1148
+ total, pbar, block_size, allow_missing
1130
1149
  ):
1131
1150
  """
1132
1151
  shutil.copyfile, starting in Python 3.8, uses
@@ -1148,12 +1167,21 @@ class CloudFiles:
1148
1167
  if dest_ext_compress != dest_ext:
1149
1168
  dest += dest_ext_compress
1150
1169
 
1151
- shutil.copyfile(src, dest) # avoids user space
1170
+ try:
1171
+ shutil.copyfile(src, dest) # avoids user space
1172
+ except FileNotFoundError:
1173
+ if allow_missing:
1174
+ with open(dest, "wb") as f:
1175
+ f.write(b'')
1176
+ else:
1177
+ raise
1178
+
1152
1179
  pbar.update(1)
1153
1180
 
1154
1181
  def __transfer_file_to_remote(
1155
1182
  self, cf_src, cf_dest, paths,
1156
- total, pbar, block_size, content_type
1183
+ total, pbar, block_size, content_type,
1184
+ allow_missing
1157
1185
  ):
1158
1186
  """
1159
1187
  Provide file handles instead of slurped binaries
@@ -1174,19 +1202,29 @@ class CloudFiles:
1174
1202
  handle_path, encoding = FileInterface.get_encoded_file_path(
1175
1203
  os.path.join(srcdir, src_path)
1176
1204
  )
1205
+ try:
1206
+ handle = open(handle_path, "rb")
1207
+ except FileNotFoundError:
1208
+ if allow_missing:
1209
+ handle = b''
1210
+ else:
1211
+ raise
1212
+
1177
1213
  to_upload.append({
1178
1214
  "path": dest_path,
1179
- "content": open(handle_path, "rb"),
1215
+ "content": handle,
1180
1216
  "compress": encoding,
1181
1217
  })
1182
1218
  cf_dest.puts(to_upload, raw=True, progress=False, content_type=content_type)
1183
1219
  for item in to_upload:
1184
- item["content"].close()
1220
+ handle = item["content"]
1221
+ if hasattr(handle, "close"):
1222
+ handle.close()
1185
1223
  pbar.update(len(block_paths))
1186
1224
 
1187
1225
  def __transfer_cloud_internal(
1188
1226
  self, cf_src, cf_dest, paths,
1189
- total, pbar, block_size
1227
+ total, pbar, block_size, allow_missing
1190
1228
  ):
1191
1229
  """
1192
1230
  For performing internal transfers in gs or s3.
@@ -1206,8 +1244,12 @@ class CloudFiles:
1206
1244
  dest_key = key
1207
1245
 
1208
1246
  dest_key = posixpath.join(cf_dest._path.path, dest_key)
1209
- conn.copy_file(src_key, cf_dest._path.bucket, dest_key)
1210
- return 1
1247
+ found = conn.copy_file(src_key, cf_dest._path.bucket, dest_key)
1248
+
1249
+ if found == False and not allow_missing:
1250
+ raise FileNotFoundError(src_key)
1251
+
1252
+ return int(found)
1211
1253
 
1212
1254
  results = schedule_jobs(
1213
1255
  fns=( partial(thunk_copy, path) for path in paths ),
@@ -22,7 +22,7 @@ import fasteners
22
22
 
23
23
  from .compression import COMPRESSION_TYPES
24
24
  from .connectionpools import S3ConnectionPool, GCloudBucketPool, MemoryPool, MEMORY_DATA
25
- from .exceptions import MD5IntegrityError
25
+ from .exceptions import MD5IntegrityError, CompressionError
26
26
  from .lib import mkdir, sip, md5, validate_s3_multipart_etag
27
27
  from .secrets import http_credentials, CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
28
28
 
@@ -82,6 +82,21 @@ retry = tenacity.retry(
82
82
  wait=tenacity.wait_random_exponential(0.5, 60.0),
83
83
  )
84
84
 
85
+ def retry_if_not(exception_type):
86
+ if type(exception_type) != list:
87
+ exception_type = [ exception_type ]
88
+
89
+ conditions = tenacity.retry_if_not_exception_type(exception_type[0])
90
+ for et in exception_type[1:]:
91
+ conditions = conditions | tenacity.retry_if_not_exception_type(et)
92
+
93
+ return tenacity.retry(
94
+ retry=conditions,
95
+ reraise=True,
96
+ stop=tenacity.stop_after_attempt(7),
97
+ wait=tenacity.wait_random_exponential(0.5, 60.0),
98
+ )
99
+
85
100
  class StorageInterface(object):
86
101
  exists_batch_size = 1
87
102
  delete_batch_size = 1
@@ -528,7 +543,7 @@ class GoogleCloudStorageInterface(StorageInterface):
528
543
  def get_path_to_file(self, file_path):
529
544
  return posixpath.join(self._path.path, file_path)
530
545
 
531
- @retry
546
+ @retry_if_not(CompressionError)
532
547
  def put_file(self, file_path, content, content_type,
533
548
  compress, cache_control=None, storage_class=None):
534
549
  key = self.get_path_to_file(file_path)
@@ -545,7 +560,7 @@ class GoogleCloudStorageInterface(StorageInterface):
545
560
  elif compress in ("bzip2", "bz2"):
546
561
  blob.content_encoding = "bz2"
547
562
  elif compress:
548
- raise ValueError("Compression type {} not supported.".format(compress))
563
+ raise CompressionError("Compression type {} not supported.".format(compress))
549
564
 
550
565
  if cache_control:
551
566
  blob.cache_control = cache_control
@@ -562,11 +577,17 @@ class GoogleCloudStorageInterface(StorageInterface):
562
577
  with GCS_BUCKET_POOL_LOCK:
563
578
  pool = GC_POOL[GCloudBucketPoolParams(dest_bucket, self._request_payer)]
564
579
  dest_bucket = pool.get_connection(self._secrets, None)
565
- self._bucket.copy_blob(
566
- source_blob, dest_bucket, dest_key
567
- )
568
580
 
569
- @retry
581
+ try:
582
+ self._bucket.copy_blob(
583
+ source_blob, dest_bucket, dest_key
584
+ )
585
+ except google.api_core.exceptions.NotFound:
586
+ return False
587
+
588
+ return True
589
+
590
+ @retry_if_not(google.cloud.exceptions.NotFound)
570
591
  def get_file(self, file_path, start=None, end=None, part_size=None):
571
592
  key = self.get_path_to_file(file_path)
572
593
  blob = self._bucket.blob( key )
@@ -590,7 +611,7 @@ class GoogleCloudStorageInterface(StorageInterface):
590
611
 
591
612
  return (content, blob.content_encoding, hash_value, hash_type)
592
613
 
593
- @retry
614
+ @retry_if_not(google.cloud.exceptions.NotFound)
594
615
  def head(self, file_path):
595
616
  key = self.get_path_to_file(file_path)
596
617
  blob = self._bucket.get_blob(key)
@@ -609,7 +630,7 @@ class GoogleCloudStorageInterface(StorageInterface):
609
630
  "Component-Count": blob.component_count,
610
631
  }
611
632
 
612
- @retry
633
+ @retry_if_not(google.cloud.exceptions.NotFound)
613
634
  def size(self, file_path):
614
635
  key = self.get_path_to_file(file_path)
615
636
  blob = self._bucket.get_blob(key)
@@ -617,7 +638,7 @@ class GoogleCloudStorageInterface(StorageInterface):
617
638
  return blob.size
618
639
  return None
619
640
 
620
- @retry
641
+ @retry_if_not(google.cloud.exceptions.NotFound)
621
642
  def exists(self, file_path):
622
643
  key = self.get_path_to_file(file_path)
623
644
  blob = self._bucket.blob(key)
@@ -724,9 +745,9 @@ class HttpInterface(StorageInterface):
724
745
  @retry
725
746
  def head(self, file_path):
726
747
  key = self.get_path_to_file(file_path)
727
- resp = self.session.head(key)
728
- resp.raise_for_status()
729
- return resp.headers
748
+ with self.session.head(key) as resp:
749
+ resp.raise_for_status()
750
+ return resp.headers
730
751
 
731
752
  @retry
732
753
  def get_file(self, file_path, start=None, end=None, part_size=None):
@@ -741,6 +762,7 @@ class HttpInterface(StorageInterface):
741
762
  resp = self.session.get(key)
742
763
  if resp.status_code in (404, 403):
743
764
  return (None, None, None, None)
765
+ resp.close()
744
766
  resp.raise_for_status()
745
767
 
746
768
  # Don't check MD5 for http because the etag can come in many
@@ -758,9 +780,8 @@ class HttpInterface(StorageInterface):
758
780
  @retry
759
781
  def exists(self, file_path):
760
782
  key = self.get_path_to_file(file_path)
761
- resp = self.session.get(key, stream=True)
762
- resp.close()
763
- return resp.ok
783
+ with self.session.get(key, stream=True) as resp:
784
+ return resp.ok
764
785
 
765
786
  def files_exist(self, file_paths):
766
787
  return {path: self.exists(path) for path in file_paths}
@@ -783,6 +804,7 @@ class HttpInterface(StorageInterface):
783
804
  params={ "prefix": prefix, "pageToken": token },
784
805
  )
785
806
  results.raise_for_status()
807
+ results.close()
786
808
  return results.json()
787
809
 
788
810
  token = None
@@ -907,7 +929,15 @@ class S3Interface(StorageInterface):
907
929
  'Bucket': self._path.bucket,
908
930
  'Key': key,
909
931
  }
910
- dest_bucket.copy(CopySource=copy_source, Bucket=dest_bucket_name, Key=dest_key)
932
+ try:
933
+ dest_bucket.copy(CopySource=copy_source, Bucket=dest_bucket_name, Key=dest_key)
934
+ except botocore.exceptions.ClientError as err:
935
+ if err.response['Error']['Code'] in ('NoSuchKey', '404'):
936
+ return False
937
+ else:
938
+ raise
939
+
940
+ return True
911
941
 
912
942
  @retry
913
943
  def get_file(self, file_path, start=None, end=None, part_size=None):
@@ -1 +0,0 @@
1
- {"git_version": "a123f94", "is_release": true}
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes