cloud-files 5.5.0__py3-none-any.whl → 5.6.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
cloudfiles/interfaces.py CHANGED
@@ -4,7 +4,7 @@ from collections import defaultdict, namedtuple
4
4
  from datetime import datetime
5
5
  from io import BytesIO
6
6
  import json
7
- import os.path
7
+ import os
8
8
  import posixpath
9
9
  import re
10
10
 
@@ -467,7 +467,7 @@ class MemoryInterface(StorageInterface):
467
467
  result = result[slice(start, end)]
468
468
  return (result, encoding, None, None)
469
469
 
470
- def save_file(self, src, dest, resumable):
470
+ def save_file(self, src, dest, resumable) -> tuple[bool,int]:
471
471
  key = self.get_path_to_file(src)
472
472
  with EXT_TEST_SEQUENCE_LOCK:
473
473
  exts = list(EXT_TEST_SEQUENCE)
@@ -489,9 +489,9 @@ class MemoryInterface(StorageInterface):
489
489
  with open(dest + true_ext, "wb") as f:
490
490
  f.write(self._data[path])
491
491
  except KeyError:
492
- return False
492
+ return (False, 0)
493
493
 
494
- return True
494
+ return (True, len(self._data[path]))
495
495
 
496
496
  def head(self, file_path):
497
497
  path = self.get_path_to_file(file_path)
@@ -541,13 +541,13 @@ class MemoryInterface(StorageInterface):
541
541
 
542
542
  return None
543
543
 
544
- def copy_file(self, src_path, dest_bucket, dest_key):
544
+ def copy_file(self, src_path, dest_bucket, dest_key) -> tuple[bool,int]:
545
545
  key = self.get_path_to_file(src_path)
546
546
  with MEM_BUCKET_POOL_LOCK:
547
547
  pool = MEM_POOL[MemoryPoolParams(dest_bucket)]
548
548
  dest_bucket = pool.get_connection(None, None)
549
549
  dest_bucket[dest_key] = self._data[key]
550
- return True
550
+ return (True, len(self._data[key]))
551
551
 
552
552
  def exists(self, file_path):
553
553
  path = self.get_path_to_file(file_path)
@@ -662,7 +662,7 @@ class GoogleCloudStorageInterface(StorageInterface):
662
662
  blob.upload_from_string(content, content_type)
663
663
 
664
664
  @retry
665
- def copy_file(self, src_path, dest_bucket, dest_key):
665
+ def copy_file(self, src_path, dest_bucket, dest_key) -> tuple[bool,int]:
666
666
  key = self.get_path_to_file(src_path)
667
667
  source_blob = self._bucket.blob( key )
668
668
  with GCS_BUCKET_POOL_LOCK:
@@ -670,13 +670,13 @@ class GoogleCloudStorageInterface(StorageInterface):
670
670
  dest_bucket = pool.get_connection(self._secrets, None)
671
671
 
672
672
  try:
673
- self._bucket.copy_blob(
673
+ blob = self._bucket.copy_blob(
674
674
  source_blob, dest_bucket, dest_key
675
675
  )
676
676
  except google.api_core.exceptions.NotFound:
677
- return False
677
+ return (False, 0)
678
678
 
679
- return True
679
+ return (True, blob.size)
680
680
 
681
681
  @retry_if_not(google.cloud.exceptions.NotFound)
682
682
  def get_file(self, file_path, start=None, end=None, part_size=None):
@@ -703,7 +703,7 @@ class GoogleCloudStorageInterface(StorageInterface):
703
703
  return (content, blob.content_encoding, hash_value, hash_type)
704
704
 
705
705
  @retry
706
- def save_file(self, src, dest, resumable):
706
+ def save_file(self, src, dest, resumable) -> tuple[bool, int]:
707
707
  key = self.get_path_to_file(src)
708
708
  blob = self._bucket.blob(key)
709
709
  try:
@@ -714,13 +714,15 @@ class GoogleCloudStorageInterface(StorageInterface):
714
714
  checksum=None
715
715
  )
716
716
  except google.cloud.exceptions.NotFound:
717
- return False
717
+ return (False, 0)
718
+
719
+ num_bytes = os.path.getsize(dest)
718
720
 
719
721
  ext = FileInterface.get_extension(blob.content_encoding)
720
722
  if not dest.endswith(ext):
721
723
  os.rename(dest, dest + ext)
722
724
 
723
- return True
725
+ return (True, num_bytes)
724
726
 
725
727
  @retry_if_not(google.cloud.exceptions.NotFound)
726
728
  def head(self, file_path):
@@ -927,7 +929,7 @@ class HttpInterface(StorageInterface):
927
929
  return (resp.content, content_encoding, None, None)
928
930
 
929
931
  @retry
930
- def save_file(self, src, dest, resumable):
932
+ def save_file(self, src, dest, resumable) -> tuple[bool, int]:
931
933
  key = self.get_path_to_file(src)
932
934
 
933
935
  headers = self.head(src)
@@ -948,20 +950,23 @@ class HttpInterface(StorageInterface):
948
950
  if resumable and os.path.exists(partname):
949
951
  downloaded_size = os.path.getsize(partname)
950
952
 
953
+ streamed_bytes = 0
954
+
951
955
  range_headers = { "Range": f"bytes={downloaded_size}-" }
952
956
  with self.session.get(key, headers=range_headers, stream=True) as resp:
953
957
  if resp.status_code not in [200, 206]:
954
958
  resp.raise_for_status()
955
- return False
959
+ return (False, 0)
956
960
 
957
961
  with open(partname, 'ab') as f:
958
962
  for chunk in resp.iter_content(chunk_size=int(10e6)):
959
963
  f.write(chunk)
964
+ streamed_bytes += len(chunk)
960
965
 
961
966
  if resumable:
962
967
  os.rename(partname, fulldest)
963
968
 
964
- return True
969
+ return (True, streamed_bytes)
965
970
 
966
971
  @retry
967
972
  def exists(self, file_path):
@@ -1162,10 +1167,17 @@ class S3Interface(StorageInterface):
1162
1167
  if storage_class:
1163
1168
  attrs['StorageClass'] = storage_class
1164
1169
 
1165
- multipart = hasattr(content, "read") and hasattr(content, "seek")
1170
+ multipart = False
1171
+ is_file_handle = hasattr(content, "read") and hasattr(content, "seek")
1172
+
1173
+ if is_file_handle:
1174
+ content_length = os.fstat(content.fileno()).st_size
1175
+ else:
1176
+ content_length = len(content)
1166
1177
 
1167
- if not multipart and len(content) > int(self.composite_upload_threshold):
1168
- content = BytesIO(content)
1178
+ if not multipart and content_length > int(self.composite_upload_threshold):
1179
+ if not is_file_handle:
1180
+ content = BytesIO(content)
1169
1181
  multipart = True
1170
1182
 
1171
1183
  # gevent monkey patching has a bad interaction with s3's use
@@ -1175,19 +1187,18 @@ class S3Interface(StorageInterface):
1175
1187
  multipart = False
1176
1188
  content = content.read()
1177
1189
 
1190
+ # WMS 2025-07-05:
1191
+ # Currently, boto3 does not properly support streaming smaller files.
1192
+ # It uses an S3 API that requires a checksum up-front, but streaming
1193
+ # checksums can only be provided at the end.
1194
+ # https://github.com/boto/boto3/issues/3738
1195
+ # https://github.com/boto/boto3/issues/4392
1196
+ # https://docs.aws.amazon.com/sdkref/latest/guide/feature-dataintegrity.html
1197
+ if not multipart and is_file_handle and content_length < int(self.composite_upload_threshold):
1198
+ content = content.read()
1199
+
1178
1200
  if multipart:
1179
1201
  self._conn.upload_fileobj(content, self._path.bucket, key, ExtraArgs=attrs)
1180
- # upload_fileobj will add 'aws-chunked' to the ContentEncoding,
1181
- # which after it finishes uploading is useless and messes up our
1182
- # software. Therefore, edit the metadata and replace it (but this incurs
1183
- # 2x class-A...)
1184
- self._conn.copy_object(
1185
- Bucket=self._path.bucket,
1186
- Key=key,
1187
- CopySource={'Bucket': self._path.bucket, 'Key': key},
1188
- MetadataDirective="REPLACE",
1189
- **attrs
1190
- )
1191
1202
  else:
1192
1203
  if isinstance(content, str):
1193
1204
  content = content.encode('utf8')
@@ -1199,7 +1210,7 @@ class S3Interface(StorageInterface):
1199
1210
  self._conn.put_object(**attrs)
1200
1211
 
1201
1212
  @retry
1202
- def copy_file(self, src_path, dest_bucket_name, dest_key):
1213
+ def copy_file(self, src_path, dest_bucket_name, dest_key) -> tuple[bool,int]:
1203
1214
  key = self.get_path_to_file(src_path)
1204
1215
  s3client = self._get_bucket(dest_bucket_name)
1205
1216
  copy_source = {
@@ -1207,7 +1218,7 @@ class S3Interface(StorageInterface):
1207
1218
  'Key': key,
1208
1219
  }
1209
1220
  try:
1210
- s3client.copy_object(
1221
+ response = s3client.copy_object(
1211
1222
  CopySource=copy_source,
1212
1223
  Bucket=dest_bucket_name,
1213
1224
  Key=dest_key,
@@ -1215,11 +1226,16 @@ class S3Interface(StorageInterface):
1215
1226
  )
1216
1227
  except botocore.exceptions.ClientError as err:
1217
1228
  if err.response['Error']['Code'] in ('NoSuchKey', '404'):
1218
- return False
1229
+ return (False, 0)
1219
1230
  else:
1220
1231
  raise
1221
1232
 
1222
- return True
1233
+ try:
1234
+ num_bytes = int(response["ResponseMetadata"]["HTTPHeaders"]["content-length"])
1235
+ except KeyError:
1236
+ num_bytes = 0
1237
+
1238
+ return (True, num_bytes)
1223
1239
 
1224
1240
  @retry
1225
1241
  def get_file(self, file_path, start=None, end=None, part_size=None):
@@ -1280,14 +1296,14 @@ class S3Interface(StorageInterface):
1280
1296
  raise
1281
1297
 
1282
1298
  @retry
1283
- def save_file(self, src, dest, resumable):
1299
+ def save_file(self, src, dest, resumable) -> tuple[bool,int]:
1284
1300
  key = self.get_path_to_file(src)
1285
1301
  kwargs = self._additional_attrs.copy()
1286
1302
 
1287
1303
  resp = self.head(src)
1288
1304
 
1289
1305
  if resp is None:
1290
- return False
1306
+ return (False, 0)
1291
1307
 
1292
1308
  mkdir(os.path.dirname(dest))
1293
1309
 
@@ -1310,11 +1326,12 @@ class S3Interface(StorageInterface):
1310
1326
  )
1311
1327
  except botocore.exceptions.ClientError as err:
1312
1328
  if err.response['Error']['Code'] in ('NoSuchKey', '404'):
1313
- return False
1329
+ return (False, 0)
1314
1330
  else:
1315
1331
  raise
1316
1332
 
1317
- return True
1333
+ num_bytes = os.path.getsize(dest)
1334
+ return (True, num_bytes)
1318
1335
 
1319
1336
  @retry
1320
1337
  def head(self, file_path):