cloud-files 4.30.1__tar.gz → 5.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {cloud-files-4.30.1 → cloud_files-5.0.1}/.github/workflows/test-suite.yml +1 -1
  2. {cloud-files-4.30.1 → cloud_files-5.0.1}/ChangeLog +33 -0
  3. {cloud-files-4.30.1 → cloud_files-5.0.1}/PKG-INFO +4 -4
  4. {cloud-files-4.30.1 → cloud_files-5.0.1}/automated_test.py +5 -3
  5. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/PKG-INFO +4 -4
  6. cloud_files-5.0.1/cloud_files.egg-info/pbr.json +1 -0
  7. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/cloudfiles.py +60 -4
  8. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/exceptions.py +4 -0
  9. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/interfaces.py +282 -35
  10. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/lib.py +5 -0
  11. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/cloudfiles_cli.py +37 -15
  12. {cloud-files-4.30.1 → cloud_files-5.0.1}/setup.cfg +2 -2
  13. {cloud-files-4.30.1 → cloud_files-5.0.1}/setup.py +1 -1
  14. cloud-files-4.30.1/cloud_files.egg-info/pbr.json +0 -1
  15. {cloud-files-4.30.1 → cloud_files-5.0.1}/AUTHORS +0 -0
  16. {cloud-files-4.30.1 → cloud_files-5.0.1}/LICENSE +0 -0
  17. {cloud-files-4.30.1 → cloud_files-5.0.1}/MANIFEST.in +0 -0
  18. {cloud-files-4.30.1 → cloud_files-5.0.1}/README.md +0 -0
  19. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/SOURCES.txt +0 -0
  20. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/dependency_links.txt +0 -0
  21. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/entry_points.txt +0 -0
  22. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/not-zip-safe +0 -0
  23. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/requires.txt +0 -0
  24. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/top_level.txt +0 -0
  25. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/__init__.py +0 -0
  26. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/compression.py +0 -0
  27. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/connectionpools.py +0 -0
  28. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/gcs.py +0 -0
  29. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/paths.py +0 -0
  30. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/resumable_tools.py +0 -0
  31. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/scheduler.py +0 -0
  32. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/secrets.py +0 -0
  33. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/threaded_queue.py +0 -0
  34. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/typing.py +0 -0
  35. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/LICENSE +0 -0
  36. {cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/__init__.py +0 -0
  37. {cloud-files-4.30.1 → cloud_files-5.0.1}/requirements.txt +0 -0

{cloud-files-4.30.1 → cloud_files-5.0.1}/.github/workflows/test-suite.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

     steps:
     - uses: actions/checkout@v2

{cloud-files-4.30.1 → cloud_files-5.0.1}/ChangeLog
@@ -1,10 +1,43 @@
 CHANGES
 =======

+5.0.1
+-----
+
+* fix: prevent aws-chunked from populating in Content-Encoding (#109)
+* fix: add head implementation for MemoryInterface
+* fix+test: check that content encoding is transferred correctly
+
+5.0.0
+-----
+
+* feat: efficient saving to disk (#108)
+* install: set minimum version to py39
+* ci: drop py38, add py313
+* fix: strip 'aws-chunked' from s3 encodings
+* fix: add no\_sign\_request for s3 listing
+* fix: prefix logic for no-auth gcs
+* fix: list files google http
+* feat(cli): add no-auth flag to ls
+* fix: abort auth error in list files (http, google)
+* fix: make s3 listing consistent with file and mem
+* fix(list): memory and files interface list flat more consistently
+* test: make flat more consistent in list\_files
+* fix: replaceprefix -> removeprefix
+* fix: aws-chunked does not affect byte encoding
+* fix: harmonizing definition of flat across interfaces
+* feat: adding (broken) support for listing common prefixes
+* refactor: use same pattern for removeprefix
+* fix: make "flat" listing work for s3
+
 4.30.1
 ------

 * fix(gcs): don't double compress when uploading to gcs w/ composite
+
+4.30.0
+------
+
 * redesign: normalize cloudpaths so file:// isn't required

 4.29.0

{cloud-files-4.30.1 → cloud_files-5.0.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.30.1
+Version: 5.0.1
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -10,13 +10,13 @@ Classifier: Intended Audience :: Developers
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.7,<4.0
+Requires-Python: >=3.9,<4.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS

{cloud-files-4.30.1 → cloud_files-5.0.1}/automated_test.py
@@ -376,12 +376,12 @@ def test_list(s3, protocol):
   assert set(cf.list(prefix='nofolder/')) == set([])

   # Tests (1)
-  assert set(cf.list(prefix='', flat=True)) == set(['info1','info2','info5','info.txt'])
+  assert set(cf.list(prefix='', flat=True)) == set(['info1','info2','info5','info.txt', 'build/', 'level1/'])
   assert set(cf.list(prefix='inf', flat=True)) == set(['info1','info2','info5','info.txt'])
   # Tests (2)
-  assert set(cf.list(prefix='build', flat=True)) == set([])
+  assert set(cf.list(prefix='build', flat=True)) == set(['build/info3'])
   # Tests (3)
-  assert set(cf.list(prefix='level1/', flat=True)) == set([])
+  assert set(cf.list(prefix='level1/', flat=True)) == set(['level1/level2/'])
   assert set(cf.list(prefix='build/', flat=True)) == set(['build/info3'])
   # Tests (4)
   assert set(cf.list(prefix='build/inf', flat=True)) == set(['build/info3'])
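
Note on the new assertions: flat=True listings now include immediate subdirectories as prefixes with a trailing slash, and a bare prefix that names a directory (e.g. 'build') descends into it instead of returning nothing. A minimal sketch of the new semantics, assuming a local layout that mirrors the test fixture (the path is hypothetical):

    from cloudfiles import CloudFiles

    cf = CloudFiles("file:///tmp/flatdemo")  # hypothetical location
    cf.puts([ ("info1", b""), ("build/info3", b""), ("level1/level2/info4", b"") ])

    set(cf.list(flat=True))                    # {'info1', 'build/', 'level1/'}
    set(cf.list(prefix="build", flat=True))    # {'build/info3'}
    set(cf.list(prefix="level1/", flat=True))  # {'level1/level2/'}
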
@@ -670,11 +670,13 @@ def test_transfer_semantics(s3, compression, src_protocol, dest_protocol, allow_
   cff.transfer_to(cfm.cloudpath, allow_missing=allow_missing)
   assert sorted(list(cfm)) == sorted([ str(i) for i in range(N) ])
   assert [ f['content'] for f in cfm[:] ] == [ content ] * N
+  assert cfm.head("1")["Content-Encoding"] == cff.head("1")["Content-Encoding"]

   cfm.delete(list(cfm))

   cff.transfer_to(cfm.cloudpath, reencode='br', allow_missing=allow_missing)
   assert sorted(list(cfm)) == sorted([ str(i) for i in range(N) ])
   assert [ f['content'] for f in cfm[:] ] == [ content ] * N
+  assert 'br' in cfm.head("1")["Content-Encoding"]

   if dest_protocol == "mem":
     data = cfm._get_connection()._data

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloud_files.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.30.1
+Version: 5.0.1
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -10,13 +10,13 @@ Classifier: Intended Audience :: Developers
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: BSD License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.7,<4.0
+Requires-Python: >=3.9,<4.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS

cloud_files-5.0.1/cloud_files.egg-info/pbr.json (new file)
@@ -0,0 +1 @@
+{"git_version": "4c96852", "is_release": true}

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/cloudfiles.py
@@ -743,9 +743,12 @@ class CloudFiles:
       return True
     elif prefix[-1] == "/":
       return True
-
-    res = first(self.list(prefix=prefix))
-    return res is not None
+    try:
+      res = first(self.list(prefix=prefix))
+      return res is not None
+    except NotImplementedError as err:
+      res = CloudFile(self.cloudpath).size()
+      return res > 0

   def exists(
     self, paths:GetPathType,
@@ -1001,6 +1004,7 @@ class CloudFiles:
     content_type:Optional[str] = None,
     allow_missing:bool = False,
     progress:Optional[bool] = None,
+    resumable:bool = False,
   ) -> None:
     """
     Transfer all files from this CloudFiles storage
@@ -1035,6 +1039,11 @@ class CloudFiles:
       as '' (None), 'gzip', 'br', 'zstd'
     content_type: if provided, set the Content-Type header
       on the upload. This is necessary for e.g. file->cloud
+
+    resumable: for remote->file downloads, download to a .part
+      file and rename it when the download completes. If the
+      download does not complete, it can be resumed. Only
+      supported for https->file currently.
     """
     if isinstance(cf_dest, str):
       cf_dest = CloudFiles(
@@ -1046,7 +1055,7 @@ class CloudFiles:
       self, paths, block_size,
       reencode, content_type,
       allow_missing,
-      progress,
+      progress, resumable,
     )

   def transfer_from(
@@ -1058,6 +1067,7 @@ class CloudFiles:
     content_type:Optional[str] = None,
     allow_missing:bool = False,
     progress:Optional[bool] = None,
+    resumable:bool = False,
   ) -> None:
     """
     Transfer all files from the source CloudFiles storage
@@ -1092,6 +1102,10 @@ class CloudFiles:
       as '' (None), 'gzip', 'br', 'zstd'
     content_type: if provided, set the Content-Type header
       on the upload. This is necessary for e.g. file->cloud
+    resumable: for remote->file downloads, download to a .part
+      file and rename it when the download completes. If the
+      download does not complete, it can be resumed. Only
+      supported for https->file currently.
     """
     if isinstance(cf_src, str):
       cf_src = CloudFiles(
@@ -1122,6 +1136,16 @@ class CloudFiles:
         cf_src, self, paths, total,
         pbar, block_size, allow_missing
       )
+    elif (
+      cf_src.protocol != "file"
+      and self.protocol == "file"
+      and reencode is None
+    ):
+      self.__transfer_remote_to_file(
+        cf_src, self, paths, total,
+        pbar, block_size, content_type,
+        allow_missing, resumable,
+      )
     elif (
       cf_src.protocol == "file"
       and self.protocol != "file"
@@ -1237,6 +1261,38 @@ class CloudFiles:

       pbar.update(1)

+  def __transfer_remote_to_file(
+    self, cf_src, cf_dest, paths,
+    total, pbar, block_size, content_type,
+    allow_missing, resumable,
+  ):
+    def thunk_save(key):
+      with cf_src._get_connection() as conn:
+        if isinstance(key, dict):
+          dest_key = key.get("dest_path", key["path"])
+          src_key = key["path"]
+        else:
+          src_key = key
+          dest_key = key
+
+        dest_key = os.path.join(cf_dest._path.path, dest_key)
+        found = conn.save_file(src_key, dest_key, resumable=resumable)
+
+      if found == False and not allow_missing:
+        raise FileNotFoundError(src_key)
+
+      return int(found)
+
+    results = schedule_jobs(
+      fns=( partial(thunk_save, path) for path in paths ),
+      progress=pbar,
+      concurrency=self.num_threads,
+      total=totalfn(paths, total),
+      green=self.green,
+      count_return=True,
+    )
+    return len(results)
+
   def __transfer_file_to_remote(
     self, cf_src, cf_dest, paths,
     total, pbar, block_size, content_type,
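
Taken together, these hunks route remote->file transfers through each interface's new save_file method and thread the resumable flag through transfer_to/transfer_from. A usage sketch of the new option, assuming a hypothetical public https source and that paths defaults to all files:

    from cloudfiles import CloudFiles

    cf = CloudFiles("https://example.com/datasets/")  # hypothetical source
    # Interrupted transfers leave .part files that a re-run picks up
    # (https->file only, per the docstring above).
    cf.transfer_to("file:///tmp/datasets/", resumable=True)
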

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/exceptions.py
@@ -14,6 +14,10 @@ class CompressionError(Exception):
   """
   pass

+class AuthorizationError(Exception):
+  """Authorization Error"""
+  pass
+
 class UnsupportedCompressionType(Exception):
   """
   Raised when attempting to use a compression type which is unsupported

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/interfaces.py
@@ -11,7 +11,6 @@ import re
 import boto3
 import botocore
 import gevent.monkey
-from glob import glob
 import google.cloud.exceptions
 from google.cloud.storage import Batch, Client
 import requests
@@ -22,8 +21,8 @@ import fasteners

 from .compression import COMPRESSION_TYPES
 from .connectionpools import S3ConnectionPool, GCloudBucketPool, MemoryPool, MEMORY_DATA
-from .exceptions import MD5IntegrityError, CompressionError
-from .lib import mkdir, sip, md5, validate_s3_multipart_etag
+from .exceptions import MD5IntegrityError, CompressionError, AuthorizationError
+from .lib import mkdir, sip, md5, encode_crc32c_b64, validate_s3_multipart_etag
 from .secrets import (
   http_credentials,
   cave_credentials,
@@ -339,7 +338,7 @@ class FileInterface(StorageInterface):
     """

     layer_path = self.get_path_to_file("")
-    path = os.path.join(layer_path, prefix) + '*'
+    path = os.path.join(layer_path, prefix)

     filenames = []

@@ -348,17 +347,33 @@ class FileInterface(StorageInterface):
       remove += os.path.sep

     if flat:
-      for file_path in glob(path):
-        if not os.path.isfile(file_path):
+      if os.path.isdir(path):
+        list_path = path
+        list_prefix = ''
+        prepend_prefix = prefix
+        if prepend_prefix and prepend_prefix[-1] != os.path.sep:
+          prepend_prefix += os.path.sep
+      else:
+        list_path = os.path.dirname(path)
+        list_prefix = os.path.basename(prefix)
+        prepend_prefix = os.path.dirname(prefix)
+        if prepend_prefix != '':
+          prepend_prefix += os.path.sep
+
+      for fobj in os.scandir(list_path):
+        if list_prefix != '' and not fobj.name.startswith(list_prefix):
           continue
-        filename = file_path.replace(remove, '')
-        filenames.append(filename)
+
+        if fobj.is_dir():
+          filenames.append(f"{prepend_prefix}{fobj.name}{os.path.sep}")
+        else:
+          filenames.append(f"{prepend_prefix}{fobj.name}")
     else:
       subdir = os.path.join(layer_path, os.path.dirname(prefix))
       for root, dirs, files in os.walk(subdir):
-        files = [ os.path.join(root, f) for f in files ]
-        files = [ f.replace(remove, '') for f in files ]
-        files = [ f for f in files if f[:len(prefix)] == prefix ]
+        files = ( os.path.join(root, f) for f in files )
+        files = ( f.removeprefix(remove) for f in files )
+        files = ( f for f in files if f[:len(prefix)] == prefix )

         for filename in files:
           filenames.append(filename)
@@ -452,8 +467,60 @@ class MemoryInterface(StorageInterface):
       result = result[slice(start, end)]
     return (result, encoding, None, None)

+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+    with EXT_TEST_SEQUENCE_LOCK:
+      exts = list(EXT_TEST_SEQUENCE)
+      exts = [ x[0] for x in exts ]
+
+    path = key
+    true_ext = ''
+    for ext in exts:
+      pathext = key + ext
+      if pathext in self._data:
+        path = pathext
+        true_ext = ext
+        break
+
+    filepath = os.path.join(dest, os.path.basename(path))
+
+    mkdir(os.path.dirname(dest))
+    try:
+      with open(dest + true_ext, "wb") as f:
+        f.write(self._data[path])
+    except KeyError:
+      return False
+
+    return True
+
   def head(self, file_path):
-    raise NotImplementedError()
+    path = self.get_path_to_file(file_path)
+
+    data = None
+    encoding = ''
+
+    with EXT_TEST_SEQUENCE_LOCK:
+      for ext, enc in EXT_TEST_SEQUENCE:
+        pathext = path + ext
+        if pathext in self._data:
+          data = self._data[pathext]
+          encoding = enc
+          break
+
+    return {
+      "Cache-Control": None,
+      "Content-Length": len(data),
+      "Content-Type": None,
+      "ETag": None,
+      "Last-Modified": None,
+      "Content-Md5": None,
+      "Content-Encoding": encoding,
+      "Content-Disposition": None,
+      "Content-Language": None,
+      "Storage-Class": None,
+      "Request-Charged": None,
+      "Parts-Count": None,
+    }

   def size(self, file_path):
     path = self.get_path_to_file(file_path)
@@ -520,11 +587,22 @@ class MemoryInterface(StorageInterface):
     if len(remove) and remove[-1] != '/':
       remove += '/'

-    filenames = [ f.replace(remove, '') for f in self._data ]
-    filenames = [ f for f in filenames if f[:len(prefix)] == prefix ]
+    filenames = ( f.removeprefix(remove) for f in self._data )
+    filenames = ( f for f in filenames if f[:len(prefix)] == prefix )

     if flat:
-      filenames = [ f for f in filenames if '/' not in f.replace(prefix, '') ]
+      tmp = []
+      for f in filenames:
+        elems = f.removeprefix(prefix).split('/')
+        if len(elems) > 1 and elems[0] == '':
+          elems.pop(0)
+          elems[0] = f'/{elems[0]}'
+
+        if len(elems) > 1:
+          tmp.append(f"{prefix}{elems[0]}/")
+        else:
+          tmp.append(f"{prefix}{elems[0]}")
+      filenames = tmp

     def stripext(fname):
       (base, ext) = os.path.splitext(fname)
@@ -624,6 +702,25 @@ class GoogleCloudStorageInterface(StorageInterface):

     return (content, blob.content_encoding, hash_value, hash_type)

+  @retry
+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+    blob = self._bucket.blob(key)
+    try:
+      blob.download_to_filename(
+        filename=dest,
+        raw_download=True,
+        checksum=None
+      )
+    except google.cloud.exceptions.NotFound:
+      return False
+
+    ext = FileInterface.get_extension(blob.content_encoding)
+    if not dest.endswith(ext):
+      os.rename(dest, dest + ext)
+
+    return True
+
   @retry_if_not(google.cloud.exceptions.NotFound)
   def head(self, file_path):
     key = self.get_path_to_file(file_path)
@@ -711,13 +808,24 @@ class GoogleCloudStorageInterface(StorageInterface):
     path = posixpath.join(layer_path, prefix)

     delimiter = '/' if flat else None
-    for blob in self._bucket.list_blobs(prefix=path, delimiter=delimiter):
-      filename = blob.name.replace(layer_path, '')
+    blobs = self._bucket.list_blobs(
+      prefix=path,
+      delimiter=delimiter,
+    )
+
+    if blobs.prefixes:
+      yield from (
+        item.removeprefix(path)
+        for item in blobs.prefixes
+      )
+
+    for blob in blobs:
+      filename = blob.name.removeprefix(layer_path)
       if not filename:
         continue
       elif not flat and filename[-1] != '/':
         yield filename
-      elif flat and '/' not in blob.name.replace(path, ''):
+      elif flat and '/' not in blob.name.removeprefix(path):
         yield filename

   def release_connection(self):
@@ -803,6 +911,43 @@ class HttpInterface(StorageInterface):

     return (resp.content, content_encoding, None, None)

+  @retry
+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+
+    headers = self.head(src)
+    content_encoding = headers.get('Content-Encoding', None)
+
+    try:
+      ext = FileInterface.get_extension(content_encoding)
+    except ValueError:
+      ext = ""
+
+    fulldest = dest + ext
+
+    partname = fulldest
+    if resumable:
+      partname += ".part"
+
+    downloaded_size = 0
+    if resumable and os.path.exists(partname):
+      downloaded_size = os.path.getsize(partname)
+
+    range_headers = { "Range": f"bytes={downloaded_size}-" }
+    with self.session.get(key, headers=range_headers, stream=True) as resp:
+      if resp.status_code not in [200, 206]:
+        resp.raise_for_status()
+        return False
+
+      with open(partname, 'ab') as f:
+        for chunk in resp.iter_content(chunk_size=int(10e6)):
+          f.write(chunk)
+
+    if resumable:
+      os.rename(partname, fulldest)
+
+    return True
+
   @retry
   def exists(self, file_path):
     key = self.get_path_to_file(file_path)
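
The resumable branch above is a standard HTTP Range resume: append to the .part file starting from its current size, then rename once the download completes. The same pattern in isolation (URL and filenames are invented; assumes the server honors Range requests):

    import os
    import requests

    url = "https://example.com/data.bin"  # hypothetical
    part = "data.bin.part"

    # resume from wherever the previous attempt stopped
    offset = os.path.getsize(part) if os.path.exists(part) else 0
    with requests.get(url, headers={ "Range": f"bytes={offset}-" }, stream=True) as resp:
      resp.raise_for_status()  # expect 200 (full) or 206 (partial content)
      with open(part, "ab") as f:
        for chunk in resp.iter_content(chunk_size=int(10e6)):
          f.write(chunk)

    os.rename(part, "data.bin")
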
@@ -821,29 +966,49 @@ class HttpInterface(StorageInterface):
     )
     if prefix and prefix[0] == '/':
       prefix = prefix[1:]
-    if prefix and prefix[-1] != '/':
-      prefix += '/'

     headers = self.default_headers()

-    @retry
+    @retry_if_not(AuthorizationError)
     def request(token):
       nonlocal headers
+      params = {}
+      if prefix:
+        params["prefix"] = prefix
+      if token is not None:
+        params["pageToken"] = token
+      if flat:
+        params["delimiter"] = '/'
+
       results = self.session.get(
         f"https://storage.googleapis.com/storage/v1/b/{bucket}/o",
-        params={ "prefix": prefix, "pageToken": token },
+        params=params,
         headers=headers,
       )
+      if results.status_code in [401,403]:
+        raise AuthorizationError(f"http {results.status_code}")
+
       results.raise_for_status()
       results.close()
       return results.json()

+    strip = posixpath.dirname(prefix)
+    if strip and strip[-1] != '/':
+      strip += '/'
+
     token = None
     while True:
       results = request(token)

-      for res in results["items"]:
-        yield res["name"].replace(prefix, "", 1)
+      if 'prefixes' in results:
+        yield from (
+          item.removeprefix(strip)
+          for item in results["prefixes"]
+        )
+
+      for res in results.get("items", []):
+        print(res["name"])
+        yield res["name"].removeprefix(strip)

       token = results.get("nextPageToken", None)
       if token is None:
@@ -895,13 +1060,15 @@ class HttpInterface(StorageInterface):
   def list_files(self, prefix, flat=False):
     if self._path.host == "https://storage.googleapis.com":
       yield from self._list_files_google(prefix, flat)
-
+      return
+
     url = posixpath.join(self._path.host, self._path.path, prefix)
     resp = requests.head(url)

     server = resp.headers.get("Server", "").lower()
     if 'apache' in server:
       yield from self._list_files_apache(prefix, flat)
+      return
     else:
       raise NotImplementedError()
@@ -971,7 +1138,7 @@ class S3Interface(StorageInterface):
     elif compress in ("xz", "lzma"):
       attrs['ContentEncoding'] = 'xz'
     elif compress in ("bzip2", "bz2"):
-      attrs['ContentEncoding'] = 'bz2'
+      attrs['ContentEncoding'] = 'bzip2'
     elif compress:
       raise ValueError("Compression type {} not supported.".format(compress))

@@ -995,23 +1162,39 @@ class S3Interface(StorageInterface):

     if multipart:
       self._conn.upload_fileobj(content, self._path.bucket, key, ExtraArgs=attrs)
+      # upload_fileobj will add 'aws-chunked' to the ContentEncoding,
+      # which after it finishes uploading is useless and messes up our
+      # software. Therefore, edit the metadata and replace it (but this incurs
+      # 2x class-A...)
+      self._conn.copy_object(
+        Bucket=self._path.bucket,
+        Key=key,
+        CopySource={'Bucket': self._path.bucket, 'Key': key},
+        MetadataDirective="REPLACE",
+        **attrs
+      )
     else:
       attrs['Bucket'] = self._path.bucket
       attrs['Body'] = content
       attrs['Key'] = key
-      attrs['ContentMD5'] = md5(content)
+      attrs["ChecksumCRC32C"] = str(encode_crc32c_b64(content))
       self._conn.put_object(**attrs)

   @retry
   def copy_file(self, src_path, dest_bucket_name, dest_key):
     key = self.get_path_to_file(src_path)
-    dest_bucket = self._get_bucket(dest_bucket_name)
+    s3client = self._get_bucket(dest_bucket_name)
     copy_source = {
       'Bucket': self._path.bucket,
       'Key': key,
     }
     try:
-      dest_bucket.copy(CopySource=copy_source, Bucket=dest_bucket_name, Key=dest_key)
+      s3client.copy_object(
+        CopySource=copy_source,
+        Bucket=dest_bucket_name,
+        Key=dest_key,
+        MetadataDirective='COPY' # Ensure metadata like Content-Encoding is copied
+      )
     except botocore.exceptions.ClientError as err:
       if err.response['Error']['Code'] in ('NoSuchKey', '404'):
         return False
@@ -1046,6 +1229,11 @@ class S3Interface(StorageInterface):
     if 'ContentEncoding' in resp:
       encoding = resp['ContentEncoding']

+    encoding = ",".join([
+      enc for enc in encoding.split(",")
+      if enc != "aws-chunked"
+    ])
+
     # s3 etags return hex digests but we need the base64 encoding
     # to make uniform comparisons.
     # example s3 etag: "31ee76261d87fed8cb9d4c465c48158c"
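
This comma-splitting filter (repeated in save_file and head below) drops only the transport-level token while preserving any real encodings, e.g.:

    encoding = "aws-chunked,gzip"
    encoding = ",".join(
      enc for enc in encoding.split(",")
      if enc != "aws-chunked"
    )
    # encoding == "gzip"
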
@@ -1073,6 +1261,43 @@ class S3Interface(StorageInterface):
     else:
       raise

+  @retry
+  def save_file(self, src, dest, resumable):
+    key = self.get_path_to_file(src)
+    kwargs = self._additional_attrs.copy()
+
+    resp = self.head(src)
+
+    if resp is None:
+      return False
+
+    mkdir(os.path.dirname(dest))
+
+    encoding = resp.get("Content-Encoding", "") or ""
+    encoding = ",".join([
+      enc for enc in encoding.split(",")
+      if enc != "aws-chunked"
+    ])
+    ext = FileInterface.get_extension(encoding)
+
+    if not dest.endswith(ext):
+      dest += ext
+
+    try:
+      self._conn.download_file(
+        Bucket=self._path.bucket,
+        Key=key,
+        Filename=dest,
+        **kwargs
+      )
+    except botocore.exceptions.ClientError as err:
+      if err.response['Error']['Code'] in ('NoSuchKey', '404'):
+        return False
+      else:
+        raise
+
+    return True
+
   @retry
   def head(self, file_path):
     try:
@@ -1081,6 +1306,11 @@ class S3Interface(StorageInterface):
         Key=self.get_path_to_file(file_path),
         **self._additional_attrs,
       )
+
+      encoding = response.get("ContentEncoding", None)
+      if encoding == '':
+        encoding = None
+
       return {
         "Cache-Control": response.get("CacheControl", None),
         "Content-Length": response.get("ContentLength", None),
@@ -1088,7 +1318,7 @@ class S3Interface(StorageInterface):
         "ETag": response.get("ETag", None),
         "Last-Modified": response.get("LastModified", None),
         "Content-Md5": response["ResponseMetadata"]["HTTPHeaders"].get("content-md5", None),
-        "Content-Encoding": response.get("ContentEncoding", None),
+        "Content-Encoding": encoding,
         "Content-Disposition": response.get("ContentDisposition", None),
         "Content-Language": response.get("ContentLanguage", None),
         "Storage-Class": response.get("StorageClass", None),
@@ -1179,7 +1409,7 @@ class S3Interface(StorageInterface):
     path = posixpath.join(layer_path, prefix)

     @retry
-    def s3lst(continuation_token=None):
+    def s3lst(path, continuation_token=None):
       kwargs = {
         'Bucket': self._path.bucket,
         'Prefix': path,
@@ -1193,27 +1423,44 @@ class S3Interface(StorageInterface):

       return self._conn.list_objects_v2(**kwargs)

-    resp = s3lst()
+    resp = s3lst(path)
+    # the case where the prefix is something like "build", but "build" is a subdirectory
+    # so requery with "build/" to get the proper behavior
+    if (
+      flat
+      and path
+      and path[-1] != '/'
+      and 'Contents' not in resp
+      and len(resp.get("CommonPrefixes", [])) == 1
+    ):
+      path += '/'
+      resp = s3lst(path)

     def iterate(resp):
+      if 'CommonPrefixes' in resp.keys():
+        yield from [
+          item["Prefix"].removeprefix(layer_path)
+          for item in resp['CommonPrefixes']
+        ]
+
       if 'Contents' not in resp.keys():
         resp['Contents'] = []

       for item in resp['Contents']:
         key = item['Key']
-        filename = key.replace(layer_path, '')
+        filename = key.removeprefix(layer_path)
         if filename == '':
           continue
         elif not flat and filename[-1] != '/':
           yield filename
-        elif flat and '/' not in key.replace(path, ''):
+        elif flat and '/' not in key.removeprefix(path):
           yield filename

     for filename in iterate(resp):
       yield filename

     while resp['IsTruncated'] and resp['NextContinuationToken']:
-      resp = s3lst(resp['NextContinuationToken'])
+      resp = s3lst(path, resp['NextContinuationToken'])

       for filename in iterate(resp):
         yield filename

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles/lib.py
@@ -153,6 +153,11 @@ def decode_crc32c_b64(b64digest):
   # !I means network order (big endian) and unsigned int
   return struct.unpack("!I", base64.b64decode(b64digest))[0]

+def encode_crc32c_b64(binary):
+  val = crc32c(binary)
+  val = val.to_bytes(4, 'big')
+  return base64.b64encode(val)
+
 def crc32c(binary):
   """
   Computes the crc32c of a binary string
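
The new encode_crc32c_b64 is the inverse of decode_crc32c_b64 directly above it: crc32c -> 4 big-endian bytes -> base64 (note it returns bytes, per base64.b64encode). A quick round trip for illustration:

    from cloudfiles.lib import crc32c, encode_crc32c_b64, decode_crc32c_b64

    payload = b"hello world"
    digest = encode_crc32c_b64(payload)  # base64-encoded bytes
    assert decode_crc32c_b64(digest) == crc32c(payload)
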

{cloud-files-4.30.1 → cloud_files-5.0.1}/cloudfiles_cli/cloudfiles_cli.py
@@ -83,14 +83,19 @@ def license():
     print(f.read())

 @main.command()
-@click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.')
-@click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.')
-@click.option('-e','--expr',is_flag=True, default=False, help='Use a limited regexp language (e.g. [abc123]\{3\}) to generate prefixes.')
+@click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.', show_default=True)
+@click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.', show_default=True)
+@click.option('-e','--expr',is_flag=True, default=False, help='Use a limited regexp language (e.g. [abc123]\{3\}) to generate prefixes.', show_default=True)
+@click.option('--no-auth', is_flag=True, default=False, help='Uses the http API for read-only operations.', show_default=True)
 @click.argument("cloudpath")
-def ls(shortpath, flat, expr, cloudpath):
+def ls(shortpath, flat, expr, cloudpath, no_auth):
   """Recursively lists the contents of a directory."""
   cloudpath = normalize_path(cloudpath)

+  no_sign_request = no_auth # only affects s3
+  if no_auth and 's3://' not in cloudpath:
+    cloudpath = cloudfiles.paths.to_https_protocol(cloudpath)
+
   _, flt, prefix = get_mfp(cloudpath, True)
   epath = extract(cloudpath)
   if len(epath.path) > 0:
100
105
 
101
106
  flat = flat or flt
102
107
 
103
- cf = CloudFiles(cloudpath)
108
+ cf = CloudFiles(cloudpath, no_sign_request=no_sign_request)
104
109
  iterables = []
105
110
  if expr:
106
111
  # TODO: make this a reality using a parser
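
The --no-auth flag combines two existing mechanisms: s3 paths get anonymous (unsigned) requests, while other protocols are rewritten to their public https endpoints via to_https_protocol. The library-level equivalent, assuming a hypothetical public bucket:

    from cloudfiles import CloudFiles

    cf = CloudFiles("s3://some-public-bucket/", no_sign_request=True)
    print(list(cf.list(flat=True)))
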
@@ -173,11 +178,13 @@ def get_mfp(path, recursive):
 @click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
 @click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
 @click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.option('--resumable', is_flag=True, default=False, help="http->file transfers will download to .part files while they are in progress.", show_default=True)
 @click.pass_context
 def cp(
   ctx, source, destination,
   recursive, compression, progress,
   block_size, part_bytes, no_sign_request,
+  resumable,
 ):
   """
   Copy one or more files from a source to destination.
@@ -194,13 +201,15 @@ def cp(
     _cp_single(
       ctx, src, destination, recursive,
       compression, progress, block_size,
-      part_bytes, no_sign_request
+      part_bytes, no_sign_request,
+      resumable,
     )

 def _cp_single(
   ctx, source, destination, recursive,
   compression, progress, block_size,
-  part_bytes, no_sign_request
+  part_bytes, no_sign_request,
+  resumable,
 ):
   use_stdin = (source == '-')
   use_stdout = (destination == '-')
@@ -210,8 +219,8 @@ def _cp_single(

   nsrc = normalize_path(source)
   ndest = normalize_path(destination)
-
-  issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
+
+  issrcdir = (use_stdin == False) and (ispathdir(source) or CloudFiles(nsrc).isdir())
   isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())

   recursive = recursive and issrcdir
@@ -267,7 +276,11 @@ def _cp_single(

   if not isinstance(xferpaths, str):
     if parallel == 1:
-      _cp(srcpath, destpath, compression, progress, block_size, part_bytes, no_sign_request, xferpaths)
+      _cp(
+        srcpath, destpath, compression,
+        progress, block_size, part_bytes,
+        no_sign_request, resumable, xferpaths
+      )
       return

     total = None
@@ -277,9 +290,12 @@ def _cp_single(
       pass

     if use_stdout:
-      fn = partial(_cp_stdout, no_sign_request, srcpath)
+      fn = partial(_cp_stdout, srcpath, no_sign_request)
     else:
-      fn = partial(_cp, srcpath, destpath, compression, False, block_size, part_bytes, no_sign_request)
+      fn = partial(
+        _cp, srcpath, destpath, compression, False,
+        block_size, part_bytes, no_sign_request, resumable
+      )

     with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
       with pathos.pools.ProcessPool(parallel) as executor:
@@ -309,14 +325,20 @@ def _cp_single(
     cfsrc.transfer_to(cfdest, paths=[{
       "path": xferpaths,
       "dest_path": new_path,
-    }], reencode=compression)
+    }], reencode=compression, resumable=resumable)

-def _cp(src, dst, compression, progress, block_size, part_bytes, no_sign_request, paths):
+def _cp(
+  src, dst, compression, progress,
+  block_size, part_bytes,
+  no_sign_request, resumable,
+  paths
+):
   cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
   cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
   cfsrc.transfer_to(
     cfdest, paths=paths,
-    reencode=compression, block_size=block_size
+    reencode=compression, block_size=block_size,
+    resumable=resumable,
   )

 def _cp_stdout(src, no_sign_request, paths):

{cloud-files-4.30.1 → cloud_files-5.0.1}/setup.cfg
@@ -13,11 +13,11 @@ classifier =
   Development Status :: 4 - Beta
   License :: OSI Approved :: BSD License
   Programming Language :: Python :: 3
-  Programming Language :: Python :: 3.7
-  Programming Language :: Python :: 3.8
   Programming Language :: Python :: 3.9
   Programming Language :: Python :: 3.10
   Programming Language :: Python :: 3.11
+  Programming Language :: Python :: 3.12
+  Programming Language :: Python :: 3.13
   Topic :: Software Development :: Libraries :: Python Modules

 [global]

{cloud-files-4.30.1 → cloud_files-5.0.1}/setup.py
@@ -4,7 +4,7 @@ import sys

 setuptools.setup(
   setup_requires=['pbr'],
-  python_requires=">=3.7,<4.0",
+  python_requires=">=3.9,<4.0",
   include_package_data=True,
   entry_points={
     "console_scripts": [

cloud-files-4.30.1/cloud_files.egg-info/pbr.json (deleted)
@@ -1 +0,0 @@
-{"git_version": "ce90720", "is_release": true}