cloud-files 4.30.1__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloud-files-4.30.1 → cloud_files-5.0.0}/.github/workflows/test-suite.yml +1 -1
- {cloud-files-4.30.1 → cloud_files-5.0.0}/ChangeLog +26 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/PKG-INFO +4 -4
- {cloud-files-4.30.1 → cloud_files-5.0.0}/automated_test.py +3 -3
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloud_files.egg-info/PKG-INFO +4 -4
- cloud_files-5.0.0/cloud_files.egg-info/pbr.json +1 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/cloudfiles.py +60 -4
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/exceptions.py +4 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/interfaces.py +227 -28
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles_cli/cloudfiles_cli.py +37 -15
- {cloud-files-4.30.1 → cloud_files-5.0.0}/setup.cfg +2 -2
- {cloud-files-4.30.1 → cloud_files-5.0.0}/setup.py +1 -1
- cloud-files-4.30.1/cloud_files.egg-info/pbr.json +0 -1
- {cloud-files-4.30.1 → cloud_files-5.0.0}/AUTHORS +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/LICENSE +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/MANIFEST.in +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/README.md +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloud_files.egg-info/SOURCES.txt +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloud_files.egg-info/dependency_links.txt +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloud_files.egg-info/entry_points.txt +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloud_files.egg-info/not-zip-safe +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloud_files.egg-info/requires.txt +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloud_files.egg-info/top_level.txt +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/__init__.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/compression.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/connectionpools.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/gcs.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/lib.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/paths.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/resumable_tools.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/scheduler.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/secrets.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/threaded_queue.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles/typing.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles_cli/LICENSE +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/cloudfiles_cli/__init__.py +0 -0
- {cloud-files-4.30.1 → cloud_files-5.0.0}/requirements.txt +0 -0
|
@@ -1,10 +1,36 @@
|
|
|
1
1
|
CHANGES
|
|
2
2
|
=======
|
|
3
3
|
|
|
4
|
+
5.0.0
|
|
5
|
+
-----
|
|
6
|
+
|
|
7
|
+
* feat: efficient saving to disk (#108)
|
|
8
|
+
* install: set minimum version to py39
|
|
9
|
+
* ci: drop py38, add py313
|
|
10
|
+
* fix: strip 'aws-chunked' from s3 encodings
|
|
11
|
+
* fix: add no\_sign\_request for s3 listing
|
|
12
|
+
* fix: prefix logic for no-auth gcs
|
|
13
|
+
* fix: list files google http
|
|
14
|
+
* feat(cli): add no-auth flag to ls
|
|
15
|
+
* fix: abort auth error in list files (http, google)
|
|
16
|
+
* fix: make s3 listing consistent with file and mem
|
|
17
|
+
* fix(list): memory and files interface list flat more consistently
|
|
18
|
+
* test: make flat more consistent in list\_files
|
|
19
|
+
* fix: replaceprefix -> removeprefix
|
|
20
|
+
* fix: aws-chunked does not affect byte encoding
|
|
21
|
+
* fix: harmonizing definition of flat across interfaces
|
|
22
|
+
* feat: adding (broken) support for listing common prefixes
|
|
23
|
+
* refactor: use same pattern for removeprefix
|
|
24
|
+
* fix: make "flat" listing work for s3
|
|
25
|
+
|
|
4
26
|
4.30.1
|
|
5
27
|
------
|
|
6
28
|
|
|
7
29
|
* fix(gcs): don't double compress when uploading to gcs w/ composite
|
|
30
|
+
|
|
31
|
+
4.30.0
|
|
32
|
+
------
|
|
33
|
+
|
|
8
34
|
* redesign: normalize cloudpaths so file:// isn't required
|
|
9
35
|
|
|
10
36
|
4.29.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cloud-files
|
|
3
|
-
Version: 4.30.1
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: Fast access to cloud storage and local FS.
|
|
5
5
|
Home-page: https://github.com/seung-lab/cloud-files/
|
|
6
6
|
Author: William Silversmith
|
|
@@ -10,13 +10,13 @@ Classifier: Intended Audience :: Developers
|
|
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
|
11
11
|
Classifier: License :: OSI Approved :: BSD License
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.9
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
-
Requires-Python: >=3.7,<4.0
|
|
19
|
+
Requires-Python: >=3.9,<4.0
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
License-File: LICENSE
|
|
22
22
|
License-File: AUTHORS
|
|
@@ -376,12 +376,12 @@ def test_list(s3, protocol):
|
|
|
376
376
|
assert set(cf.list(prefix='nofolder/')) == set([])
|
|
377
377
|
|
|
378
378
|
# Tests (1)
|
|
379
|
-
assert set(cf.list(prefix='', flat=True)) == set(['info1','info2','info5','info.txt'])
|
|
379
|
+
assert set(cf.list(prefix='', flat=True)) == set(['info1','info2','info5','info.txt', 'build/', 'level1/'])
|
|
380
380
|
assert set(cf.list(prefix='inf', flat=True)) == set(['info1','info2','info5','info.txt'])
|
|
381
381
|
# Tests (2)
|
|
382
|
-
assert set(cf.list(prefix='build', flat=True)) == set([])
|
|
382
|
+
assert set(cf.list(prefix='build', flat=True)) == set(['build/info3'])
|
|
383
383
|
# Tests (3)
|
|
384
|
-
assert set(cf.list(prefix='level1/', flat=True)) == set([])
|
|
384
|
+
assert set(cf.list(prefix='level1/', flat=True)) == set(['level1/level2/'])
|
|
385
385
|
assert set(cf.list(prefix='build/', flat=True)) == set(['build/info3'])
|
|
386
386
|
# Tests (4)
|
|
387
387
|
assert set(cf.list(prefix='build/inf', flat=True)) == set(['build/info3'])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cloud-files
|
|
3
|
-
Version: 4.30.1
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: Fast access to cloud storage and local FS.
|
|
5
5
|
Home-page: https://github.com/seung-lab/cloud-files/
|
|
6
6
|
Author: William Silversmith
|
|
@@ -10,13 +10,13 @@ Classifier: Intended Audience :: Developers
|
|
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
|
11
11
|
Classifier: License :: OSI Approved :: BSD License
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.9
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
-
Requires-Python: >=3.7,<4.0
|
|
19
|
+
Requires-Python: >=3.9,<4.0
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
License-File: LICENSE
|
|
22
22
|
License-File: AUTHORS
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"git_version": "ee08b4f", "is_release": true}
|
|
@@ -743,9 +743,12 @@ class CloudFiles:
|
|
|
743
743
|
return True
|
|
744
744
|
elif prefix[-1] == "/":
|
|
745
745
|
return True
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
746
|
+
try:
|
|
747
|
+
res = first(self.list(prefix=prefix))
|
|
748
|
+
return res is not None
|
|
749
|
+
except NotImplementedError as err:
|
|
750
|
+
res = CloudFile(self.cloudpath).size()
|
|
751
|
+
return res > 0
|
|
749
752
|
|
|
750
753
|
def exists(
|
|
751
754
|
self, paths:GetPathType,
|
|
@@ -1001,6 +1004,7 @@ class CloudFiles:
|
|
|
1001
1004
|
content_type:Optional[str] = None,
|
|
1002
1005
|
allow_missing:bool = False,
|
|
1003
1006
|
progress:Optional[bool] = None,
|
|
1007
|
+
resumable:bool = False,
|
|
1004
1008
|
) -> None:
|
|
1005
1009
|
"""
|
|
1006
1010
|
Transfer all files from this CloudFiles storage
|
|
@@ -1035,6 +1039,11 @@ class CloudFiles:
|
|
|
1035
1039
|
as '' (None), 'gzip', 'br', 'zstd'
|
|
1036
1040
|
content_type: if provided, set the Content-Type header
|
|
1037
1041
|
on the upload. This is necessary for e.g. file->cloud
|
|
1042
|
+
|
|
1043
|
+
resumable: for remote->file downloads, download to a .part
|
|
1044
|
+
file and rename it when the download completes. If the
|
|
1045
|
+
download does not complete, it can be resumed. Only
|
|
1046
|
+
supported for https->file currently.
|
|
1038
1047
|
"""
|
|
1039
1048
|
if isinstance(cf_dest, str):
|
|
1040
1049
|
cf_dest = CloudFiles(
|
|
@@ -1046,7 +1055,7 @@ class CloudFiles:
|
|
|
1046
1055
|
self, paths, block_size,
|
|
1047
1056
|
reencode, content_type,
|
|
1048
1057
|
allow_missing,
|
|
1049
|
-
progress,
|
|
1058
|
+
progress, resumable,
|
|
1050
1059
|
)
|
|
1051
1060
|
|
|
1052
1061
|
def transfer_from(
|
|
@@ -1058,6 +1067,7 @@ class CloudFiles:
|
|
|
1058
1067
|
content_type:Optional[str] = None,
|
|
1059
1068
|
allow_missing:bool = False,
|
|
1060
1069
|
progress:Optional[bool] = None,
|
|
1070
|
+
resumable:bool = False,
|
|
1061
1071
|
) -> None:
|
|
1062
1072
|
"""
|
|
1063
1073
|
Transfer all files from the source CloudFiles storage
|
|
@@ -1092,6 +1102,10 @@ class CloudFiles:
|
|
|
1092
1102
|
as '' (None), 'gzip', 'br', 'zstd'
|
|
1093
1103
|
content_type: if provided, set the Content-Type header
|
|
1094
1104
|
on the upload. This is necessary for e.g. file->cloud
|
|
1105
|
+
resumable: for remote->file downloads, download to a .part
|
|
1106
|
+
file and rename it when the download completes. If the
|
|
1107
|
+
download does not complete, it can be resumed. Only
|
|
1108
|
+
supported for https->file currently.
|
|
1095
1109
|
"""
|
|
1096
1110
|
if isinstance(cf_src, str):
|
|
1097
1111
|
cf_src = CloudFiles(
|
|
@@ -1122,6 +1136,16 @@ class CloudFiles:
|
|
|
1122
1136
|
cf_src, self, paths, total,
|
|
1123
1137
|
pbar, block_size, allow_missing
|
|
1124
1138
|
)
|
|
1139
|
+
elif (
|
|
1140
|
+
cf_src.protocol != "file"
|
|
1141
|
+
and self.protocol == "file"
|
|
1142
|
+
and reencode is None
|
|
1143
|
+
):
|
|
1144
|
+
self.__transfer_remote_to_file(
|
|
1145
|
+
cf_src, self, paths, total,
|
|
1146
|
+
pbar, block_size, content_type,
|
|
1147
|
+
allow_missing, resumable,
|
|
1148
|
+
)
|
|
1125
1149
|
elif (
|
|
1126
1150
|
cf_src.protocol == "file"
|
|
1127
1151
|
and self.protocol != "file"
|
|
@@ -1237,6 +1261,38 @@ class CloudFiles:
|
|
|
1237
1261
|
|
|
1238
1262
|
pbar.update(1)
|
|
1239
1263
|
|
|
1264
|
+
def __transfer_remote_to_file(
|
|
1265
|
+
self, cf_src, cf_dest, paths,
|
|
1266
|
+
total, pbar, block_size, content_type,
|
|
1267
|
+
allow_missing, resumable,
|
|
1268
|
+
):
|
|
1269
|
+
def thunk_save(key):
|
|
1270
|
+
with cf_src._get_connection() as conn:
|
|
1271
|
+
if isinstance(key, dict):
|
|
1272
|
+
dest_key = key.get("dest_path", key["path"])
|
|
1273
|
+
src_key = key["path"]
|
|
1274
|
+
else:
|
|
1275
|
+
src_key = key
|
|
1276
|
+
dest_key = key
|
|
1277
|
+
|
|
1278
|
+
dest_key = os.path.join(cf_dest._path.path, dest_key)
|
|
1279
|
+
found = conn.save_file(src_key, dest_key, resumable=resumable)
|
|
1280
|
+
|
|
1281
|
+
if found == False and not allow_missing:
|
|
1282
|
+
raise FileNotFoundError(src_key)
|
|
1283
|
+
|
|
1284
|
+
return int(found)
|
|
1285
|
+
|
|
1286
|
+
results = schedule_jobs(
|
|
1287
|
+
fns=( partial(thunk_save, path) for path in paths ),
|
|
1288
|
+
progress=pbar,
|
|
1289
|
+
concurrency=self.num_threads,
|
|
1290
|
+
total=totalfn(paths, total),
|
|
1291
|
+
green=self.green,
|
|
1292
|
+
count_return=True,
|
|
1293
|
+
)
|
|
1294
|
+
return len(results)
|
|
1295
|
+
|
|
1240
1296
|
def __transfer_file_to_remote(
|
|
1241
1297
|
self, cf_src, cf_dest, paths,
|
|
1242
1298
|
total, pbar, block_size, content_type,
|
|
@@ -14,6 +14,10 @@ class CompressionError(Exception):
|
|
|
14
14
|
"""
|
|
15
15
|
pass
|
|
16
16
|
|
|
17
|
+
class AuthorizationError(Exception):
|
|
18
|
+
"""Authorization Error"""
|
|
19
|
+
pass
|
|
20
|
+
|
|
17
21
|
class UnsupportedCompressionType(Exception):
|
|
18
22
|
"""
|
|
19
23
|
Raised when attempting to use a compression type which is unsupported
|
|
@@ -11,7 +11,6 @@ import re
|
|
|
11
11
|
import boto3
|
|
12
12
|
import botocore
|
|
13
13
|
import gevent.monkey
|
|
14
|
-
from glob import glob
|
|
15
14
|
import google.cloud.exceptions
|
|
16
15
|
from google.cloud.storage import Batch, Client
|
|
17
16
|
import requests
|
|
@@ -22,7 +21,7 @@ import fasteners
|
|
|
22
21
|
|
|
23
22
|
from .compression import COMPRESSION_TYPES
|
|
24
23
|
from .connectionpools import S3ConnectionPool, GCloudBucketPool, MemoryPool, MEMORY_DATA
|
|
25
|
-
from .exceptions import MD5IntegrityError, CompressionError
|
|
24
|
+
from .exceptions import MD5IntegrityError, CompressionError, AuthorizationError
|
|
26
25
|
from .lib import mkdir, sip, md5, validate_s3_multipart_etag
|
|
27
26
|
from .secrets import (
|
|
28
27
|
http_credentials,
|
|
@@ -339,7 +338,7 @@ class FileInterface(StorageInterface):
|
|
|
339
338
|
"""
|
|
340
339
|
|
|
341
340
|
layer_path = self.get_path_to_file("")
|
|
342
|
-
path = os.path.join(layer_path, prefix)
|
|
341
|
+
path = os.path.join(layer_path, prefix)
|
|
343
342
|
|
|
344
343
|
filenames = []
|
|
345
344
|
|
|
@@ -348,17 +347,33 @@ class FileInterface(StorageInterface):
|
|
|
348
347
|
remove += os.path.sep
|
|
349
348
|
|
|
350
349
|
if flat:
|
|
351
|
-
|
|
352
|
-
|
|
350
|
+
if os.path.isdir(path):
|
|
351
|
+
list_path = path
|
|
352
|
+
list_prefix = ''
|
|
353
|
+
prepend_prefix = prefix
|
|
354
|
+
if prepend_prefix and prepend_prefix[-1] != os.path.sep:
|
|
355
|
+
prepend_prefix += os.path.sep
|
|
356
|
+
else:
|
|
357
|
+
list_path = os.path.dirname(path)
|
|
358
|
+
list_prefix = os.path.basename(prefix)
|
|
359
|
+
prepend_prefix = os.path.dirname(prefix)
|
|
360
|
+
if prepend_prefix != '':
|
|
361
|
+
prepend_prefix += os.path.sep
|
|
362
|
+
|
|
363
|
+
for fobj in os.scandir(list_path):
|
|
364
|
+
if list_prefix != '' and not fobj.name.startswith(list_prefix):
|
|
353
365
|
continue
|
|
354
|
-
|
|
355
|
-
|
|
366
|
+
|
|
367
|
+
if fobj.is_dir():
|
|
368
|
+
filenames.append(f"{prepend_prefix}{fobj.name}{os.path.sep}")
|
|
369
|
+
else:
|
|
370
|
+
filenames.append(f"{prepend_prefix}{fobj.name}")
|
|
356
371
|
else:
|
|
357
372
|
subdir = os.path.join(layer_path, os.path.dirname(prefix))
|
|
358
373
|
for root, dirs, files in os.walk(subdir):
|
|
359
|
-
files =
|
|
360
|
-
files =
|
|
361
|
-
files =
|
|
374
|
+
files = ( os.path.join(root, f) for f in files )
|
|
375
|
+
files = ( f.removeprefix(remove) for f in files )
|
|
376
|
+
files = ( f for f in files if f[:len(prefix)] == prefix )
|
|
362
377
|
|
|
363
378
|
for filename in files:
|
|
364
379
|
filenames.append(filename)
|
|
@@ -452,6 +467,32 @@ class MemoryInterface(StorageInterface):
|
|
|
452
467
|
result = result[slice(start, end)]
|
|
453
468
|
return (result, encoding, None, None)
|
|
454
469
|
|
|
470
|
+
def save_file(self, src, dest, resumable):
|
|
471
|
+
key = self.get_path_to_file(src)
|
|
472
|
+
with EXT_TEST_SEQUENCE_LOCK:
|
|
473
|
+
exts = list(EXT_TEST_SEQUENCE)
|
|
474
|
+
exts = [ x[0] for x in exts ]
|
|
475
|
+
|
|
476
|
+
path = key
|
|
477
|
+
true_ext = ''
|
|
478
|
+
for ext in exts:
|
|
479
|
+
pathext = key + ext
|
|
480
|
+
if pathext in self._data:
|
|
481
|
+
path = pathext
|
|
482
|
+
true_ext = ext
|
|
483
|
+
break
|
|
484
|
+
|
|
485
|
+
filepath = os.path.join(dest, os.path.basename(path))
|
|
486
|
+
|
|
487
|
+
mkdir(os.path.dirname(dest))
|
|
488
|
+
try:
|
|
489
|
+
with open(dest + true_ext, "wb") as f:
|
|
490
|
+
f.write(self._data[path])
|
|
491
|
+
except KeyError:
|
|
492
|
+
return False
|
|
493
|
+
|
|
494
|
+
return True
|
|
495
|
+
|
|
455
496
|
def head(self, file_path):
|
|
456
497
|
raise NotImplementedError()
|
|
457
498
|
|
|
@@ -520,11 +561,22 @@ class MemoryInterface(StorageInterface):
|
|
|
520
561
|
if len(remove) and remove[-1] != '/':
|
|
521
562
|
remove += '/'
|
|
522
563
|
|
|
523
|
-
filenames =
|
|
524
|
-
filenames =
|
|
564
|
+
filenames = ( f.removeprefix(remove) for f in self._data )
|
|
565
|
+
filenames = ( f for f in filenames if f[:len(prefix)] == prefix )
|
|
525
566
|
|
|
526
567
|
if flat:
|
|
527
|
-
|
|
568
|
+
tmp = []
|
|
569
|
+
for f in filenames:
|
|
570
|
+
elems = f.removeprefix(prefix).split('/')
|
|
571
|
+
if len(elems) > 1 and elems[0] == '':
|
|
572
|
+
elems.pop(0)
|
|
573
|
+
elems[0] = f'/{elems[0]}'
|
|
574
|
+
|
|
575
|
+
if len(elems) > 1:
|
|
576
|
+
tmp.append(f"{prefix}{elems[0]}/")
|
|
577
|
+
else:
|
|
578
|
+
tmp.append(f"{prefix}{elems[0]}")
|
|
579
|
+
filenames = tmp
|
|
528
580
|
|
|
529
581
|
def stripext(fname):
|
|
530
582
|
(base, ext) = os.path.splitext(fname)
|
|
@@ -624,6 +676,25 @@ class GoogleCloudStorageInterface(StorageInterface):
|
|
|
624
676
|
|
|
625
677
|
return (content, blob.content_encoding, hash_value, hash_type)
|
|
626
678
|
|
|
679
|
+
@retry
|
|
680
|
+
def save_file(self, src, dest, resumable):
|
|
681
|
+
key = self.get_path_to_file(src)
|
|
682
|
+
blob = self._bucket.blob(key)
|
|
683
|
+
try:
|
|
684
|
+
blob.download_to_filename(
|
|
685
|
+
filename=dest,
|
|
686
|
+
raw_download=True,
|
|
687
|
+
checksum=None
|
|
688
|
+
)
|
|
689
|
+
except google.cloud.exceptions.NotFound:
|
|
690
|
+
return False
|
|
691
|
+
|
|
692
|
+
ext = FileInterface.get_extension(blob.content_encoding)
|
|
693
|
+
if not dest.endswith(ext):
|
|
694
|
+
os.rename(dest, dest + ext)
|
|
695
|
+
|
|
696
|
+
return True
|
|
697
|
+
|
|
627
698
|
@retry_if_not(google.cloud.exceptions.NotFound)
|
|
628
699
|
def head(self, file_path):
|
|
629
700
|
key = self.get_path_to_file(file_path)
|
|
@@ -711,13 +782,24 @@ class GoogleCloudStorageInterface(StorageInterface):
|
|
|
711
782
|
path = posixpath.join(layer_path, prefix)
|
|
712
783
|
|
|
713
784
|
delimiter = '/' if flat else None
|
|
714
|
-
|
|
715
|
-
|
|
785
|
+
blobs = self._bucket.list_blobs(
|
|
786
|
+
prefix=path,
|
|
787
|
+
delimiter=delimiter,
|
|
788
|
+
)
|
|
789
|
+
|
|
790
|
+
if blobs.prefixes:
|
|
791
|
+
yield from (
|
|
792
|
+
item.removeprefix(path)
|
|
793
|
+
for item in blobs.prefixes
|
|
794
|
+
)
|
|
795
|
+
|
|
796
|
+
for blob in blobs:
|
|
797
|
+
filename = blob.name.removeprefix(layer_path)
|
|
716
798
|
if not filename:
|
|
717
799
|
continue
|
|
718
800
|
elif not flat and filename[-1] != '/':
|
|
719
801
|
yield filename
|
|
720
|
-
elif flat and '/' not in blob.name.
|
|
802
|
+
elif flat and '/' not in blob.name.removeprefix(path):
|
|
721
803
|
yield filename
|
|
722
804
|
|
|
723
805
|
def release_connection(self):
|
|
@@ -803,6 +885,43 @@ class HttpInterface(StorageInterface):
|
|
|
803
885
|
|
|
804
886
|
return (resp.content, content_encoding, None, None)
|
|
805
887
|
|
|
888
|
+
@retry
|
|
889
|
+
def save_file(self, src, dest, resumable):
|
|
890
|
+
key = self.get_path_to_file(src)
|
|
891
|
+
|
|
892
|
+
headers = self.head(src)
|
|
893
|
+
content_encoding = headers.get('Content-Encoding', None)
|
|
894
|
+
|
|
895
|
+
try:
|
|
896
|
+
ext = FileInterface.get_extension(content_encoding)
|
|
897
|
+
except ValueError:
|
|
898
|
+
ext = ""
|
|
899
|
+
|
|
900
|
+
fulldest = dest + ext
|
|
901
|
+
|
|
902
|
+
partname = fulldest
|
|
903
|
+
if resumable:
|
|
904
|
+
partname += ".part"
|
|
905
|
+
|
|
906
|
+
downloaded_size = 0
|
|
907
|
+
if resumable and os.path.exists(partname):
|
|
908
|
+
downloaded_size = os.path.getsize(partname)
|
|
909
|
+
|
|
910
|
+
range_headers = { "Range": f"bytes={downloaded_size}-" }
|
|
911
|
+
with self.session.get(key, headers=range_headers, stream=True) as resp:
|
|
912
|
+
if resp.status_code not in [200, 206]:
|
|
913
|
+
resp.raise_for_status()
|
|
914
|
+
return False
|
|
915
|
+
|
|
916
|
+
with open(partname, 'ab') as f:
|
|
917
|
+
for chunk in resp.iter_content(chunk_size=int(10e6)):
|
|
918
|
+
f.write(chunk)
|
|
919
|
+
|
|
920
|
+
if resumable:
|
|
921
|
+
os.rename(partname, fulldest)
|
|
922
|
+
|
|
923
|
+
return True
|
|
924
|
+
|
|
806
925
|
@retry
|
|
807
926
|
def exists(self, file_path):
|
|
808
927
|
key = self.get_path_to_file(file_path)
|
|
@@ -821,29 +940,49 @@ class HttpInterface(StorageInterface):
|
|
|
821
940
|
)
|
|
822
941
|
if prefix and prefix[0] == '/':
|
|
823
942
|
prefix = prefix[1:]
|
|
824
|
-
if prefix and prefix[-1] != '/':
|
|
825
|
-
prefix += '/'
|
|
826
943
|
|
|
827
944
|
headers = self.default_headers()
|
|
828
945
|
|
|
829
|
-
@
|
|
946
|
+
@retry_if_not(AuthorizationError)
|
|
830
947
|
def request(token):
|
|
831
948
|
nonlocal headers
|
|
949
|
+
params = {}
|
|
950
|
+
if prefix:
|
|
951
|
+
params["prefix"] = prefix
|
|
952
|
+
if token is not None:
|
|
953
|
+
params["pageToken"] = token
|
|
954
|
+
if flat:
|
|
955
|
+
params["delimiter"] = '/'
|
|
956
|
+
|
|
832
957
|
results = self.session.get(
|
|
833
958
|
f"https://storage.googleapis.com/storage/v1/b/{bucket}/o",
|
|
834
|
-
params=
|
|
959
|
+
params=params,
|
|
835
960
|
headers=headers,
|
|
836
961
|
)
|
|
962
|
+
if results.status_code in [401,403]:
|
|
963
|
+
raise AuthorizationError(f"http {results.status_code}")
|
|
964
|
+
|
|
837
965
|
results.raise_for_status()
|
|
838
966
|
results.close()
|
|
839
967
|
return results.json()
|
|
840
968
|
|
|
969
|
+
strip = posixpath.dirname(prefix)
|
|
970
|
+
if strip and strip[-1] != '/':
|
|
971
|
+
strip += '/'
|
|
972
|
+
|
|
841
973
|
token = None
|
|
842
974
|
while True:
|
|
843
975
|
results = request(token)
|
|
844
976
|
|
|
845
|
-
|
|
846
|
-
yield
|
|
977
|
+
if 'prefixes' in results:
|
|
978
|
+
yield from (
|
|
979
|
+
item.removeprefix(strip)
|
|
980
|
+
for item in results["prefixes"]
|
|
981
|
+
)
|
|
982
|
+
|
|
983
|
+
for res in results.get("items", []):
|
|
984
|
+
print(res["name"])
|
|
985
|
+
yield res["name"].removeprefix(strip)
|
|
847
986
|
|
|
848
987
|
token = results.get("nextPageToken", None)
|
|
849
988
|
if token is None:
|
|
@@ -895,13 +1034,15 @@ class HttpInterface(StorageInterface):
|
|
|
895
1034
|
def list_files(self, prefix, flat=False):
|
|
896
1035
|
if self._path.host == "https://storage.googleapis.com":
|
|
897
1036
|
yield from self._list_files_google(prefix, flat)
|
|
898
|
-
|
|
1037
|
+
return
|
|
1038
|
+
|
|
899
1039
|
url = posixpath.join(self._path.host, self._path.path, prefix)
|
|
900
1040
|
resp = requests.head(url)
|
|
901
1041
|
|
|
902
1042
|
server = resp.headers.get("Server", "").lower()
|
|
903
1043
|
if 'apache' in server:
|
|
904
1044
|
yield from self._list_files_apache(prefix, flat)
|
|
1045
|
+
return
|
|
905
1046
|
else:
|
|
906
1047
|
raise NotImplementedError()
|
|
907
1048
|
|
|
@@ -1046,6 +1187,11 @@ class S3Interface(StorageInterface):
|
|
|
1046
1187
|
if 'ContentEncoding' in resp:
|
|
1047
1188
|
encoding = resp['ContentEncoding']
|
|
1048
1189
|
|
|
1190
|
+
encoding = ",".join([
|
|
1191
|
+
enc for enc in encoding.split(",")
|
|
1192
|
+
if enc != "aws-chunked"
|
|
1193
|
+
])
|
|
1194
|
+
|
|
1049
1195
|
# s3 etags return hex digests but we need the base64 encoding
|
|
1050
1196
|
# to make uniform comparisons.
|
|
1051
1197
|
# example s3 etag: "31ee76261d87fed8cb9d4c465c48158c"
|
|
@@ -1073,6 +1219,42 @@ class S3Interface(StorageInterface):
|
|
|
1073
1219
|
else:
|
|
1074
1220
|
raise
|
|
1075
1221
|
|
|
1222
|
+
@retry
|
|
1223
|
+
def save_file(self, src, dest, resumable):
|
|
1224
|
+
key = self.get_path_to_file(src)
|
|
1225
|
+
kwargs = self._additional_attrs.copy()
|
|
1226
|
+
|
|
1227
|
+
resp = self.head(src)
|
|
1228
|
+
|
|
1229
|
+
if resp is None:
|
|
1230
|
+
return False
|
|
1231
|
+
|
|
1232
|
+
mkdir(os.path.dirname(dest))
|
|
1233
|
+
|
|
1234
|
+
encoding = ",".join([
|
|
1235
|
+
enc for enc in resp.get("Content-Encoding", "").split(",")
|
|
1236
|
+
if enc != "aws-chunked"
|
|
1237
|
+
])
|
|
1238
|
+
ext = FileInterface.get_extension(encoding)
|
|
1239
|
+
|
|
1240
|
+
if not dest.endswith(ext):
|
|
1241
|
+
dest += ext
|
|
1242
|
+
|
|
1243
|
+
try:
|
|
1244
|
+
self._conn.download_file(
|
|
1245
|
+
Bucket=self._path.bucket,
|
|
1246
|
+
Key=key,
|
|
1247
|
+
Filename=dest,
|
|
1248
|
+
**kwargs
|
|
1249
|
+
)
|
|
1250
|
+
except botocore.exceptions.ClientError as err:
|
|
1251
|
+
if err.response['Error']['Code'] in ('NoSuchKey', '404'):
|
|
1252
|
+
return False
|
|
1253
|
+
else:
|
|
1254
|
+
raise
|
|
1255
|
+
|
|
1256
|
+
return True
|
|
1257
|
+
|
|
1076
1258
|
@retry
|
|
1077
1259
|
def head(self, file_path):
|
|
1078
1260
|
try:
|
|
@@ -1179,7 +1361,7 @@ class S3Interface(StorageInterface):
|
|
|
1179
1361
|
path = posixpath.join(layer_path, prefix)
|
|
1180
1362
|
|
|
1181
1363
|
@retry
|
|
1182
|
-
def s3lst(continuation_token=None):
|
|
1364
|
+
def s3lst(path, continuation_token=None):
|
|
1183
1365
|
kwargs = {
|
|
1184
1366
|
'Bucket': self._path.bucket,
|
|
1185
1367
|
'Prefix': path,
|
|
@@ -1193,27 +1375,44 @@ class S3Interface(StorageInterface):
|
|
|
1193
1375
|
|
|
1194
1376
|
return self._conn.list_objects_v2(**kwargs)
|
|
1195
1377
|
|
|
1196
|
-
resp = s3lst()
|
|
1378
|
+
resp = s3lst(path)
|
|
1379
|
+
# the case where the prefix is something like "build", but "build" is a subdirectory
|
|
1380
|
+
# so requery with "build/" to get the proper behavior
|
|
1381
|
+
if (
|
|
1382
|
+
flat
|
|
1383
|
+
and path
|
|
1384
|
+
and path[-1] != '/'
|
|
1385
|
+
and 'Contents' not in resp
|
|
1386
|
+
and len(resp.get("CommonPrefixes", [])) == 1
|
|
1387
|
+
):
|
|
1388
|
+
path += '/'
|
|
1389
|
+
resp = s3lst(path)
|
|
1197
1390
|
|
|
1198
1391
|
def iterate(resp):
|
|
1392
|
+
if 'CommonPrefixes' in resp.keys():
|
|
1393
|
+
yield from [
|
|
1394
|
+
item["Prefix"].removeprefix(layer_path)
|
|
1395
|
+
for item in resp['CommonPrefixes']
|
|
1396
|
+
]
|
|
1397
|
+
|
|
1199
1398
|
if 'Contents' not in resp.keys():
|
|
1200
1399
|
resp['Contents'] = []
|
|
1201
1400
|
|
|
1202
1401
|
for item in resp['Contents']:
|
|
1203
1402
|
key = item['Key']
|
|
1204
|
-
filename = key.
|
|
1403
|
+
filename = key.removeprefix(layer_path)
|
|
1205
1404
|
if filename == '':
|
|
1206
1405
|
continue
|
|
1207
1406
|
elif not flat and filename[-1] != '/':
|
|
1208
1407
|
yield filename
|
|
1209
|
-
elif flat and '/' not in key.
|
|
1408
|
+
elif flat and '/' not in key.removeprefix(path):
|
|
1210
1409
|
yield filename
|
|
1211
1410
|
|
|
1212
1411
|
for filename in iterate(resp):
|
|
1213
1412
|
yield filename
|
|
1214
1413
|
|
|
1215
1414
|
while resp['IsTruncated'] and resp['NextContinuationToken']:
|
|
1216
|
-
resp = s3lst(resp['NextContinuationToken'])
|
|
1415
|
+
resp = s3lst(path, resp['NextContinuationToken'])
|
|
1217
1416
|
|
|
1218
1417
|
for filename in iterate(resp):
|
|
1219
1418
|
yield filename
|
|
@@ -83,14 +83,19 @@ def license():
|
|
|
83
83
|
print(f.read())
|
|
84
84
|
|
|
85
85
|
@main.command()
|
|
86
|
-
@click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.')
|
|
87
|
-
@click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.')
|
|
88
|
-
@click.option('-e','--expr',is_flag=True, default=False, help='Use a limited regexp language (e.g. [abc123]\{3\}) to generate prefixes.')
|
|
86
|
+
@click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.',show_default=True)
|
|
87
|
+
@click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.',show_default=True)
|
|
88
|
+
@click.option('-e','--expr',is_flag=True, default=False, help='Use a limited regexp language (e.g. [abc123]\{3\}) to generate prefixes.', show_default=True)
|
|
89
|
+
@click.option('--no-auth',is_flag=True, default=False, help='Uses the http API for read-only operations.', show_default=True)
|
|
89
90
|
@click.argument("cloudpath")
|
|
90
|
-
def ls(shortpath, flat, expr, cloudpath):
|
|
91
|
+
def ls(shortpath, flat, expr, cloudpath, no_auth):
|
|
91
92
|
"""Recursively lists the contents of a directory."""
|
|
92
93
|
cloudpath = normalize_path(cloudpath)
|
|
93
94
|
|
|
95
|
+
no_sign_request = no_auth # only affects s3
|
|
96
|
+
if no_auth and 's3://' not in cloudpath:
|
|
97
|
+
cloudpath = cloudfiles.paths.to_https_protocol(cloudpath)
|
|
98
|
+
|
|
94
99
|
_, flt, prefix = get_mfp(cloudpath, True)
|
|
95
100
|
epath = extract(cloudpath)
|
|
96
101
|
if len(epath.path) > 0:
|
|
@@ -100,7 +105,7 @@ def ls(shortpath, flat, expr, cloudpath):
|
|
|
100
105
|
|
|
101
106
|
flat = flat or flt
|
|
102
107
|
|
|
103
|
-
cf = CloudFiles(cloudpath)
|
|
108
|
+
cf = CloudFiles(cloudpath, no_sign_request=no_sign_request)
|
|
104
109
|
iterables = []
|
|
105
110
|
if expr:
|
|
106
111
|
# TODO: make this a reality using a parser
|
|
@@ -173,11 +178,13 @@ def get_mfp(path, recursive):
|
|
|
173
178
|
@click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
|
|
174
179
|
@click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
|
|
175
180
|
@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
|
|
181
|
+
@click.option('--resumable', is_flag=True, default=False, help="http->file transfers will dowload to .part files while they are in progress.", show_default=True)
|
|
176
182
|
@click.pass_context
|
|
177
183
|
def cp(
|
|
178
184
|
ctx, source, destination,
|
|
179
185
|
recursive, compression, progress,
|
|
180
186
|
block_size, part_bytes, no_sign_request,
|
|
187
|
+
resumable,
|
|
181
188
|
):
|
|
182
189
|
"""
|
|
183
190
|
Copy one or more files from a source to destination.
|
|
@@ -194,13 +201,15 @@ def cp(
|
|
|
194
201
|
_cp_single(
|
|
195
202
|
ctx, src, destination, recursive,
|
|
196
203
|
compression, progress, block_size,
|
|
197
|
-
part_bytes, no_sign_request
|
|
204
|
+
part_bytes, no_sign_request,
|
|
205
|
+
resumable,
|
|
198
206
|
)
|
|
199
207
|
|
|
200
208
|
def _cp_single(
|
|
201
209
|
ctx, source, destination, recursive,
|
|
202
210
|
compression, progress, block_size,
|
|
203
|
-
part_bytes, no_sign_request
|
|
211
|
+
part_bytes, no_sign_request,
|
|
212
|
+
resumable,
|
|
204
213
|
):
|
|
205
214
|
use_stdin = (source == '-')
|
|
206
215
|
use_stdout = (destination == '-')
|
|
@@ -210,8 +219,8 @@ def _cp_single(
|
|
|
210
219
|
|
|
211
220
|
nsrc = normalize_path(source)
|
|
212
221
|
ndest = normalize_path(destination)
|
|
213
|
-
|
|
214
|
-
issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir())
|
|
222
|
+
|
|
223
|
+
issrcdir = (use_stdin == False) and (ispathdir(source) or CloudFiles(nsrc).isdir())
|
|
215
224
|
isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())
|
|
216
225
|
|
|
217
226
|
recursive = recursive and issrcdir
|
|
@@ -267,7 +276,11 @@ def _cp_single(
|
|
|
267
276
|
|
|
268
277
|
if not isinstance(xferpaths, str):
|
|
269
278
|
if parallel == 1:
|
|
270
|
-
_cp(
|
|
279
|
+
_cp(
|
|
280
|
+
srcpath, destpath, compression,
|
|
281
|
+
progress, block_size, part_bytes,
|
|
282
|
+
no_sign_request, resumable, xferpaths
|
|
283
|
+
)
|
|
271
284
|
return
|
|
272
285
|
|
|
273
286
|
total = None
|
|
@@ -277,9 +290,12 @@ def _cp_single(
|
|
|
277
290
|
pass
|
|
278
291
|
|
|
279
292
|
if use_stdout:
|
|
280
|
-
fn = partial(_cp_stdout,
|
|
293
|
+
fn = partial(_cp_stdout, srcpath, no_sign_request)
|
|
281
294
|
else:
|
|
282
|
-
fn = partial(
|
|
295
|
+
fn = partial(
|
|
296
|
+
_cp, srcpath, destpath, compression, False,
|
|
297
|
+
block_size, part_bytes, no_sign_request, resumable
|
|
298
|
+
)
|
|
283
299
|
|
|
284
300
|
with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
|
|
285
301
|
with pathos.pools.ProcessPool(parallel) as executor:
|
|
@@ -309,14 +325,20 @@ def _cp_single(
|
|
|
309
325
|
cfsrc.transfer_to(cfdest, paths=[{
|
|
310
326
|
"path": xferpaths,
|
|
311
327
|
"dest_path": new_path,
|
|
312
|
-
}], reencode=compression)
|
|
328
|
+
}], reencode=compression, resumable=resumable)
|
|
313
329
|
|
|
314
|
-
def _cp(
|
|
330
|
+
def _cp(
|
|
331
|
+
src, dst, compression, progress,
|
|
332
|
+
block_size, part_bytes,
|
|
333
|
+
no_sign_request, resumable,
|
|
334
|
+
paths
|
|
335
|
+
):
|
|
315
336
|
cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
|
|
316
337
|
cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
|
|
317
338
|
cfsrc.transfer_to(
|
|
318
339
|
cfdest, paths=paths,
|
|
319
|
-
reencode=compression, block_size=block_size
|
|
340
|
+
reencode=compression, block_size=block_size,
|
|
341
|
+
resumable=resumable,
|
|
320
342
|
)
|
|
321
343
|
|
|
322
344
|
def _cp_stdout(src, no_sign_request, paths):
|
|
@@ -13,11 +13,11 @@ classifier =
|
|
|
13
13
|
Development Status :: 4 - Beta
|
|
14
14
|
License :: OSI Approved :: BSD License
|
|
15
15
|
Programming Language :: Python :: 3
|
|
16
|
-
Programming Language :: Python :: 3.7
|
|
17
|
-
Programming Language :: Python :: 3.8
|
|
18
16
|
Programming Language :: Python :: 3.9
|
|
19
17
|
Programming Language :: Python :: 3.10
|
|
20
18
|
Programming Language :: Python :: 3.11
|
|
19
|
+
Programming Language :: Python :: 3.12
|
|
20
|
+
Programming Language :: Python :: 3.13
|
|
21
21
|
Topic :: Software Development :: Libraries :: Python Modules
|
|
22
22
|
|
|
23
23
|
[global]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"git_version": "ce90720", "is_release": true}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|