cloud-files 4.26.0__py3-none-any.whl → 4.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/AUTHORS +1 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/METADATA +25 -15
- cloud_files-4.28.0.dist-info/RECORD +25 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/WHEEL +1 -1
- cloud_files-4.28.0.dist-info/pbr.json +1 -0
- cloudfiles/cloudfiles.py +179 -25
- cloudfiles/interfaces.py +45 -8
- cloudfiles/lib.py +5 -2
- cloudfiles/paths.py +45 -6
- cloudfiles/resumable_tools.py +50 -15
- cloudfiles/secrets.py +18 -0
- cloudfiles_cli/cloudfiles_cli.py +158 -5
- cloud_files-4.26.0.dist-info/RECORD +0 -26
- cloud_files-4.26.0.dist-info/pbr.json +0 -1
- cloudfiles/buckets.py +0 -10
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/LICENSE +0 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/entry_points.txt +0 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/top_level.txt +0 -0
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/AUTHORS
CHANGED

@@ -1,6 +1,7 @@
 Manuel Castro <macastro@princeton.edu>
 Nico Kemnitz <nkemnitz@princeton.edu>
 V24 <55334829+umarfarouk98@users.noreply.github.com>
+William Silversmith <william.silvermsith@gmail.com>
 William Silversmith <william.silversmith@gmail.com>
 madiganz <madiganz@users.noreply.github.com>
 ranlu <ranlu@users.noreply.github.com>
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.26.0
+Version: 4.28.0
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -20,33 +20,33 @@ Requires-Python: >=3.7,<4.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS
-Requires-Dist: boto3
+Requires-Dist: boto3 >=1.4.7
 Requires-Dist: brotli
 Requires-Dist: crc32c
-Requires-Dist: chardet
+Requires-Dist: chardet >=3.0.4
 Requires-Dist: click
-Requires-Dist: deflate
+Requires-Dist: deflate >=0.2.0
 Requires-Dist: gevent
-Requires-Dist: google-auth
-Requires-Dist: google-cloud-core
-Requires-Dist: google-cloud-storage
-Requires-Dist: google-crc32c
+Requires-Dist: google-auth >=1.10.0
+Requires-Dist: google-cloud-core >=1.1.0
+Requires-Dist: google-cloud-storage >=1.31.1
+Requires-Dist: google-crc32c >=1.0.0
 Requires-Dist: orjson
 Requires-Dist: pathos
-Requires-Dist: protobuf
-Requires-Dist: requests
-Requires-Dist: six
-Requires-Dist: tenacity
+Requires-Dist: protobuf >=3.3.0
+Requires-Dist: requests >=2.22.0
+Requires-Dist: six >=1.14.0
+Requires-Dist: tenacity >=4.10.0
 Requires-Dist: tqdm
-Requires-Dist: urllib3
+Requires-Dist: urllib3 >=1.26.3
 Requires-Dist: zstandard
-Requires-Dist: rsa
+Requires-Dist: rsa >=4.7.2
 Requires-Dist: fasteners
 Provides-Extra: numpy
 Requires-Dist: numpy ; extra == 'numpy'
 Provides-Extra: test
 Requires-Dist: pytest ; extra == 'test'
-Requires-Dist: moto ; extra == 'test'
+Requires-Dist: moto >=5 ; extra == 'test'

 [](https://badge.fury.io/py/cloud-files) [](https://github.com/seung-lab/cloud-files/actions?query=workflow%3A%22Test+Suite%22)
@@ -88,6 +88,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded

+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()
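The move, moves, and touch calls added to the README above can be tried end to end without a real bucket by using the in-memory mem:// protocol. A minimal sketch (the bucket and file names here are invented for illustration):

from cloudfiles import CloudFiles

cf = CloudFiles("mem://example-bucket")  # in-memory backend, nothing persisted
cf.put("a", b"hello")                    # seed a file to move
cf.touch("empty")                        # creates a zero byte file since it doesn't exist
cf.move("a", "mem://example-bucket/b")   # implemented as copy + delete under the hood
assert cf.get("b") == b"hello"
assert not cf.exists("a")                # the source was deleted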
@@ -464,6 +470,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers
cloud_files-4.28.0.dist-info/RECORD
ADDED

@@ -0,0 +1,25 @@
+cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
+cloudfiles/cloudfiles.py,sha256=KcHgVjLjPcOsgXVTr3edFFWcuz53xcOtWpxznkiAMos,48989
+cloudfiles/compression.py,sha256=pqYdpu5vfFv-094BpfZ2pgRjVu7ESM9pAZC09P6E8bY,6150
+cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
+cloudfiles/exceptions.py,sha256=H2IcMlZoy2Bsn-6wCPwyLDjg66LZCyxtcf3s_p21FDw,770
+cloudfiles/gcs.py,sha256=_njJ7TpqwrHCjPHRGkBN5alCrCWKM2m9qdy5DhxMZ7U,3718
+cloudfiles/interfaces.py,sha256=lD5hUNTJDkxSnIVRG6my5exEDN72Cqt3VwPfHmYaNDo,37074
+cloudfiles/lib.py,sha256=YOoaEkKtkXc9FdpNnC4FbZJVG1ujbyoxN07WKdUOJcs,5200
+cloudfiles/paths.py,sha256=RnZDDYGUKD6KBFYERgg46WQU8AO-aKlV9klfGcWvOQc,11399
+cloudfiles/resumable_tools.py,sha256=NyuSoGh1SaP5akrHCpd9kgy2-JruEWrHW9lvJxV7jpE,6711
+cloudfiles/scheduler.py,sha256=DqDANmOpB3NdzFgJDNMMibRIkCrXQqIh2XGL8GWoc9c,3668
+cloudfiles/secrets.py,sha256=791b5a8nWSBYtlleGzKeoYIR5jl-FI1bw6INRM4Wy-0,5295
+cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
+cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
+cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
+cloudfiles_cli/cloudfiles_cli.py,sha256=HGlX8oyIL7XASl57KXMlVQunF7pA_MVbMq-lpPA90LY,33911
+cloud_files-4.28.0.dist-info/AUTHORS,sha256=BFVmobgAhaVFI5fqbuqAY5XmBQxe09ZZAsAOTy87hKQ,318
+cloud_files-4.28.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloud_files-4.28.0.dist-info/METADATA,sha256=gY-SuRG7iU8PM4ckUSGrZyuhGiOkCt6qQ4bsLhYknBY,27046
+cloud_files-4.28.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+cloud_files-4.28.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
+cloud_files-4.28.0.dist-info/pbr.json,sha256=nMag4w8eL7zh1OBHgElm2bR8KdiVNL-xEh79OlA1LPI,46
+cloud_files-4.28.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
+cloud_files-4.28.0.dist-info/RECORD,,

cloud_files-4.28.0.dist-info/pbr.json
ADDED

@@ -0,0 +1 @@
+{"git_version": "e9510b0", "is_release": true}
cloudfiles/cloudfiles.py
CHANGED
@@ -2,7 +2,7 @@ from typing import (
   Any, Dict, Optional,
   Union, List, Tuple,
   Callable, Generator,
-  Sequence, cast
+  Sequence, cast, BinaryIO
 )

 from queue import Queue

@@ -29,10 +29,10 @@ from . import compression, paths, gcs
 from .exceptions import UnsupportedProtocolError, MD5IntegrityError, CRC32CIntegrityError
 from .lib import (
   mkdir, totalfn, toiter, scatter, jsonify, nvl,
-  duplicates, first, sip,
+  duplicates, first, sip, touch,
   md5, crc32c, decode_crc32c_b64
 )
-from .paths import ALIASES
+from .paths import ALIASES, find_common_buckets
 from .secrets import CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
 from .threaded_queue import ThreadedQueue, DEFAULT_THREADS
 from .typing import (

@@ -44,7 +44,7 @@ from .scheduler import schedule_jobs
 from .interfaces import (
   FileInterface, HttpInterface,
   S3Interface, GoogleCloudStorageInterface,
-  MemoryInterface
+  MemoryInterface, CaveInterface,
 )

 INTERFACES = {

@@ -54,6 +54,7 @@ INTERFACES = {
   'http': HttpInterface,
   'https': HttpInterface,
   'mem': MemoryInterface,
+  'middleauth+https': CaveInterface,
 }
 for alias in ALIASES:
   INTERFACES[alias] = S3Interface

@@ -181,7 +182,7 @@ def path_to_byte_range_tags(path):
   if isinstance(path, str):
     return (path, None, None, None)
   return (path['path'], path.get('start', None), path.get('end', None), path.get('tags', None))
-
+
 def dl(
   cloudpaths:GetPathType, raw:bool=False, **kwargs
 ) -> Union[bytes,List[dict]]:

@@ -192,23 +193,8 @@ def dl(
     dict.
   """
   cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
-  clustered = defaultdict(list)
-  total = 0
-  for path in cloudpaths:
-    pth = path
-    byte_range = None
-    if isinstance(path, dict):
-      pth = path["path"]
-      byte_range = path["byte_range"]
-
-    epath = paths.extract(pth)
-    bucketpath = paths.asbucketpath(epath)
-    clustered[bucketpath].append({
-      "path": epath.path,
-      "start": (byte_range[0] if byte_range else None), # type: ignore
-      "end": (byte_range[1] if byte_range else None), # type: ignore
-    })
-    total += 1
+  clustered = find_common_buckets(cloudpaths)
+  total = sum([ len(bucket) for bucket in clustered.values() ])

   progress = kwargs.get("progress", False) and total > 1
   pbar = tqdm(total=total, desc="Downloading", disable=(not progress))
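The dl refactor above moves the bucket-clustering loop into paths.find_common_buckets and keeps only the aggregation here. For context, dl accepts plain path strings or dicts carrying a byte_range, and the clustering lets each bucket's batch share one CloudFiles instance. A hedged usage sketch (bucket names invented; dl is imported from the module it is defined in above):

from cloudfiles.cloudfiles import dl

# a single path returns bytes
binary = dl("gs://example-bucket/info")

# paths spanning several buckets, including a partial read, return a list of dicts
results = dl([
  "gs://example-bucket/info",
  { "path": "s3://other-bucket/data.bin", "byte_range": (0, 1024) },
])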
@@ -918,6 +904,60 @@ class CloudFiles:
   )
   return len(results)

+  def touch(
+    self,
+    paths:GetPathType,
+    progress:Optional[bool] = None,
+    total:Optional[int] = None,
+    nocopy:bool = False,
+  ):
+    """
+    Create a zero byte file if it doesn't exist.
+    """
+    paths = toiter(paths)
+    progress = nvl(progress, self.progress)
+    total = totalfn(paths, total)
+
+    if self.protocol == "file":
+      basepath = self.cloudpath.replace("file://", "")
+      for path in tqdm(paths, disable=(not progress), total=total):
+        pth = path
+        if isinstance(path, dict):
+          pth = path["path"]
+        touch(self.join(basepath, pth))
+      return
+
+    results = self.exists(paths, total=total, progress=progress)
+
+    dne = [
+      (fname, b'')
+      for fname, exists in results.items()
+      if not exists
+    ]
+
+    self.puts(dne, progress=progress)
+
+    # def thunk_copy(path):
+    #   with self._get_connection() as conn:
+    #     conn.copy_file(path, self._path.bucket, self.join(self._path.path, path))
+    #   return 1
+
+    # if not nocopy:
+    #   already_exists = (
+    #     fname
+    #     for fname, exists in results.items()
+    #     if exists
+    #   )
+
+    #   results = schedule_jobs(
+    #     fns=( partial(thunk_copy, path) for path in already_exists ),
+    #     progress=progress,
+    #     total=(total - len(dne)),
+    #     concurrency=self.num_threads,
+    #     green=self.green,
+    #     count_return=True,
+    #   )
+
   def list(
     self, prefix:str = "", flat:bool = False
   ) -> Generator[str,None,None]:
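The cloud branch of touch above reduces to one batched existence query followed by a batched upload of empty payloads for the misses, which keeps it fast over high latency object stores. A standalone sketch of the same pattern, using only the CloudFiles calls exercised in the diff:

from cloudfiles import CloudFiles

def touch_missing(cloudpath, filenames):
    """Create zero byte objects for any filename that doesn't exist yet."""
    cf = CloudFiles(cloudpath)
    existence = cf.exists(filenames)  # dict of { filename: bool } for list input
    missing = [ (name, b'') for name, ok in existence.items() if not ok ]
    cf.puts(missing)                  # parallel upload of (path, content) pairs
    return len(missing)

# e.g. touch_missing("mem://example-bucket", ["a", "b", "c"]) -> 3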
@@ -952,6 +992,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from this CloudFiles storage

@@ -968,7 +1009,7 @@ class CloudFiles:
     - gs->gs: Uses GCS copy API to minimize data movement
     - s3->s3: Uses boto s3 copy API to minimize data movement

-
+    cf_dest: another CloudFiles instance or cloudpath
     paths: if None transfer all files from src, else if
       an iterable, transfer only these files.

@@ -996,7 +1037,8 @@ class CloudFiles:
     return cf_dest.transfer_from(
       self, paths, block_size,
       reencode, content_type,
-      allow_missing,
+      allow_missing,
+      progress,
     )

   def transfer_from(

@@ -1007,6 +1049,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from the source CloudFiles storage

@@ -1053,7 +1096,15 @@ class CloudFiles:
     total = totalfn(paths, None)

-    with tqdm(desc="Transferring", total=total, disable=(not self.progress)) as pbar:
+    disable = progress
+    if disable is None:
+      disable = self.progress
+    if disable is None:
+      disable = False
+    else:
+      disable = not disable
+
+    with tqdm(desc="Transferring", total=total, disable=disable) as pbar:
       if (
         cf_src.protocol == "file"
         and self.protocol == "file"

@@ -1210,6 +1261,9 @@ class CloudFiles:
       else:
         raise

+      if dest_path == '':
+        dest_path = src_path
+
       to_upload.append({
         "path": dest_path,
        "content": handle,
@@ -1261,6 +1315,99 @@ class CloudFiles:
   )
   return len(results)

+  def move(self, src:str, dest:str):
+    """Move (rename) src to dest.
+
+    src and dest do not have to be on the same filesystem.
+    """
+    epath = paths.extract(dest)
+    full_cloudpath = paths.asprotocolpath(epath)
+    dest_cloudpath = paths.dirname(full_cloudpath)
+    base_dest = paths.basename(full_cloudpath)
+
+    return self.moves(dest_cloudpath, [
+      (src, base_dest)
+    ], block_size=1, progress=False)
+
+  def moves(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str, str]]],
+    block_size:int = 64,
+    total:Optional[int] = None,
+    progress:Optional[bool] = None,
+  ):
+    """
+    Move (rename) files.
+
+    pairs: [ (src, dest), (src, dest), ... ]
+    """
+    if isinstance(cf_dest, str):
+      cf_dest = CloudFiles(
+        cf_dest, progress=False,
+        green=self.green, num_threads=self.num_threads,
+      )
+
+    total = totalfn(paths, total)
+
+    disable = not (self.progress if progress is None else progress)
+
+    if self.protocol == "file" and cf_dest.protocol == "file":
+      self.__moves_file_to_file(
+        cf_dest, paths, total,
+        disable, block_size
+      )
+      return
+
+    pbar = tqdm(total=total, disable=disable, desc="Moving")
+
+    with pbar:
+      for subpairs in sip(paths, block_size):
+        subpairs = [
+          ((pair, pair) if isinstance(pair, str) else pair)
+          for pair in subpairs
+        ]
+
+        self.transfer_to(cf_dest, paths=(
+          {
+            "path": src,
+            "dest_path": dest,
+          }
+          for src, dest in subpairs
+        ), progress=False)
+        self.delete(( src for src, dest in subpairs ), progress=False)
+        pbar.update(len(subpairs))
+
+  def __moves_file_to_file(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str,str]]],
+    total:Optional[int],
+    disable:bool,
+    block_size:int,
+  ):
+    for pair in tqdm(paths, total=total, disable=disable, desc="Moving"):
+      if isinstance(pair, str):
+        src = pair
+        dest = pair
+      else:
+        (src, dest) = pair
+
+      src = self.join(self.cloudpath, src).replace("file://", "")
+      dest = cf_dest.join(cf_dest.cloudpath, dest).replace("file://", "")
+
+      if os.path.isdir(dest):
+        dest = cf_dest.join(dest, os.path.basename(src))
+      else:
+        mkdir(os.path.dirname(dest))
+
+      src, encoding = FileInterface.get_encoded_file_path(src)
+      _, dest_ext = os.path.splitext(dest)
+      dest_ext_compress = FileInterface.get_extension(encoding)
+      if dest_ext_compress != dest_ext:
+        dest += dest_ext_compress
+      shutil.move(src, dest)
+
   def join(self, *paths:str) -> str:
     """
     Convenience method for joining path strings
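A design note on the block above: object stores have no rename primitive, so moves is copy-then-delete in blocks (transfer_to with explicit dest_path entries, then a batched delete of the sources), while the file://-to-file:// case short-circuits into shutil.move. Usage follows the pairs convention from the docstring (bucket names invented):

from cloudfiles import CloudFiles

cf = CloudFiles("gs://example-bucket")

# rename within the same bucket
cf.moves("gs://example-bucket/", [ ("logs/old.txt", "logs/new.txt") ])

# bare strings keep their names; handy for moving between buckets
cf.moves("s3://other-bucket/", [ "a.txt", "b.txt" ])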
@@ -1439,6 +1586,13 @@ class CloudFile:
     reencode=reencode,
   )

+  def touch(self):
+    return self.cf.touch(self.filename)
+
+  def move(self, dest):
+    """Move (rename) this file to dest."""
+    return self.cf.move(self.filename, dest)
+
   def __len__(self):
     return self.size()
cloudfiles/interfaces.py
CHANGED
@@ -24,7 +24,12 @@ from .compression import COMPRESSION_TYPES
 from .connectionpools import S3ConnectionPool, GCloudBucketPool, MemoryPool, MEMORY_DATA
 from .exceptions import MD5IntegrityError, CompressionError
 from .lib import mkdir, sip, md5, validate_s3_multipart_etag
-from .secrets import http_credentials, CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
+from .secrets import (
+  http_credentials,
+  cave_credentials,
+  CLOUD_FILES_DIR,
+  CLOUD_FILES_LOCK_DIR,
+)

 COMPRESSION_EXTENSIONS = ('.gz', '.br', '.zstd','.bz2','.xz')
 GZIP_TYPES = (True, 'gzip', 1)
@@ -469,6 +474,14 @@ class MemoryInterface(StorageInterface):

     return None

+  def copy_file(self, src_path, dest_bucket, dest_key):
+    key = self.get_path_to_file(src_path)
+    with MEM_BUCKET_POOL_LOCK:
+      pool = MEM_POOL[MemoryPoolParams(dest_bucket)]
+      dest_bucket = pool.get_connection(None, None)
+    dest_bucket[dest_key] = self._data[key]
+    return True
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     return path in self._data or any(( (path + ext in self._data) for ext in COMPRESSION_EXTENSIONS ))
@@ -731,6 +744,9 @@ class HttpInterface(StorageInterface):
     if secrets and 'user' in secrets and 'password' in secrets:
       self.session.auth = (secrets['user'], secrets['password'])

+  def default_headers(self):
+    return {}
+
   def get_path_to_file(self, file_path):
     return posixpath.join(self._path.host, self._path.path, file_path)
@@ -749,7 +765,8 @@ class HttpInterface(StorageInterface):
   @retry
   def head(self, file_path):
     key = self.get_path_to_file(file_path)
-    with self.session.head(key) as resp:
+    headers = self.default_headers()
+    with self.session.head(key, headers=headers) as resp:
       resp.raise_for_status()
       return resp.headers
@@ -761,13 +778,14 @@ class HttpInterface(StorageInterface):
   def get_file(self, file_path, start=None, end=None, part_size=None):
     key = self.get_path_to_file(file_path)

+    headers = self.default_headers()
     if start is not None or end is not None:
       start = int(start) if start is not None else 0
       end = int(end - 1) if end is not None else ''
-      headers = { "Range": f"bytes={start}-{end}" }
-      resp = self.session.get(key, headers=headers)
-    else:
-      resp = self.session.get(key)
+      headers["Range"] = f"bytes={start}-{end}"
+
+    resp = self.session.get(key, headers=headers)
+
     if resp.status_code in (404, 403):
       return (None, None, None, None)
     resp.close()
@@ -788,7 +806,8 @@ class HttpInterface(StorageInterface):
   @retry
   def exists(self, file_path):
     key = self.get_path_to_file(file_path)
-    with self.session.get(key, stream=True) as resp:
+    headers = self.default_headers()
+    with self.session.get(key, stream=True, headers=headers) as resp:
       return resp.ok

   def files_exist(self, file_paths):
@@ -805,11 +824,15 @@ class HttpInterface(StorageInterface):
     if prefix and prefix[-1] != '/':
       prefix += '/'

+    headers = self.default_headers()
+
    @retry
    def request(token):
+      nonlocal headers
      results = self.session.get(
        f"https://storage.googleapis.com/storage/v1/b/{bucket}/o",
        params={ "prefix": prefix, "pageToken": token },
+        headers=headers,
      )
      results.raise_for_status()
      results.close()
@@ -832,12 +855,13 @@ class HttpInterface(StorageInterface):
     baseurl = posixpath.join(self._path.host, self._path.path)

     directories = ['']
+    headers = self.default_headers()

     while directories:
       directory = directories.pop()
       url = posixpath.join(baseurl, directory)

-      resp = requests.get(url)
+      resp = requests.get(url, headers=headers)
       resp.raise_for_status()

       if 'text/html' not in resp.headers["Content-Type"]:
@@ -1200,3 +1224,16 @@ class S3Interface(StorageInterface):
     with S3_BUCKET_POOL_LOCK:
       pool = S3_POOL[S3ConnectionPoolParams(service, self._path.bucket, self._request_payer)]
     pool.release_connection(self._conn)
+
+class CaveInterface(HttpInterface):
+  """
+  CAVE is an internal system that powers proofreading
+  systems in Seung Lab. If you have no idea what this
+  is, don't worry about it.
+  see: https://github.com/CAVEconnectome
+  """
+  def default_headers(self):
+    cred = cave_credentials()
+    return {
+      "Authorization": f"Bearer {cred['token']}",
+    }
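The default_headers hook introduced in this file is a small template-method pattern: the base HttpInterface returns {} and threads that dict through head, get_file, exists, and list, so an authenticating subclass like CaveInterface only has to override one method. A hedged sketch of another subclass using the same hook (the header scheme here is invented for illustration):

from cloudfiles.interfaces import HttpInterface

class ApiKeyInterface(HttpInterface):
    """Hypothetical interface injecting a static API key into every request."""
    def default_headers(self):
        # picked up automatically by every HTTP call in HttpInterface
        return { "X-Api-Key": "example-key" }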
cloudfiles/lib.py
CHANGED
@@ -53,8 +53,11 @@ def mkdir(path):
   return path

 def touch(path):
-  mkdir(os.path.dirname(path))
-  open(path, 'a').close()
+  if os.path.exists(path):
+    os.utime(path)
+  else:
+    mkdir(os.path.dirname(path))
+    open(path, 'a').close()

 def nvl(*args):
   """Return the leftmost argument that is not None."""
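This change brings lib.touch closer to POSIX touch: an existing file only has its modification time refreshed via os.utime, while a missing file still gets its parent directories created and a zero byte file written. A quick demonstration of both branches (temp paths generated at runtime):

import os, tempfile
from cloudfiles.lib import touch

path = os.path.join(tempfile.mkdtemp(), "nested", "example.txt")
touch(path)                      # creates nested/ and a zero byte file
before = os.path.getmtime(path)
touch(path)                      # existing file: mtime refreshed, size unchanged
assert os.path.getmtime(path) >= before
assert os.path.getsize(path) == 0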
cloudfiles/paths.py
CHANGED
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 import orjson
 import os.path
 import posixpath

@@ -8,9 +8,10 @@ import sys
 import urllib.parse

 from typing import Tuple, Optional
+from .typing import GetPathType

 from .exceptions import UnsupportedProtocolError
-from .lib import yellow, toabs, jsonify, mkdir
+from .lib import yellow, toabs, jsonify, mkdir, toiter
 from .secrets import CLOUD_FILES_DIR

 ExtractedPath = namedtuple('ExtractedPath',

@@ -26,7 +27,8 @@ ALIASES_FROM_FILE = None
 ALIASES = {}
 BASE_ALLOWED_PROTOCOLS = [
   'gs', 'file', 's3',
-  'http', 'https', 'mem'
+  'http', 'https', 'mem',
+  'middleauth+https', 'ngauth+https',
 ]
 ALLOWED_PROTOCOLS = list(BASE_ALLOWED_PROTOCOLS)
 ALLOWED_FORMATS = [

@@ -69,7 +71,13 @@ def cloudpath_error(cloudpath):
 def mkregexp():
   fmt_capture = r'|'.join(ALLOWED_FORMATS)
   fmt_capture = "(?:(?P<fmt>{})://)".format(fmt_capture)
-  proto_capture = r'|'.join(ALLOWED_PROTOCOLS)
+
+  allowed_protos = [
+    p.replace('+', r'\+')
+    for p in ALLOWED_PROTOCOLS
+  ]
+
+  proto_capture = r'|'.join(allowed_protos)
   proto_capture = "(?:(?P<proto>{})://)".format(proto_capture)
   regexp = "{}?{}?".format(fmt_capture, proto_capture)
   return regexp
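The mkregexp change exists because the new protocols contain a literal +, which is a quantifier in regular expressions; without escaping, the pattern piece middleauth+https would mean "middleaut, one or more h, then https" instead of the literal protocol name. A tiny stdlib illustration:

import re

protocols = ["middleauth+https", "ngauth+https", "gs"]

unescaped = "|".join(protocols)
escaped = "|".join(p.replace("+", r"\+") for p in protocols)

print(re.match(unescaped, "middleauth+https"))  # None: '+' acted as a quantifier
print(re.match(escaped, "middleauth+https"))    # matches the literal protocol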
@@ -292,8 +300,12 @@ def extract_format_protocol(cloudpath:str, allow_defaults=True) -> tuple:
   proto = m.group('proto')
   endpoint = None

-  if proto in ('http', 'https'):
-    cloudpath = proto + "://" + cloudpath
+  tmp_proto = None
+  if proto is not None:
+    tmp_proto = proto.replace("middleauth+", "").replace("ngauth+", "")
+
+  if tmp_proto in ('http', 'https'):
+    cloudpath = tmp_proto + "://" + cloudpath
     parse = urllib.parse.urlparse(cloudpath)
     endpoint = parse.scheme + "://" + parse.netloc
     cloudpath = cloudpath.replace(endpoint, '', 1)
@@ -379,3 +391,30 @@ def to_https_protocol(cloudpath):
     cloudpath = cloudpath.replace(f"{alias}://", host, 1)

   return cloudpath.replace("s3://", "", 1)
+
+def find_common_buckets(cloudpaths:GetPathType):
+  cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
+  clustered = defaultdict(list)
+
+  for path in cloudpaths:
+    pth = path
+    byte_range = None
+    if isinstance(path, dict):
+      pth = path["path"]
+      byte_range = path["byte_range"]
+
+    epath = extract(pth)
+    if epath.protocol == "file":
+      path = os.sep.join(asfilepath(epath).split(os.sep)[2:])
+      bucketpath = "file://" + os.sep.join(asfilepath(epath).split(os.sep)[:2])
+    else:
+      path = epath.path
+      bucketpath = asbucketpath(epath)
+
+    clustered[bucketpath].append({
+      "path": path,
+      "start": (byte_range[0] if byte_range else None), # type: ignore
+      "end": (byte_range[1] if byte_range else None), # type: ignore
+    })
+
+  return clustered
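find_common_buckets generalizes the clustering that dl used to do inline: paths are grouped by their bucket (or, for file paths, the first two path components) so a caller can open one connection pool per bucket. A hedged sketch of the output shape (paths invented):

from cloudfiles.paths import find_common_buckets

clustered = find_common_buckets([
  "gs://bucket-a/x/1.bin",
  "gs://bucket-a/x/2.bin",
  { "path": "s3://bucket-b/y.bin", "byte_range": (0, 100) },
])
# clustered is a defaultdict(list) keyed by bucket path, roughly:
# { "gs://bucket-a": [ {"path": "x/1.bin", "start": None, "end": None}, ... ],
#   "s3://bucket-b": [ {"path": "y.bin", "start": 0, "end": 100} ] }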
cloudfiles/resumable_tools.py
CHANGED
@@ -39,6 +39,9 @@ class ResumableFileSet:
     self.conn = sqlite3.connect(db_path)
     self.lease_msec = int(lease_msec)

+    self._total = 0
+    self._total_dirty = True
+
   def __del__(self):
     self.conn.close()

@@ -46,6 +49,7 @@ class ResumableFileSet:
     cur = self.conn.cursor()
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
     cur.close()

   def create(self, src, dest, reencode=None):

@@ -53,6 +57,7 @@ class ResumableFileSet:

     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")

     cur.execute(f"""
       CREATE TABLE xfermeta (

@@ -78,6 +83,18 @@ class ResumableFileSet:
     """)
     cur.execute("CREATE INDEX resumableidxfin ON filelist(finished,lease)")
     cur.execute("CREATE INDEX resumableidxfile ON filelist(filename)")
+
+    cur.execute(f"""
+      CREATE TABLE stats (
+        id {INTEGER} PRIMARY KEY {AUTOINC},
+        key TEXT NOT NULL,
+        value {INTEGER}
+      )
+    """)
+    cur.execute(
+      "INSERT INTO stats(id, key, value) VALUES (?,?,?)",
+      [1, 'finished', 0]
+    )
     cur.close()

   def insert(self, fname_iter):

@@ -91,7 +108,9 @@ class ResumableFileSet:
       cur.execute(f"INSERT INTO filelist(filename,finished,lease) VALUES {bindlist}", filenames)
       cur.execute("commit")

-    cur.close()
+    cur.close()
+
+    self._total_dirty = True

   def metadata(self):
     cur = self.conn.cursor()

@@ -111,6 +130,7 @@ class ResumableFileSet:
     for filenames in sip(fname_iter, SQLITE_MAX_PARAMS):
       bindlist = ",".join([f"{BIND}"] * len(filenames))
       cur.execute(f"UPDATE filelist SET finished = 1 WHERE filename in ({bindlist})", filenames)
+      cur.execute(f"UPDATE stats SET value = value + {len(filenames)} WHERE id = 1")
     cur.execute("commit")
     cur.close()

@@ -120,7 +140,7 @@ class ResumableFileSet:
     N = 0

     while True:
-      ts = now_msec()
+      ts = now_msec()
       cur.execute(f"""SELECT filename FROM filelist WHERE finished = 0 AND lease <= {ts} LIMIT {int(block_size)}""")
       rows = cur.fetchmany(block_size)
       N += len(rows)
@@ -140,31 +160,46 @@ class ResumableFileSet:

     cur.close()

-  def total(self):
+  def _scalar_query(self, sql:str) -> int:
     cur = self.conn.cursor()
-    cur.execute(f"SELECT max(id) FROM filelist")
+    cur.execute(sql)
     res = cur.fetchone()
     cur.close()
     return int(res[0])

+  def total(self):
+    """Returns the total number of tasks (both processed and unprocessed)."""
+    if not self._total_dirty:
+      return self._total
+
+    self._total = self._scalar_query(f"SELECT max(id) FROM filelist")
+    self._total_dirty = False
+    return self._total
+
+  def finished(self):
+    return self._scalar_query(f"SELECT value FROM stats WHERE id = 1")
+
   def remaining(self):
-    cur = self.conn.cursor()
-    cur.execute(f"SELECT count(filename) FROM filelist WHERE finished = 0")
-    res = cur.fetchone()
-    cur.close()
-    return int(res[0])
+    return self.total() - self.finished()
+
+  def num_leased(self):
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease > {ts}"
+    )

   def available(self):
-    ts = int(now_msec())
-    cur = self.conn.cursor()
-    cur.execute(f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease <= {ts}")
-    res = cur.fetchone()
-    cur.close()
-    return int(res[0])
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease <= {ts}"
+    )

   def release(self):
+    cur = self.conn.cursor()
     cur.execute(f"UPDATE filelist SET lease = 0")
     cur.execute("commit")
+    cur.close()

   def __len__(self):
     return self.remaining()
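A note on the refactor above: the stats table plus the cached _total make progress queries cheap — finished() reads a single row, total() caches max(id) and only requeries after inserts, and remaining() is just their difference — instead of count(*) scans over a potentially huge filelist. A hedged sketch of how the counters relate (the constructor arguments are assumed from the fields __init__ sets; db path invented):

from cloudfiles.resumable_tools import ResumableFileSet

# assumed signature: positional db_path plus a lease_msec keyword
rfs = ResumableFileSet("xfer.db", lease_msec=60000)
rfs.create("file:///tmp/src", "gs://example-bucket/dest")
rfs.insert(( f"file_{i}" for i in range(1000) ))

assert rfs.total() == 1000
assert rfs.finished() == 0
assert rfs.remaining() == rfs.total() - rfs.finished()  # the maintained invariant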
cloudfiles/secrets.py
CHANGED
@@ -137,6 +137,24 @@ def aws_credentials(bucket = '', service = 'aws', skip_files=False):
   AWS_CREDENTIALS_CACHE[service][bucket] = aws_credentials
   return aws_credentials

+CAVE_CREDENTIALS = None
+def cave_credentials():
+  global CAVE_CREDENTIALS
+  default_file_path = 'cave-secret.json'
+  path = secretpath(default_file_path)
+
+  if CAVE_CREDENTIALS:
+    return CAVE_CREDENTIALS
+
+  if os.path.exists(path):
+    with open(path, 'rt') as f:
+      CAVE_CREDENTIALS = json.loads(f.read())
+  else:
+    CAVE_CREDENTIALS = None
+
+  return CAVE_CREDENTIALS
+
+
 HTTP_CREDENTIALS = None
 def http_credentials():
   global HTTP_CREDENTIALS
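cave_credentials follows the same memoize-then-read-disk pattern as http_credentials below it: the secret is loaded once from cave-secret.json (resolved by secretpath into the configured secrets directory) and cached in a module global. Based on how CaveInterface consumes cred['token'], the secret file presumably looks like:

{ "token": "<your CAVE API token>" }

The exact on-disk location depends on how secretpath resolves the secrets directory in your environment.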
cloudfiles_cli/cloudfiles_cli.py
CHANGED
@@ -27,7 +27,7 @@ import cloudfiles.paths
 from cloudfiles import CloudFiles
 from cloudfiles.resumable_tools import ResumableTransfer
 from cloudfiles.compression import transcode
-from cloudfiles.paths import extract, get_protocol
+from cloudfiles.paths import extract, get_protocol, find_common_buckets
 from cloudfiles.lib import (
   mkdir, toabs, sip, toiter,
   first, red, green,
@@ -184,10 +184,6 @@ def cp(

   If source is "-" read newline delimited filenames from stdin.
   If destination is "-" output to stdout.
-
-  Note that for gs:// to gs:// transfers, the gsutil
-  tool is more efficient because the files never leave
-  Google's network.
   """
   use_stdout = (destination == '-')
   if len(source) > 1 and not ispathdir(destination) and not use_stdout:
@@ -330,6 +326,163 @@ def _cp_stdout(src, no_sign_request, paths):
     content = res["content"].decode("utf8")
     sys.stdout.write(content)

+@main.command()
+@click.argument("source", nargs=-1)
+@click.argument("destination", nargs=1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
+@click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def mv(
+  ctx, source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+):
+  """
+  Move one or more files from a source to destination.
+
+  If source is "-" read newline delimited filenames from stdin.
+  If destination is "-" output to stdout.
+  """
+  if len(source) > 1 and not ispathdir(destination):
+    print("cloudfiles: destination must be a directory for multiple source files.")
+    return
+
+  ctx.ensure_object(dict)
+  parallel = int(ctx.obj.get("parallel", 1))
+
+  for src in source:
+    _mv_single(
+      src, destination,
+      progress, block_size,
+      part_bytes, no_sign_request,
+      parallel
+    )
+
+def _mv_single(
+  source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+  parallel
+):
+  use_stdin = (source == '-')
+
+  nsrc = normalize_path(source)
+  ndest = normalize_path(destination)
+
+  issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
+  isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())
+
+  ensrc = cloudfiles.paths.extract(nsrc)
+  endest = cloudfiles.paths.extract(ndest)
+
+  if ensrc.protocol == "file" and endest.protocol == "file" and issrcdir:
+    shutil.move(nsrc.replace("file://", ""), ndest.replace("file://", ""))
+    return
+
+  recursive = issrcdir
+
+  # For more information see:
+  # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
+  # Try to follow cp rules. If the directory exists,
+  # copy the base source directory into the dest directory
+  # If the directory does not exist, then we copy into
+  # the dest directory.
+  # Both x* and x** should not copy the base directory
+  if recursive and nsrc[-1] != "*":
+    if isdestdir:
+      if nsrc[-1] == '/':
+        nsrc = nsrc[:-1]
+      ndest = cloudpathjoin(ndest, os.path.basename(nsrc))
+
+  # The else clause here is to handle single file transfers
+  srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
+  many, flat, prefix = get_mfp(nsrc, recursive)
+
+  if issrcdir and not many:
+    print(f"cloudfiles: {source} is a directory (not copied).")
+    return
+
+  xferpaths = os.path.basename(nsrc)
+  if use_stdin:
+    xferpaths = sys.stdin.readlines()
+    xferpaths = [ x.replace("\n", "") for x in xferpaths ]
+    prefix = os.path.commonprefix(xferpaths)
+    xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
+    srcpath = cloudpathjoin(srcpath, prefix)
+  elif many:
+    xferpaths = CloudFiles(
+      srcpath, no_sign_request=no_sign_request
+    ).list(prefix=prefix, flat=flat)
+
+  destpath = ndest
+  if isinstance(xferpaths, str):
+    destpath = ndest if isdestdir else os.path.dirname(ndest)
+  elif not isdestdir:
+    if os.path.exists(ndest.replace("file://", "")):
+      print(f"cloudfiles: {ndest} is not a directory (not copied).")
+      return
+
+  if not isinstance(xferpaths, str):
+    if parallel == 1:
+      _mv(srcpath, destpath, progress, block_size, part_bytes, no_sign_request, xferpaths)
+      return
+
+    total = None
+    try:
+      total = len(xferpaths)
+    except TypeError:
+      pass
+
+    fn = partial(_mv, srcpath, destpath, False, block_size, part_bytes, no_sign_request)
+
+    with tqdm(desc="Moving", total=total, disable=(not progress)) as pbar:
+      with pathos.pools.ProcessPool(parallel) as executor:
+        for _ in executor.imap(fn, sip(xferpaths, block_size)):
+          pbar.update(block_size)
+  else:
+    cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
+    if not cfsrc.exists(xferpaths):
+      print(f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}")
+      return
+
+    cfdest = CloudFiles(
+      destpath,
+      progress=progress,
+      composite_upload_threshold=part_bytes,
+    )
+
+    cfsrc.move(xferpaths, ndest)
+
+def _mv(src, dst, progress, block_size, part_bytes, no_sign_request, paths):
+  cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
+  cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
+  cfsrc.moves(
+    cfdest, paths=paths, block_size=block_size
+  )
+
+@main.command()
+@click.argument("sources", nargs=-1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def touch(
+  ctx, sources,
+  progress, no_sign_request,
+):
+  sources = list(map(normalize_path, sources))
+  sources = [ src.replace("precomputed://", "") for src in sources ]
+  pbar = tqdm(total=len(sources), desc="Touch", disable=(not progress))
+
+  clustered = find_common_buckets(sources)
+
+  with pbar:
+    for bucket, items in clustered.items():
+      cf = CloudFiles(bucket, no_sign_request=no_sign_request, progress=False)
+      cf.touch(items)
+      pbar.update(len(items))

 @main.group("xfer")
 def xfergroup():
   """
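Hedged shell examples corresponding to the new commands (bucket names invented; -p is the existing global parallelism flag this CLI already uses with cp):

# rename a file across buckets
cloudfiles mv s3://example-bkt/file.txt gs://other-bkt/renamed.txt

# move a whole prefix with 4 processes, showing progress
cloudfiles -p 4 mv --progress s3://example-bkt/logs/ gs://other-bkt/logs/

# create zero byte placeholders; touch groups them by bucket via find_common_buckets
cloudfiles touch gs://example-bkt/a gs://example-bkt/b s3://other-bkt/c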
cloud_files-4.26.0.dist-info/RECORD
DELETED

@@ -1,26 +0,0 @@
-cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
-cloudfiles/buckets.py,sha256=eRAYdDfvVpNyJyK5ryDRMwgNJUeEuFBJ6doWU2JkAcA,74
-cloudfiles/cloudfiles.py,sha256=YUf_-7DS8-2mCKOWT6mMKxl1glxMTEGomhpCnebtsy8,44801
-cloudfiles/compression.py,sha256=pqYdpu5vfFv-094BpfZ2pgRjVu7ESM9pAZC09P6E8bY,6150
-cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
-cloudfiles/exceptions.py,sha256=H2IcMlZoy2Bsn-6wCPwyLDjg66LZCyxtcf3s_p21FDw,770
-cloudfiles/gcs.py,sha256=_njJ7TpqwrHCjPHRGkBN5alCrCWKM2m9qdy5DhxMZ7U,3718
-cloudfiles/interfaces.py,sha256=Qqhjv2GIVw3ibaTgPpiGK97i7aBDgU9O0bBuOiLI2KM,36117
-cloudfiles/lib.py,sha256=fEqL5APu_WQhl2yxqQbwE7msHdu7U8pstAJw6LgoKO0,5142
-cloudfiles/paths.py,sha256=WHuMbVtKk9nm9akfNF9dgH94awnrWXVIo5mbCvtc0LQ,10302
-cloudfiles/resumable_tools.py,sha256=pK-VcoPjQ2BjGjvlvH4dDCBf6lNsqHG-weiBgxVFbzA,5838
-cloudfiles/scheduler.py,sha256=DqDANmOpB3NdzFgJDNMMibRIkCrXQqIh2XGL8GWoc9c,3668
-cloudfiles/secrets.py,sha256=3BSV2Hn8FGGn4QCs5FP3eGs4WEs5cIXRBsXuF0eIgIY,4918
-cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
-cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
-cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
-cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
-cloudfiles_cli/cloudfiles_cli.py,sha256=eETIOK4QyztQcpA4ZRny21SobLtcrPDlzZ_JaKBmmmA,28449
-cloud_files-4.26.0.dist-info/AUTHORS,sha256=7E2vC894bbLPO_kvUuEB2LFZZbIxZn23HabxH7x0Hgo,266
-cloud_files-4.26.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
-cloud_files-4.26.0.dist-info/METADATA,sha256=-Rsfl3gNmmS8zSMTI7FHTYZ8TEuhzWgF0UKhxoBCdRk,26804
-cloud_files-4.26.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
-cloud_files-4.26.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
-cloud_files-4.26.0.dist-info/pbr.json,sha256=Q1hsyLUlpIPjOyXcpmmGewWq1Difl_oiqt8EjJXRGOE,46
-cloud_files-4.26.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
-cloud_files-4.26.0.dist-info/RECORD,,

cloud_files-4.26.0.dist-info/pbr.json
DELETED

@@ -1 +0,0 @@
-{"git_version": "3ae7c76", "is_release": true}

cloudfiles/buckets.py
DELETED

{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/LICENSE: file without changes
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/entry_points.txt: file without changes
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/top_level.txt: file without changes