cloud-files 4.24.2.tar.gz → 4.30.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloud-files-4.24.2 → cloud_files-4.30.0}/AUTHORS +1 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/ChangeLog +37 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/PKG-INFO +11 -1
- {cloud-files-4.24.2 → cloud_files-4.30.0}/README.md +10 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/automated_test.py +98 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/PKG-INFO +11 -1
- cloud_files-4.30.0/cloud_files.egg-info/pbr.json +1 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/cloudfiles.py +191 -27
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/interfaces.py +97 -7
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/lib.py +5 -2
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/paths.py +64 -12
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/resumable_tools.py +50 -15
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/secrets.py +18 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles_cli/cloudfiles_cli.py +158 -5
- {cloud-files-4.24.2 → cloud_files-4.30.0}/setup.py +3 -0
- cloud-files-4.24.2/cloud_files.egg-info/pbr.json +0 -1
- {cloud-files-4.24.2 → cloud_files-4.30.0}/.github/workflows/test-suite.yml +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/LICENSE +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/MANIFEST.in +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/SOURCES.txt +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/dependency_links.txt +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/entry_points.txt +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/not-zip-safe +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/requires.txt +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/top_level.txt +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/__init__.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/compression.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/connectionpools.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/exceptions.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/gcs.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/scheduler.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/threaded_queue.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/typing.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles_cli/LICENSE +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles_cli/__init__.py +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/requirements.txt +0 -0
- {cloud-files-4.24.2 → cloud_files-4.30.0}/setup.cfg +0 -0
{cloud-files-4.24.2 → cloud_files-4.30.0}/AUTHORS
@@ -1,6 +1,7 @@
 Manuel Castro <macastro@princeton.edu>
 Nico Kemnitz <nkemnitz@princeton.edu>
 V24 <55334829+umarfarouk98@users.noreply.github.com>
+William Silversmith <william.silvermsith@gmail.com>
 William Silversmith <william.silversmith@gmail.com>
 madiganz <madiganz@users.noreply.github.com>
 ranlu <ranlu@users.noreply.github.com>

{cloud-files-4.24.2 → cloud_files-4.30.0}/ChangeLog
@@ -1,6 +1,43 @@
 CHANGES
 =======
 
+4.30.0
+------
+
+* redesign: normalize cloudpaths so file:// isn't required
+
+4.29.0
+------
+
+* feat(size): add return_sum argument
+
+4.28.1
+------
+
+* fix(CloudFile.join): add definition of join to CloudFile
+
+4.28.0
+------
+
+* feat: add cf.move(s), cf.touch methods and cli mv, touch commands (#107)
+* fix: add drop table stats
+* perf: add in stats table for faster xfer performance
+* feat: import improvements to ResumableFileSet from transcoder
+* fix: release in xfer
+* fix: leasing was broken
+* feat: add middleauth+https paths indicate CAVE interface (#106)
+
+4.26.0
+------
+
+* feat: make it possible to normalize e.g. zarr2://./helloworld
+* feat: add all current neuroglancer formats to parsing list
+
+4.25.0
+------
+
+* feat: list for apache servers (#104)
+
 4.24.2
 ------
 

{cloud-files-4.24.2 → cloud_files-4.30.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.24.2
+Version: 4.30.0
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith

@@ -88,6 +88,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded
 
+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()

@@ -464,6 +470,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers

{cloud-files-4.24.2 → cloud_files-4.30.0}/README.md
@@ -38,6 +38,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded
 
+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()

@@ -414,6 +420,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers

{cloud-files-4.24.2 → cloud_files-4.30.0}/automated_test.py
@@ -564,6 +564,15 @@ def test_path_extraction():
     'a/username2/b/c/d', None, None
   ))
 
+def test_middleauth_path_extraction():
+  import cloudfiles.paths
+  path = cloudfiles.paths.extract('middleauth+https://example.com/wow/cool/')
+  assert path.format == 'precomputed'
+  assert path.protocol == 'middleauth+https'
+  assert path.bucket is None
+  assert path.path == 'wow/cool/'
+  assert path.host == "https://example.com"
+
 @pytest.mark.parametrize("protocol", ('mem', 'file', 's3'))
 def test_access_non_cannonical_minimal_path(s3, protocol):
   from cloudfiles import CloudFiles, exceptions

@@ -1142,3 +1151,92 @@ def test_lock_clearing():
   assert len(lst) == 0
 
 
+@pytest.mark.parametrize("protocol", ('mem', 'file', 's3'))
+def test_move(s3, protocol):
+  from cloudfiles import CloudFiles
+
+  url = compute_url(protocol, "move")
+
+  cf = CloudFiles(url)
+  cf.puts([
+    ('hello', b'world'),
+    ('lamp', b'emporium'),
+  ])
+  cf.move("hello", f"{url}/hola")
+
+  assert all(cf.exists(["hola"]).values()) == True
+  assert all(cf.exists(["hello"]).values()) == False
+
+  cf.puts([
+    ('hello', b'world'),
+    ('lamp', b'emporium'),
+  ])
+
+  cf.delete("hola")
+
+  cf.moves(f"{url}", [
+    ("hello", f"hola"),
+    ("lamp", f"lampara"),
+  ])
+
+  assert all(cf.exists(["hola", "lampara"]).values()) == True
+  assert all(cf.exists(["hello", "lamp"]).values()) == False
+
+  cf.delete([ "hola", "hello", "lamp", "lampara" ])
+
+@pytest.mark.parametrize("protocol", ["file", "s3"])
+def test_cli_move_python(s3, protocol):
+  from cloudfiles_cli.cloudfiles_cli import _mv_single
+  from cloudfiles import CloudFiles, exceptions
+
+  test_dir = compute_url(protocol, "cli_mv_python")
+  test_dir2 = compute_url(protocol, "cli_mv_python2")
+  cf = CloudFiles(test_dir)
+
+  N = 100
+
+  def mkfiles():
+    cf.delete(cf.list())
+    for i in range(N):
+      cf[str(i)] = b"hello world"
+
+  def run_mv(src, dest):
+    _mv_single(
+      src, dest,
+      progress=False, block_size=5,
+      part_bytes=int(100e6), no_sign_request=True,
+      parallel=1
+    )
+
+  mkfiles()
+  run_mv(test_dir, test_dir2)
+  assert sorted(list(cf)) == []
+
+  cf2 = CloudFiles(test_dir2)
+  print(sorted(list(cf2)))
+  assert sorted(list(cf2)) == sorted([ f'{i}' for i in range(N) ])
+
+  mkfiles()
+  run_mv(f"{test_dir}/*", f"{test_dir}/move/")
+  assert sorted(list(cf.list(prefix="move"))) == sorted([ f'move/{i}' for i in range(N) ])
+
+  mkfiles()
+  run_mv(f"{test_dir}/1", f"{test_dir}/move/1")
+  assert cf.exists("move/1") == True
+  assert cf.exists("1") == False
+
+@pytest.mark.parametrize("protocol", ["file", "mem", "s3"])
+def test_touch(s3, protocol):
+  from cloudfiles import CloudFiles
+
+  url = compute_url(protocol, "touch")
+
+  cf = CloudFiles(url)
+
+  cf.touch([ str(i) for i in range(20) ])
+
+  assert sorted(list(cf)) == sorted([ str(i) for i in range(20) ])
+
+  cf.touch([ str(i) for i in range(20) ])
+
+  assert sorted(list(cf)) == sorted([ str(i) for i in range(20) ])

{cloud-files-4.24.2 → cloud_files-4.30.0}/cloud_files.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.24.2
+Version: 4.30.0
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith

@@ -88,6 +88,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded
 
+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()

@@ -464,6 +470,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers

cloud_files-4.30.0/cloud_files.egg-info/pbr.json
@@ -0,0 +1 @@
+{"git_version": "97a23a5", "is_release": true}

{cloud-files-4.24.2 → cloud_files-4.30.0}/cloudfiles/cloudfiles.py
@@ -2,7 +2,7 @@ from typing import (
   Any, Dict, Optional,
   Union, List, Tuple,
   Callable, Generator,
-  …
+  Sequence, cast, BinaryIO
 )
 
 from queue import Queue

@@ -29,10 +29,10 @@ from . import compression, paths, gcs
 from .exceptions import UnsupportedProtocolError, MD5IntegrityError, CRC32CIntegrityError
 from .lib import (
   mkdir, totalfn, toiter, scatter, jsonify, nvl,
-  duplicates, first, sip,
+  duplicates, first, sip, touch,
   md5, crc32c, decode_crc32c_b64
 )
-from .paths import ALIASES
+from .paths import ALIASES, find_common_buckets
 from .secrets import CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
 from .threaded_queue import ThreadedQueue, DEFAULT_THREADS
 from .typing import (

@@ -44,7 +44,7 @@ from .scheduler import schedule_jobs
 from .interfaces import (
   FileInterface, HttpInterface,
   S3Interface, GoogleCloudStorageInterface,
-  MemoryInterface
+  MemoryInterface, CaveInterface,
 )
 
 INTERFACES = {

@@ -54,6 +54,7 @@ INTERFACES = {
   'http': HttpInterface,
   'https': HttpInterface,
   'mem': MemoryInterface,
+  'middleauth+https': CaveInterface,
 }
 for alias in ALIASES:
   INTERFACES[alias] = S3Interface

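With this entry, the new middleauth+https protocol is dispatched to CaveInterface, the interface added for CAVE-hosted data (changelog: "middleauth+https paths indicate CAVE interface"). A minimal sketch of addressing such a path; the host is a placeholder and actually reading data would additionally require CAVE credentials:

    from cloudfiles import CloudFiles

    # Hypothetical CAVE-backed dataset; the middleauth+https scheme is looked
    # up in the INTERFACES table and routed to CaveInterface.
    cf = CloudFiles("middleauth+https://datastack.example.org/segmentation")
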
@@ -181,7 +182,7 @@ def path_to_byte_range_tags(path):
   if isinstance(path, str):
     return (path, None, None, None)
   return (path['path'], path.get('start', None), path.get('end', None), path.get('tags', None))
-
+
 def dl(
   cloudpaths:GetPathType, raw:bool=False, **kwargs
 ) -> Union[bytes,List[dict]]:

@@ -192,23 +193,8 @@ def dl(
   dict.
   """
   cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
-  clustered = defaultdict(list)
-  total = 0
-  for path in cloudpaths:
-    pth = path
-    byte_range = None
-    if isinstance(path, dict):
-      pth = path["path"]
-      byte_range = path["byte_range"]
-
-    epath = paths.extract(pth)
-    bucketpath = paths.asbucketpath(epath)
-    clustered[bucketpath].append({
-      "path": epath.path,
-      "start": (byte_range[0] if byte_range else None), # type: ignore
-      "end": (byte_range[1] if byte_range else None), # type: ignore
-    })
-    total += 1
+  clustered = find_common_buckets(cloudpaths)
+  total = sum([ len(bucket) for bucket in clustered.values() ])
 
   progress = kwargs.get("progress", False) and total > 1
   pbar = tqdm(total=total, desc="Downloading", disable=(not progress))

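The rewritten dl() delegates the per-bucket grouping it used to do inline to paths.find_common_buckets, which, as used here, maps each bucket path to the download requests belonging to it. A short usage sketch; the bucket and file names are placeholders and fetching them would require real objects and credentials:

    from cloudfiles.cloudfiles import dl

    # Paths spanning multiple buckets are grouped per bucket before download.
    results = dl([
      "gs://bucket-a/info",
      "gs://bucket-b/info",
    ])
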
@@ -256,6 +242,8 @@ class CloudFiles:
     if use_https:
       cloudpath = paths.to_https_protocol(cloudpath)
 
+    cloudpath = paths.normalize(cloudpath)
+
     self.cloudpath = cloudpath
     self.progress = progress
     self.secrets = secrets

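Normalizing the cloudpath in the constructor is what the 4.30.0 changelog entry "normalize cloudpaths so file:// isn't required" refers to. A minimal sketch, assuming paths.normalize maps a bare local path onto the file:// protocol; the directory name is a placeholder:

    from cloudfiles import CloudFiles

    # After normalization these should address the same local directory.
    cf_bare = CloudFiles("./data")
    cf_file = CloudFiles("file://./data")
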
@@ -851,8 +839,10 @@
 
   def size(
     self, paths:GetPathType,
-    total:Optional[int] = None,
-    …
+    total:Optional[int] = None,
+    progress:Optional[bool] = None,
+    return_sum:bool = False,
+  ) -> Union[Dict[str,int],List[Dict[str,int]],int]:
     """
     Get the size in bytes of one or more files in its stored state.
     """

@@ -873,6 +863,9 @@
       green=self.green,
     )
 
+    if return_sum:
+      return sum(( sz for sz in results.values() ))
+
     if return_multiple:
       return results
     return first(results.values())

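With return_sum=True, size() collapses the usual per-file dictionary into a single integer. A brief usage sketch; the bucket and filenames are placeholders:

    from cloudfiles import CloudFiles

    cf = CloudFiles("gs://bucket/")

    per_file = cf.size(["a", "b"])                # {"a": <bytes>, "b": <bytes>}
    total = cf.size(["a", "b"], return_sum=True)  # single int, the sum of the above
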
@@ -918,6 +911,60 @@
     )
     return len(results)
 
+  def touch(
+    self,
+    paths:GetPathType,
+    progress:Optional[bool] = None,
+    total:Optional[int] = None,
+    nocopy:bool = False,
+  ):
+    """
+    Create a zero byte file if it doesn't exist.
+    """
+    paths = toiter(paths)
+    progress = nvl(progress, self.progress)
+    total = totalfn(paths, total)
+
+    if self.protocol == "file":
+      basepath = self.cloudpath.replace("file://", "")
+      for path in tqdm(paths, disable=(not progress), total=total):
+        pth = path
+        if isinstance(path, dict):
+          pth = path["path"]
+        touch(self.join(basepath, pth))
+      return
+
+    results = self.exists(paths, total=total, progress=progress)
+
+    dne = [
+      (fname, b'')
+      for fname, exists in results.items()
+      if not exists
+    ]
+
+    self.puts(dne, progress=progress)
+
+    # def thunk_copy(path):
+    #   with self._get_connection() as conn:
+    #     conn.copy_file(path, self._path.bucket, self.join(self._path.path, path))
+    #   return 1
+
+    # if not nocopy:
+    #   already_exists = (
+    #     fname
+    #     for fname, exists in results.items()
+    #     if exists
+    #   )
+
+    #   results = schedule_jobs(
+    #     fns=( partial(thunk_copy, path) for path in already_exists ),
+    #     progress=progress,
+    #     total=(total - len(dne)),
+    #     concurrency=self.num_threads,
+    #     green=self.green,
+    #     count_return=True,
+    #   )
+
   def list(
     self, prefix:str = "", flat:bool = False
   ) -> Generator[str,None,None]:

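As implemented, touch() only uploads zero-byte objects for paths that do not yet exist; paths that already exist are left alone (the copy-in-place branch remains commented out). A small sketch on the in-memory protocol; the names are placeholders:

    from cloudfiles import CloudFiles

    cf = CloudFiles("mem://bucket/touch-demo")
    cf["existing"] = b"contents"

    cf.touch(["existing", "brand_new"])

    assert cf.get("existing") == b"contents"  # unchanged
    assert cf.get("brand_new") == b""         # created empty
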
@@ -952,6 +999,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from this CloudFiles storage

@@ -968,7 +1016,7 @@
     - gs->gs: Uses GCS copy API to minimize data movement
     - s3->s3: Uses boto s3 copy API to minimize data movement
 
-    …
+    cf_dest: another CloudFiles instance or cloudpath
     paths: if None transfer all files from src, else if
       an iterable, transfer only these files.
 

@@ -996,7 +1044,8 @@
       return cf_dest.transfer_from(
         self, paths, block_size,
         reencode, content_type,
-        allow_missing,
+        allow_missing,
+        progress,
       )
 
   def transfer_from(

@@ -1007,6 +1056,7 @@
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from the source CloudFiles storage

@@ -1053,7 +1103,15 @@
 
     total = totalfn(paths, None)
 
-    …
+    disable = progress
+    if disable is None:
+      disable = self.progress
+    if disable is None:
+      disable = False
+    else:
+      disable = not disable
+
+    with tqdm(desc="Transferring", total=total, disable=disable) as pbar:
       if (
         cf_src.protocol == "file"
         and self.protocol == "file"

@@ -1210,6 +1268,9 @@
         else:
           raise
 
+        if dest_path == '':
+          dest_path = src_path
+
         to_upload.append({
           "path": dest_path,
           "content": handle,

@@ -1261,6 +1322,99 @@
     )
     return len(results)
 
+  def move(self, src:str, dest:str):
+    """Move (rename) src to dest.
+
+    src and dest do not have to be on the same filesystem.
+    """
+    epath = paths.extract(dest)
+    full_cloudpath = paths.asprotocolpath(epath)
+    dest_cloudpath = paths.dirname(full_cloudpath)
+    base_dest = paths.basename(full_cloudpath)
+
+    return self.moves(dest_cloudpath, [
+      (src, base_dest)
+    ], block_size=1, progress=False)
+
+  def moves(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str, str]]],
+    block_size:int = 64,
+    total:Optional[int] = None,
+    progress:Optional[bool] = None,
+  ):
+    """
+    Move (rename) files.
+
+    pairs: [ (src, dest), (src, dest), ... ]
+    """
+    if isinstance(cf_dest, str):
+      cf_dest = CloudFiles(
+        cf_dest, progress=False,
+        green=self.green, num_threads=self.num_threads,
+      )
+
+    total = totalfn(paths, total)
+
+    disable = not (self.progress if progress is None else progress)
+
+    if self.protocol == "file" and cf_dest.protocol == "file":
+      self.__moves_file_to_file(
+        cf_dest, paths, total,
+        disable, block_size
+      )
+      return
+
+    pbar = tqdm(total=total, disable=disable, desc="Moving")
+
+    with pbar:
+      for subpairs in sip(paths, block_size):
+        subpairs = [
+          ((pair, pair) if isinstance(pair, str) else pair)
+          for pair in subpairs
+        ]
+
+        self.transfer_to(cf_dest, paths=(
+          {
+            "path": src,
+            "dest_path": dest,
+          }
+          for src, dest in subpairs
+        ), progress=False)
+        self.delete(( src for src, dest in subpairs ), progress=False)
+        pbar.update(len(subpairs))
+
+  def __moves_file_to_file(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str,str]]],
+    total:Optional[int],
+    disable:bool,
+    block_size:int,
+  ):
+    for pair in tqdm(paths, total=total, disable=disable, desc="Moving"):
+      if isinstance(pair, str):
+        src = pair
+        dest = pair
+      else:
+        (src, dest) = pair
+
+      src = self.join(self.cloudpath, src).replace("file://", "")
+      dest = cf_dest.join(cf_dest.cloudpath, dest).replace("file://", "")
+
+      if os.path.isdir(dest):
+        dest = cf_dest.join(dest, os.path.basename(src))
+      else:
+        mkdir(os.path.dirname(dest))
+
+      src, encoding = FileInterface.get_encoded_file_path(src)
+      _, dest_ext = os.path.splitext(dest)
+      dest_ext_compress = FileInterface.get_extension(encoding)
+      if dest_ext_compress != dest_ext:
+        dest += dest_ext_compress
+      shutil.move(src, dest)
+
   def join(self, *paths:str) -> str:
     """
     Convenience method for joining path strings

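move() takes a single fully qualified destination cloudpath, while moves() takes a destination bucket or prefix plus (src, dest) pairs relative to it; source and destination need not be on the same storage system. A usage sketch mirroring the README additions; bucket names are placeholders:

    from cloudfiles import CloudFiles

    cf = CloudFiles("gs://bucket-a/")

    # single rename to a full destination path
    cf.move("a", "gs://bucket-b/b")

    # batched renames relative to a destination prefix
    cf.moves("gs://bucket-b/", [ ("c", "c_renamed"), ("d", "d_renamed") ])
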
@@ -1439,6 +1593,16 @@ class CloudFile:
       reencode=reencode,
     )
 
+  def join(self, *args):
+    return self.cf.join(*args)
+
+  def touch(self):
+    return self.cf.touch(self.filename)
+
+  def move(self, dest):
+    """Move (rename) this file to dest."""
+    return self.cf.move(self.filename, dest)
+
   def __len__(self):
     return self.size()
 

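The same operations are now mirrored on single-file CloudFile handles, which delegate to the underlying CloudFiles instance. A brief sketch; the paths are placeholders:

    from cloudfiles import CloudFile

    f = CloudFile("gs://bucket/file1")
    f.touch()                        # create as a zero-byte object if missing
    f.move("gs://bucket/renamed")    # delegates to CloudFiles.move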