cloud-files 4.27.0-py3-none-any.whl → 4.28.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
{cloud_files-4.27.0.dist-info → cloud_files-4.28.1.dist-info}/AUTHORS RENAMED
@@ -1,6 +1,7 @@
 Manuel Castro <macastro@princeton.edu>
 Nico Kemnitz <nkemnitz@princeton.edu>
 V24 <55334829+umarfarouk98@users.noreply.github.com>
+William Silversmith <william.silvermsith@gmail.com>
 William Silversmith <william.silversmith@gmail.com>
 madiganz <madiganz@users.noreply.github.com>
 ranlu <ranlu@users.noreply.github.com>
{cloud_files-4.27.0.dist-info → cloud_files-4.28.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.27.0
+Version: 4.28.1
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -20,33 +20,33 @@ Requires-Python: >=3.7,<4.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS
-Requires-Dist: boto3 (>=1.4.7)
+Requires-Dist: boto3 >=1.4.7
 Requires-Dist: brotli
 Requires-Dist: crc32c
-Requires-Dist: chardet (>=3.0.4)
+Requires-Dist: chardet >=3.0.4
 Requires-Dist: click
-Requires-Dist: deflate (>=0.2.0)
+Requires-Dist: deflate >=0.2.0
 Requires-Dist: gevent
-Requires-Dist: google-auth (>=1.10.0)
-Requires-Dist: google-cloud-core (>=1.1.0)
-Requires-Dist: google-cloud-storage (>=1.31.1)
-Requires-Dist: google-crc32c (>=1.0.0)
+Requires-Dist: google-auth >=1.10.0
+Requires-Dist: google-cloud-core >=1.1.0
+Requires-Dist: google-cloud-storage >=1.31.1
+Requires-Dist: google-crc32c >=1.0.0
 Requires-Dist: orjson
 Requires-Dist: pathos
-Requires-Dist: protobuf (>=3.3.0)
-Requires-Dist: requests (>=2.22.0)
-Requires-Dist: six (>=1.14.0)
-Requires-Dist: tenacity (>=4.10.0)
+Requires-Dist: protobuf >=3.3.0
+Requires-Dist: requests >=2.22.0
+Requires-Dist: six >=1.14.0
+Requires-Dist: tenacity >=4.10.0
 Requires-Dist: tqdm
-Requires-Dist: urllib3 (>=1.26.3)
+Requires-Dist: urllib3 >=1.26.3
 Requires-Dist: zstandard
-Requires-Dist: rsa (>=4.7.2)
+Requires-Dist: rsa >=4.7.2
 Requires-Dist: fasteners
 Provides-Extra: numpy
 Requires-Dist: numpy ; extra == 'numpy'
 Provides-Extra: test
 Requires-Dist: pytest ; extra == 'test'
-Requires-Dist: moto (>=5) ; extra == 'test'
+Requires-Dist: moto >=5 ; extra == 'test'
 
 [![PyPI version](https://badge.fury.io/py/cloud-files.svg)](https://badge.fury.io/py/cloud-files) [![Test Suite](https://github.com/seung-lab/cloud-files/workflows/Test%20Suite/badge.svg)](https://github.com/seung-lab/cloud-files/actions?query=workflow%3A%22Test+Suite%22)
 
@@ -88,6 +88,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded
 
+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()
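The move/moves/touch additions above are the user-facing half of this release. A minimal sketch of how they compose (bucket name hypothetical; moves resolves each src against the calling instance and each dest against the destination path given as its first argument):

    from cloudfiles import CloudFiles

    cf = CloudFiles("gs://example-bucket/")  # hypothetical bucket

    # batch rename: srcs are relative to cf, dests to the first argument
    cf.moves("gs://example-bucket/renamed/", [
      ("logs/a.txt", "a.txt"),
      ("logs/b.txt", "b.txt"),
    ])

    # create zero byte sentinels only where files don't already exist
    cf.touch([ "renamed/.done", "logs/.migrated" ])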
@@ -464,6 +470,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers
cloud_files-4.28.1.dist-info/RECORD ADDED
@@ -0,0 +1,25 @@
+cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
+cloudfiles/cloudfiles.py,sha256=CPt0GBb_udN4MoE65g_-lh_N__INyWaaBUXIdRtv9sg,49046
+cloudfiles/compression.py,sha256=pqYdpu5vfFv-094BpfZ2pgRjVu7ESM9pAZC09P6E8bY,6150
+cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
+cloudfiles/exceptions.py,sha256=H2IcMlZoy2Bsn-6wCPwyLDjg66LZCyxtcf3s_p21FDw,770
+cloudfiles/gcs.py,sha256=_njJ7TpqwrHCjPHRGkBN5alCrCWKM2m9qdy5DhxMZ7U,3718
+cloudfiles/interfaces.py,sha256=lD5hUNTJDkxSnIVRG6my5exEDN72Cqt3VwPfHmYaNDo,37074
+cloudfiles/lib.py,sha256=YOoaEkKtkXc9FdpNnC4FbZJVG1ujbyoxN07WKdUOJcs,5200
+cloudfiles/paths.py,sha256=RnZDDYGUKD6KBFYERgg46WQU8AO-aKlV9klfGcWvOQc,11399
+cloudfiles/resumable_tools.py,sha256=NyuSoGh1SaP5akrHCpd9kgy2-JruEWrHW9lvJxV7jpE,6711
+cloudfiles/scheduler.py,sha256=DqDANmOpB3NdzFgJDNMMibRIkCrXQqIh2XGL8GWoc9c,3668
+cloudfiles/secrets.py,sha256=791b5a8nWSBYtlleGzKeoYIR5jl-FI1bw6INRM4Wy-0,5295
+cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
+cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
+cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
+cloudfiles_cli/cloudfiles_cli.py,sha256=HGlX8oyIL7XASl57KXMlVQunF7pA_MVbMq-lpPA90LY,33911
+cloud_files-4.28.1.dist-info/AUTHORS,sha256=BFVmobgAhaVFI5fqbuqAY5XmBQxe09ZZAsAOTy87hKQ,318
+cloud_files-4.28.1.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloud_files-4.28.1.dist-info/METADATA,sha256=NM8aTPsA4niMc-iap4I_cmHhHqSfnbVZyRN5OkwtelM,27046
+cloud_files-4.28.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+cloud_files-4.28.1.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
+cloud_files-4.28.1.dist-info/pbr.json,sha256=hcBlFwDHXMgyi7h2PooxB-oD7QbU5uvcT_cm3gUUTxY,46
+cloud_files-4.28.1.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
+cloud_files-4.28.1.dist-info/RECORD,,
{cloud_files-4.27.0.dist-info → cloud_files-4.28.1.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.37.0)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
cloud_files-4.28.1.dist-info/pbr.json ADDED
@@ -0,0 +1 @@
+{"git_version": "d352eec", "is_release": true}
cloudfiles/cloudfiles.py CHANGED
@@ -2,7 +2,7 @@ from typing import (
   Any, Dict, Optional,
   Union, List, Tuple,
   Callable, Generator,
-  Iterable, cast, BinaryIO
+  Sequence, cast, BinaryIO
 )
 
 from queue import Queue
@@ -29,10 +29,10 @@ from . import compression, paths, gcs
 from .exceptions import UnsupportedProtocolError, MD5IntegrityError, CRC32CIntegrityError
 from .lib import (
   mkdir, totalfn, toiter, scatter, jsonify, nvl,
-  duplicates, first, sip,
+  duplicates, first, sip, touch,
   md5, crc32c, decode_crc32c_b64
 )
-from .paths import ALIASES
+from .paths import ALIASES, find_common_buckets
 from .secrets import CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
 from .threaded_queue import ThreadedQueue, DEFAULT_THREADS
 from .typing import (
@@ -182,7 +182,7 @@ def path_to_byte_range_tags(path):
   if isinstance(path, str):
     return (path, None, None, None)
   return (path['path'], path.get('start', None), path.get('end', None), path.get('tags', None))
-
+
 def dl(
   cloudpaths:GetPathType, raw:bool=False, **kwargs
 ) -> Union[bytes,List[dict]]:
@@ -193,23 +193,8 @@ def dl(
   dict.
   """
   cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
-  clustered = defaultdict(list)
-  total = 0
-  for path in cloudpaths:
-    pth = path
-    byte_range = None
-    if isinstance(path, dict):
-      pth = path["path"]
-      byte_range = path["byte_range"]
-
-    epath = paths.extract(pth)
-    bucketpath = paths.asbucketpath(epath)
-    clustered[bucketpath].append({
-      "path": epath.path,
-      "start": (byte_range[0] if byte_range else None), # type: ignore
-      "end": (byte_range[1] if byte_range else None), # type: ignore
-    })
-    total += 1
+  clustered = find_common_buckets(cloudpaths)
+  total = sum([ len(bucket) for bucket in clustered.values() ])
 
   progress = kwargs.get("progress", False) and total > 1
   pbar = tqdm(total=total, desc="Downloading", disable=(not progress))
@@ -919,6 +904,60 @@ class CloudFiles:
     )
     return len(results)
 
+  def touch(
+    self,
+    paths:GetPathType,
+    progress:Optional[bool] = None,
+    total:Optional[int] = None,
+    nocopy:bool = False,
+  ):
+    """
+    Create a zero byte file if it doesn't exist.
+    """
+    paths = toiter(paths)
+    progress = nvl(progress, self.progress)
+    total = totalfn(paths, total)
+
+    if self.protocol == "file":
+      basepath = self.cloudpath.replace("file://", "")
+      for path in tqdm(paths, disable=(not progress), total=total):
+        pth = path
+        if isinstance(path, dict):
+          pth = path["path"]
+        touch(self.join(basepath, pth))
+      return
+
+    results = self.exists(paths, total=total, progress=progress)
+
+    dne = [
+      (fname, b'')
+      for fname, exists in results.items()
+      if not exists
+    ]
+
+    self.puts(dne, progress=progress)
+
+    # def thunk_copy(path):
+    #   with self._get_connection() as conn:
+    #     conn.copy_file(path, self._path.bucket, self.join(self._path.path, path))
+    #   return 1
+
+    # if not nocopy:
+    #   already_exists = (
+    #     fname
+    #     for fname, exists in results.items()
+    #     if exists
+    #   )
+
+    #   results = schedule_jobs(
+    #     fns=( partial(thunk_copy, path) for path in already_exists ),
+    #     progress=progress,
+    #     total=(total - len(dne)),
+    #     concurrency=self.num_threads,
+    #     green=self.green,
+    #     count_return=True,
+    #   )
+
   def list(
     self, prefix:str = "", flat:bool = False
   ) -> Generator[str,None,None]:
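For object storage protocols, the touch method above reduces to an existence sweep followed by zero byte uploads for the missing names; re-stamping objects that already exist is left commented out in the diff. A rough equivalent of that branch (hypothetical bucket):

    from cloudfiles import CloudFiles

    cf = CloudFiles("s3://example-bucket/")  # hypothetical bucket
    names = [ "a", "b", "c" ]

    existence = cf.exists(names)  # one threaded existence check
    missing = [ (name, b'') for name, ok in existence.items() if not ok ]
    cf.puts(missing)              # zero byte uploads fill the gaps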
@@ -953,6 +992,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from this CloudFiles storage
@@ -969,7 +1009,7 @@ class CloudFiles:
     - gs->gs: Uses GCS copy API to minimize data movement
     - s3->s3: Uses boto s3 copy API to minimize data movement
 
-    cf_src: another CloudFiles instance or cloudpath
+    cf_dest: another CloudFiles instance or cloudpath
     paths: if None transfer all files from src, else if
       an iterable, transfer only these files.
 
@@ -997,7 +1037,8 @@ class CloudFiles:
     return cf_dest.transfer_from(
       self, paths, block_size,
       reencode, content_type,
-      allow_missing,
+      allow_missing,
+      progress,
     )
 
   def transfer_from(
@@ -1008,6 +1049,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from the source CloudFiles storage
@@ -1054,7 +1096,15 @@ class CloudFiles:
 
     total = totalfn(paths, None)
 
-    with tqdm(desc="Transferring", total=total, disable=(not self.progress)) as pbar:
+    disable = progress
+    if disable is None:
+      disable = self.progress
+    if disable is None:
+      disable = False
+    else:
+      disable = not disable
+
+    with tqdm(desc="Transferring", total=total, disable=disable) as pbar:
       if (
         cf_src.protocol == "file"
         and self.protocol == "file"
@@ -1211,6 +1261,9 @@ class CloudFiles:
       else:
         raise
 
+      if dest_path == '':
+        dest_path = src_path
+
       to_upload.append({
         "path": dest_path,
        "content": handle,
@@ -1262,6 +1315,99 @@ class CloudFiles:
     )
     return len(results)
 
+  def move(self, src:str, dest:str):
+    """Move (rename) src to dest.
+
+    src and dest do not have to be on the same filesystem.
+    """
+    epath = paths.extract(dest)
+    full_cloudpath = paths.asprotocolpath(epath)
+    dest_cloudpath = paths.dirname(full_cloudpath)
+    base_dest = paths.basename(full_cloudpath)
+
+    return self.moves(dest_cloudpath, [
+      (src, base_dest)
+    ], block_size=1, progress=False)
+
+  def moves(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str, str]]],
+    block_size:int = 64,
+    total:Optional[int] = None,
+    progress:Optional[bool] = None,
+  ):
+    """
+    Move (rename) files.
+
+    pairs: [ (src, dest), (src, dest), ... ]
+    """
+    if isinstance(cf_dest, str):
+      cf_dest = CloudFiles(
+        cf_dest, progress=False,
+        green=self.green, num_threads=self.num_threads,
+      )
+
+    total = totalfn(paths, total)
+
+    disable = not (self.progress if progress is None else progress)
+
+    if self.protocol == "file" and cf_dest.protocol == "file":
+      self.__moves_file_to_file(
+        cf_dest, paths, total,
+        disable, block_size
+      )
+      return
+
+    pbar = tqdm(total=total, disable=disable, desc="Moving")
+
+    with pbar:
+      for subpairs in sip(paths, block_size):
+        subpairs = [
+          ((pair, pair) if isinstance(pair, str) else pair)
+          for pair in subpairs
+        ]
+
+        self.transfer_to(cf_dest, paths=(
+          {
+            "path": src,
+            "dest_path": dest,
+          }
+          for src, dest in subpairs
+        ), progress=False)
+        self.delete(( src for src, dest in subpairs ), progress=False)
+        pbar.update(len(subpairs))
+
+  def __moves_file_to_file(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str,str]]],
+    total:Optional[int],
+    disable:bool,
+    block_size:int,
+  ):
+    for pair in tqdm(paths, total=total, disable=disable, desc="Moving"):
+      if isinstance(pair, str):
+        src = pair
+        dest = pair
+      else:
+        (src, dest) = pair
+
+      src = self.join(self.cloudpath, src).replace("file://", "")
+      dest = cf_dest.join(cf_dest.cloudpath, dest).replace("file://", "")
+
+      if os.path.isdir(dest):
+        dest = cf_dest.join(dest, os.path.basename(src))
+      else:
+        mkdir(os.path.dirname(dest))
+
+      src, encoding = FileInterface.get_encoded_file_path(src)
+      _, dest_ext = os.path.splitext(dest)
+      dest_ext_compress = FileInterface.get_extension(encoding)
+      if dest_ext_compress != dest_ext:
+        dest += dest_ext_compress
+      shutil.move(src, dest)
+
   def join(self, *paths:str) -> str:
     """
     Convenience method for joining path strings
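move() above is a thin wrapper over moves(): it splits the destination into a directory-like cloudpath and a basename, then issues a single-pair moves call. A sketch of that decomposition using the paths helpers the wrapper calls (exact trailing-slash behavior is an assumption):

    from cloudfiles import paths

    epath = paths.extract("gs://bucket/dir/newname.txt")
    full = paths.asprotocolpath(epath)   # back to a full cloudpath string
    parent = paths.dirname(full)         # roughly "gs://bucket/dir/"
    base = paths.basename(full)          # "newname.txt"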
@@ -1440,6 +1586,16 @@ class CloudFile:
       reencode=reencode,
     )
 
+  def join(self, *args):
+    return self.cf.join(*args)
+
+  def touch(self):
+    return self.cf.touch(self.filename)
+
+  def move(self, dest):
+    """Move (rename) this file to dest."""
+    return self.cf.move(self.filename, dest)
+
   def __len__(self):
     return self.size()
 
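The new CloudFile conveniences mirror the batch API for single files; a short hypothetical usage:

    from cloudfiles import CloudFile

    cf = CloudFile("gs://example-bucket/report.csv")   # hypothetical object
    cf.touch()                                         # create if missing
    cf.move("gs://example-bucket/archive/report.csv")  # delegates to CloudFiles.move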
cloudfiles/interfaces.py CHANGED
@@ -474,6 +474,14 @@ class MemoryInterface(StorageInterface):
 
     return None
 
+  def copy_file(self, src_path, dest_bucket, dest_key):
+    key = self.get_path_to_file(src_path)
+    with MEM_BUCKET_POOL_LOCK:
+      pool = MEM_POOL[MemoryPoolParams(dest_bucket)]
+    dest_bucket = pool.get_connection(None, None)
+    dest_bucket[dest_key] = self._data[key]
+    return True
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     return path in self._data or any(( (path + ext in self._data) for ext in COMPRESSION_EXTENSIONS ))
cloudfiles/lib.py CHANGED
@@ -53,8 +53,11 @@ def mkdir(path):
   return path
 
 def touch(path):
-  mkdir(os.path.dirname(path))
-  open(path, 'a').close()
+  if os.path.exists(path):
+    os.utime(path)
+  else:
+    mkdir(os.path.dirname(path))
+    open(path, 'a').close()
 
 def nvl(*args):
   """Return the leftmost argument that is not None."""
cloudfiles/paths.py CHANGED
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 import orjson
 import os.path
 import posixpath
@@ -8,9 +8,10 @@ import sys
 import urllib.parse
 
 from typing import Tuple, Optional
+from .typing import GetPathType
 
 from .exceptions import UnsupportedProtocolError
-from .lib import yellow, toabs, jsonify, mkdir
+from .lib import yellow, toabs, jsonify, mkdir, toiter
 from .secrets import CLOUD_FILES_DIR
 
 ExtractedPath = namedtuple('ExtractedPath',
@@ -390,3 +391,30 @@ def to_https_protocol(cloudpath):
     cloudpath = cloudpath.replace(f"{alias}://", host, 1)
 
   return cloudpath.replace("s3://", "", 1)
+
+def find_common_buckets(cloudpaths:GetPathType):
+  cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
+  clustered = defaultdict(list)
+
+  for path in cloudpaths:
+    pth = path
+    byte_range = None
+    if isinstance(path, dict):
+      pth = path["path"]
+      byte_range = path["byte_range"]
+
+    epath = extract(pth)
+    if epath.protocol == "file":
+      path = os.sep.join(asfilepath(epath).split(os.sep)[2:])
+      bucketpath = "file://" + os.sep.join(asfilepath(epath).split(os.sep)[:2])
+    else:
+      path = epath.path
+      bucketpath = asbucketpath(epath)
+
+    clustered[bucketpath].append({
+      "path": path,
+      "start": (byte_range[0] if byte_range else None), # type: ignore
+      "end": (byte_range[1] if byte_range else None), # type: ignore
+    })
+
+  return clustered
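find_common_buckets is the clustering that dl() previously inlined: it groups paths by bucket so callers can reuse one connection per bucket. A sketch of the return shape (the exact key format follows asbucketpath and is an assumption here):

    from cloudfiles.paths import find_common_buckets

    clustered = find_common_buckets([
      "gs://bucket-a/x/1.txt",
      "gs://bucket-a/y/2.txt",
      "s3://bucket-b/3.txt",
    ])
    # roughly:
    # { "gs://bucket-a": [ {"path": "x/1.txt", "start": None, "end": None},
    #                      {"path": "y/2.txt", "start": None, "end": None} ],
    #   "s3://bucket-b": [ {"path": "3.txt",   "start": None, "end": None} ] }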
cloudfiles/resumable_tools.py CHANGED
@@ -39,6 +39,9 @@ class ResumableFileSet:
     self.conn = sqlite3.connect(db_path)
     self.lease_msec = int(lease_msec)
 
+    self._total = 0
+    self._total_dirty = True
+
   def __del__(self):
     self.conn.close()
 
@@ -46,6 +49,7 @@ class ResumableFileSet:
     cur = self.conn.cursor()
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
     cur.close()
 
   def create(self, src, dest, reencode=None):
@@ -53,6 +57,7 @@ class ResumableFileSet:
 
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
 
     cur.execute(f"""
     CREATE TABLE xfermeta (
@@ -78,6 +83,18 @@ class ResumableFileSet:
     """)
     cur.execute("CREATE INDEX resumableidxfin ON filelist(finished,lease)")
     cur.execute("CREATE INDEX resumableidxfile ON filelist(filename)")
+
+    cur.execute(f"""
+    CREATE TABLE stats (
+      id {INTEGER} PRIMARY KEY {AUTOINC},
+      key TEXT NOT NULL,
+      value {INTEGER}
+    )
+    """)
+    cur.execute(
+      "INSERT INTO stats(id, key, value) VALUES (?,?,?)",
+      [1, 'finished', 0]
+    )
     cur.close()
 
   def insert(self, fname_iter):
@@ -91,7 +108,9 @@ class ResumableFileSet:
       cur.execute(f"INSERT INTO filelist(filename,finished,lease) VALUES {bindlist}", filenames)
       cur.execute("commit")
 
-    cur.close()
+    cur.close()
+
+    self._total_dirty = True
 
   def metadata(self):
     cur = self.conn.cursor()
@@ -111,6 +130,7 @@ class ResumableFileSet:
     for filenames in sip(fname_iter, SQLITE_MAX_PARAMS):
       bindlist = ",".join([f"{BIND}"] * len(filenames))
       cur.execute(f"UPDATE filelist SET finished = 1 WHERE filename in ({bindlist})", filenames)
+      cur.execute(f"UPDATE stats SET value = value + {len(filenames)} WHERE id = 1")
     cur.execute("commit")
     cur.close()
 
@@ -120,7 +140,7 @@ class ResumableFileSet:
     N = 0
 
     while True:
-      ts = now_msec() + self.lease_msec
+      ts = now_msec()
       cur.execute(f"""SELECT filename FROM filelist WHERE finished = 0 AND lease <= {ts} LIMIT {int(block_size)}""")
       rows = cur.fetchmany(block_size)
       N += len(rows)
@@ -140,31 +160,46 @@ class ResumableFileSet:
 
     cur.close()
 
-  def total(self):
+  def _scalar_query(self, sql:str) -> int:
     cur = self.conn.cursor()
-    cur.execute(f"SELECT count(filename) FROM filelist")
+    cur.execute(sql)
     res = cur.fetchone()
    cur.close()
     return int(res[0])
 
+  def total(self):
+    """Returns the total number of tasks (both processed and unprocessed)."""
+    if not self._total_dirty:
+      return self._total
+
+    self._total = self._scalar_query(f"SELECT max(id) FROM filelist")
+    self._total_dirty = False
+    return self._total
+
+  def finished(self):
+    return self._scalar_query(f"SELECT value FROM stats WHERE id = 1")
+
   def remaining(self):
-    cur = self.conn.cursor()
-    cur.execute(f"SELECT count(filename) FROM filelist WHERE finished = 0")
-    res = cur.fetchone()
-    cur.close()
-    return int(res[0])
+    return self.total() - self.finished()
+
+  def num_leased(self):
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease > {ts}"
+    )
 
   def available(self):
-    cur = self.conn.cursor()
-    ts = int(now_msec() + self.lease_msec)
-    cur.execute(f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease < {ts}")
-    res = cur.fetchone()
-    cur.close()
-    return int(res[0])
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease <= {ts}"
+    )
 
   def release(self):
+    cur = self.conn.cursor()
     cur.execute(f"UPDATE filelist SET lease = 0")
     cur.execute("commit")
+    cur.close()
+
 
   def __len__(self):
     return self.remaining()
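The ResumableFileSet changes above replace two O(n) count(filename) scans with O(1) bookkeeping: total() caches max(id) (valid while rows are only ever inserted), and finished() reads a single stats row that the finished-marking loop increments alongside the filelist update; separately, the lease queries now compare against now_msec() rather than now + lease_msec, so leased work is no longer counted as available. The counting idea in isolation, against a throwaway sqlite database:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    cur.execute("""CREATE TABLE filelist (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      filename TEXT, finished INTEGER DEFAULT 0, lease INTEGER DEFAULT 0
    )""")
    cur.execute("CREATE TABLE stats (id INTEGER PRIMARY KEY, key TEXT NOT NULL, value INTEGER)")
    cur.execute("INSERT INTO stats(id, key, value) VALUES (1, 'finished', 0)")

    cur.executemany("INSERT INTO filelist(filename) VALUES (?)", [("a",), ("b",), ("c",)])

    # marking two files done bumps the counter alongside the flag update
    cur.execute("UPDATE filelist SET finished = 1 WHERE filename IN ('a','b')")
    cur.execute("UPDATE stats SET value = value + 2 WHERE id = 1")

    total = cur.execute("SELECT max(id) FROM filelist").fetchone()[0]
    done = cur.execute("SELECT value FROM stats WHERE id = 1").fetchone()[0]
    print(total - done)  # remaining: 1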
cloudfiles_cli/cloudfiles_cli.py CHANGED
@@ -27,7 +27,7 @@ import cloudfiles.paths
 from cloudfiles import CloudFiles
 from cloudfiles.resumable_tools import ResumableTransfer
 from cloudfiles.compression import transcode
-from cloudfiles.paths import extract, get_protocol
+from cloudfiles.paths import extract, get_protocol, find_common_buckets
 from cloudfiles.lib import (
   mkdir, toabs, sip, toiter,
   first, red, green,
@@ -184,10 +184,6 @@ def cp(
 
   If source is "-" read newline delimited filenames from stdin.
   If destination is "-" output to stdout.
-
-  Note that for gs:// to gs:// transfers, the gsutil
-  tool is more efficient because the files never leave
-  Google's network.
   """
   use_stdout = (destination == '-')
   if len(source) > 1 and not ispathdir(destination) and not use_stdout:
@@ -330,6 +326,163 @@ def _cp_stdout(src, no_sign_request, paths):
     content = res["content"].decode("utf8")
     sys.stdout.write(content)
 
+@main.command()
+@click.argument("source", nargs=-1)
+@click.argument("destination", nargs=1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
+@click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def mv(
+  ctx, source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+):
+  """
+  Move one or more files from a source to destination.
+
+  If source is "-" read newline delimited filenames from stdin.
+  If destination is "-" output to stdout.
+  """
+  if len(source) > 1 and not ispathdir(destination):
+    print("cloudfiles: destination must be a directory for multiple source files.")
+    return
+
+  ctx.ensure_object(dict)
+  parallel = int(ctx.obj.get("parallel", 1))
+
+  for src in source:
+    _mv_single(
+      src, destination,
+      progress, block_size,
+      part_bytes, no_sign_request,
+      parallel
+    )
+
+def _mv_single(
+  source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+  parallel
+):
+  use_stdin = (source == '-')
+
+  nsrc = normalize_path(source)
+  ndest = normalize_path(destination)
+
+  issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
+  isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())
+
+  ensrc = cloudfiles.paths.extract(nsrc)
+  endest = cloudfiles.paths.extract(ndest)
+
+  if ensrc.protocol == "file" and endest.protocol == "file" and issrcdir:
+    shutil.move(nsrc.replace("file://", ""), ndest.replace("file://", ""))
+    return
+
+  recursive = issrcdir
+
+  # For more information see:
+  # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
+  # Try to follow cp rules. If the directory exists,
+  # copy the base source directory into the dest directory
+  # If the directory does not exist, then we copy into
+  # the dest directory.
+  # Both x* and x** should not copy the base directory
+  if recursive and nsrc[-1] != "*":
+    if isdestdir:
+      if nsrc[-1] == '/':
+        nsrc = nsrc[:-1]
+      ndest = cloudpathjoin(ndest, os.path.basename(nsrc))
+
+  # The else clause here is to handle single file transfers
+  srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
+  many, flat, prefix = get_mfp(nsrc, recursive)
+
+  if issrcdir and not many:
+    print(f"cloudfiles: {source} is a directory (not copied).")
+    return
+
+  xferpaths = os.path.basename(nsrc)
+  if use_stdin:
+    xferpaths = sys.stdin.readlines()
+    xferpaths = [ x.replace("\n", "") for x in xferpaths ]
+    prefix = os.path.commonprefix(xferpaths)
+    xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
+    srcpath = cloudpathjoin(srcpath, prefix)
+  elif many:
+    xferpaths = CloudFiles(
+      srcpath, no_sign_request=no_sign_request
+    ).list(prefix=prefix, flat=flat)
+
+  destpath = ndest
+  if isinstance(xferpaths, str):
+    destpath = ndest if isdestdir else os.path.dirname(ndest)
+  elif not isdestdir:
+    if os.path.exists(ndest.replace("file://", "")):
+      print(f"cloudfiles: {ndest} is not a directory (not copied).")
+      return
+
+  if not isinstance(xferpaths, str):
+    if parallel == 1:
+      _mv(srcpath, destpath, progress, block_size, part_bytes, no_sign_request, xferpaths)
+      return
+
+    total = None
+    try:
+      total = len(xferpaths)
+    except TypeError:
+      pass
+
+    fn = partial(_mv, srcpath, destpath, False, block_size, part_bytes, no_sign_request)
+
+    with tqdm(desc="Moving", total=total, disable=(not progress)) as pbar:
+      with pathos.pools.ProcessPool(parallel) as executor:
+        for _ in executor.imap(fn, sip(xferpaths, block_size)):
+          pbar.update(block_size)
+  else:
+    cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
+    if not cfsrc.exists(xferpaths):
+      print(f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}")
+      return
+
+    cfdest = CloudFiles(
+      destpath,
+      progress=progress,
+      composite_upload_threshold=part_bytes,
+    )
+
+    cfsrc.move(xferpaths, ndest)
+
+def _mv(src, dst, progress, block_size, part_bytes, no_sign_request, paths):
+  cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
+  cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
+  cfsrc.moves(
+    cfdest, paths=paths, block_size=block_size
+  )
+
+@main.command()
+@click.argument("sources", nargs=-1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def touch(
+  ctx, sources,
+  progress, no_sign_request,
+):
+  sources = list(map(normalize_path, sources))
+  sources = [ src.replace("precomputed://", "") for src in sources ]
+  pbar = tqdm(total=len(sources), desc="Touch", disable=(not progress))
+
+  clustered = find_common_buckets(sources)
+
+  with pbar:
+    for bucket, items in clustered.items():
+      cf = CloudFiles(bucket, no_sign_request=no_sign_request, progress=False)
+      cf.touch(items)
+      pbar.update(len(items))
+
 @main.group("xfer")
 def xfergroup():
   """
cloud_files-4.27.0.dist-info/RECORD DELETED
@@ -1,26 +0,0 @@
-cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
-cloudfiles/buckets.py,sha256=eRAYdDfvVpNyJyK5ryDRMwgNJUeEuFBJ6doWU2JkAcA,74
-cloudfiles/cloudfiles.py,sha256=QlrQlU94gqNhKWyOuP0xe58UEwk2x8wtZ7n9LKiyLpM,44854
-cloudfiles/compression.py,sha256=pqYdpu5vfFv-094BpfZ2pgRjVu7ESM9pAZC09P6E8bY,6150
-cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
-cloudfiles/exceptions.py,sha256=H2IcMlZoy2Bsn-6wCPwyLDjg66LZCyxtcf3s_p21FDw,770
-cloudfiles/gcs.py,sha256=_njJ7TpqwrHCjPHRGkBN5alCrCWKM2m9qdy5DhxMZ7U,3718
-cloudfiles/interfaces.py,sha256=4ICm7tnS8HHK5IKw-HYD653V-qndprPRK8e1cxUYKgA,36782
-cloudfiles/lib.py,sha256=fEqL5APu_WQhl2yxqQbwE7msHdu7U8pstAJw6LgoKO0,5142
-cloudfiles/paths.py,sha256=xadVh5Vw8wAack1cws5dzVIlYQ3r8h8lrP43umUSuT0,10547
-cloudfiles/resumable_tools.py,sha256=pK-VcoPjQ2BjGjvlvH4dDCBf6lNsqHG-weiBgxVFbzA,5838
-cloudfiles/scheduler.py,sha256=DqDANmOpB3NdzFgJDNMMibRIkCrXQqIh2XGL8GWoc9c,3668
-cloudfiles/secrets.py,sha256=791b5a8nWSBYtlleGzKeoYIR5jl-FI1bw6INRM4Wy-0,5295
-cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
-cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
-cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
-cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
-cloudfiles_cli/cloudfiles_cli.py,sha256=eETIOK4QyztQcpA4ZRny21SobLtcrPDlzZ_JaKBmmmA,28449
-cloud_files-4.27.0.dist-info/AUTHORS,sha256=7E2vC894bbLPO_kvUuEB2LFZZbIxZn23HabxH7x0Hgo,266
-cloud_files-4.27.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
-cloud_files-4.27.0.dist-info/METADATA,sha256=7xdozjXt0yT1OKZV47M9dWeip9PUx9Wj9adUcO0qZ_M,26804
-cloud_files-4.27.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
-cloud_files-4.27.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
-cloud_files-4.27.0.dist-info/pbr.json,sha256=C4Xk2iNLylqeAReQ_DjwEN6rVj4PC_6x96XsvMs7138,46
-cloud_files-4.27.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
-cloud_files-4.27.0.dist-info/RECORD,,
cloud_files-4.27.0.dist-info/pbr.json DELETED
@@ -1 +0,0 @@
-{"git_version": "e4b04bf", "is_release": true}
cloudfiles/buckets.py DELETED
@@ -1,10 +0,0 @@
-
-
-
-
-
-class Bucket:
-  def __init__(self, cloudpath, secrets=None):
-    pass
-
-