cloud-files 4.26.0__py3-none-any.whl → 4.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/AUTHORS +1 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/METADATA +25 -15
- cloud_files-4.28.0.dist-info/RECORD +25 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/WHEEL +1 -1
- cloud_files-4.28.0.dist-info/pbr.json +1 -0
- cloudfiles/cloudfiles.py +179 -25
- cloudfiles/interfaces.py +45 -8
- cloudfiles/lib.py +5 -2
- cloudfiles/paths.py +45 -6
- cloudfiles/resumable_tools.py +50 -15
- cloudfiles/secrets.py +18 -0
- cloudfiles_cli/cloudfiles_cli.py +158 -5
- cloud_files-4.26.0.dist-info/RECORD +0 -26
- cloud_files-4.26.0.dist-info/pbr.json +0 -1
- cloudfiles/buckets.py +0 -10
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/LICENSE +0 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/entry_points.txt +0 -0
- {cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/top_level.txt +0 -0
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/AUTHORS
CHANGED

@@ -1,6 +1,7 @@
 Manuel Castro <macastro@princeton.edu>
 Nico Kemnitz <nkemnitz@princeton.edu>
 V24 <55334829+umarfarouk98@users.noreply.github.com>
+William Silversmith <william.silvermsith@gmail.com>
 William Silversmith <william.silversmith@gmail.com>
 madiganz <madiganz@users.noreply.github.com>
 ranlu <ranlu@users.noreply.github.com>
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.26.0
+Version: 4.28.0
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -20,33 +20,33 @@ Requires-Python: >=3.7,<4.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: AUTHORS
-Requires-Dist: boto3
+Requires-Dist: boto3 >=1.4.7
 Requires-Dist: brotli
 Requires-Dist: crc32c
-Requires-Dist: chardet
+Requires-Dist: chardet >=3.0.4
 Requires-Dist: click
-Requires-Dist: deflate
+Requires-Dist: deflate >=0.2.0
 Requires-Dist: gevent
-Requires-Dist: google-auth
-Requires-Dist: google-cloud-core
-Requires-Dist: google-cloud-storage
-Requires-Dist: google-crc32c
+Requires-Dist: google-auth >=1.10.0
+Requires-Dist: google-cloud-core >=1.1.0
+Requires-Dist: google-cloud-storage >=1.31.1
+Requires-Dist: google-crc32c >=1.0.0
 Requires-Dist: orjson
 Requires-Dist: pathos
-Requires-Dist: protobuf
-Requires-Dist: requests
-Requires-Dist: six
-Requires-Dist: tenacity
+Requires-Dist: protobuf >=3.3.0
+Requires-Dist: requests >=2.22.0
+Requires-Dist: six >=1.14.0
+Requires-Dist: tenacity >=4.10.0
 Requires-Dist: tqdm
-Requires-Dist: urllib3
+Requires-Dist: urllib3 >=1.26.3
 Requires-Dist: zstandard
-Requires-Dist: rsa
+Requires-Dist: rsa >=4.7.2
 Requires-Dist: fasteners
 Provides-Extra: numpy
 Requires-Dist: numpy ; extra == 'numpy'
 Provides-Extra: test
 Requires-Dist: pytest ; extra == 'test'
-Requires-Dist: moto ; extra == 'test'
+Requires-Dist: moto >=5 ; extra == 'test'

 [](https://badge.fury.io/py/cloud-files) [](https://github.com/seung-lab/cloud-files/actions?query=workflow%3A%22Test+Suite%22)
@@ -88,6 +88,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded

+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()
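The move, moves, and touch calls added to the README above can be tried end to end without a real bucket by using the in-memory mem:// protocol. A minimal sketch (the bucket and file names here are invented for illustration):

from cloudfiles import CloudFiles

cf = CloudFiles("mem://example-bucket")  # in-memory backend, nothing persisted
cf.put("a", b"hello")                    # seed a file to move
cf.touch("empty")                        # creates a zero byte file since it doesn't exist
cf.move("a", "mem://example-bucket/b")   # implemented as copy + delete under the hood
assert cf.get("b") == b"hello"
assert not cf.exists("a")                # the source was deleted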
@@ -464,6 +470,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers
cloud_files-4.28.0.dist-info/RECORD
ADDED

@@ -0,0 +1,25 @@
+cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
+cloudfiles/cloudfiles.py,sha256=KcHgVjLjPcOsgXVTr3edFFWcuz53xcOtWpxznkiAMos,48989
+cloudfiles/compression.py,sha256=pqYdpu5vfFv-094BpfZ2pgRjVu7ESM9pAZC09P6E8bY,6150
+cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
+cloudfiles/exceptions.py,sha256=H2IcMlZoy2Bsn-6wCPwyLDjg66LZCyxtcf3s_p21FDw,770
+cloudfiles/gcs.py,sha256=_njJ7TpqwrHCjPHRGkBN5alCrCWKM2m9qdy5DhxMZ7U,3718
+cloudfiles/interfaces.py,sha256=lD5hUNTJDkxSnIVRG6my5exEDN72Cqt3VwPfHmYaNDo,37074
+cloudfiles/lib.py,sha256=YOoaEkKtkXc9FdpNnC4FbZJVG1ujbyoxN07WKdUOJcs,5200
+cloudfiles/paths.py,sha256=RnZDDYGUKD6KBFYERgg46WQU8AO-aKlV9klfGcWvOQc,11399
+cloudfiles/resumable_tools.py,sha256=NyuSoGh1SaP5akrHCpd9kgy2-JruEWrHW9lvJxV7jpE,6711
+cloudfiles/scheduler.py,sha256=DqDANmOpB3NdzFgJDNMMibRIkCrXQqIh2XGL8GWoc9c,3668
+cloudfiles/secrets.py,sha256=791b5a8nWSBYtlleGzKeoYIR5jl-FI1bw6INRM4Wy-0,5295
+cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
+cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
+cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
+cloudfiles_cli/cloudfiles_cli.py,sha256=HGlX8oyIL7XASl57KXMlVQunF7pA_MVbMq-lpPA90LY,33911
+cloud_files-4.28.0.dist-info/AUTHORS,sha256=BFVmobgAhaVFI5fqbuqAY5XmBQxe09ZZAsAOTy87hKQ,318
+cloud_files-4.28.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
+cloud_files-4.28.0.dist-info/METADATA,sha256=gY-SuRG7iU8PM4ckUSGrZyuhGiOkCt6qQ4bsLhYknBY,27046
+cloud_files-4.28.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+cloud_files-4.28.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
+cloud_files-4.28.0.dist-info/pbr.json,sha256=nMag4w8eL7zh1OBHgElm2bR8KdiVNL-xEh79OlA1LPI,46
+cloud_files-4.28.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
+cloud_files-4.28.0.dist-info/RECORD,,

cloud_files-4.28.0.dist-info/pbr.json
ADDED

@@ -0,0 +1 @@
+{"git_version": "e9510b0", "is_release": true}
cloudfiles/cloudfiles.py
CHANGED
@@ -2,7 +2,7 @@ from typing import (
   Any, Dict, Optional,
   Union, List, Tuple,
   Callable, Generator,
-  Sequence, cast
+  Sequence, cast, BinaryIO
 )

 from queue import Queue

@@ -29,10 +29,10 @@ from . import compression, paths, gcs
 from .exceptions import UnsupportedProtocolError, MD5IntegrityError, CRC32CIntegrityError
 from .lib import (
   mkdir, totalfn, toiter, scatter, jsonify, nvl,
-  duplicates, first, sip,
+  duplicates, first, sip, touch,
   md5, crc32c, decode_crc32c_b64
 )
-from .paths import ALIASES
+from .paths import ALIASES, find_common_buckets
 from .secrets import CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
 from .threaded_queue import ThreadedQueue, DEFAULT_THREADS
 from .typing import (

@@ -44,7 +44,7 @@ from .scheduler import schedule_jobs
 from .interfaces import (
   FileInterface, HttpInterface,
   S3Interface, GoogleCloudStorageInterface,
-  MemoryInterface
+  MemoryInterface, CaveInterface,
 )

 INTERFACES = {

@@ -54,6 +54,7 @@ INTERFACES = {
   'http': HttpInterface,
   'https': HttpInterface,
   'mem': MemoryInterface,
+  'middleauth+https': CaveInterface,
 }
 for alias in ALIASES:
   INTERFACES[alias] = S3Interface

@@ -181,7 +182,7 @@ def path_to_byte_range_tags(path):
   if isinstance(path, str):
     return (path, None, None, None)
   return (path['path'], path.get('start', None), path.get('end', None), path.get('tags', None))
-
+
 def dl(
   cloudpaths:GetPathType, raw:bool=False, **kwargs
 ) -> Union[bytes,List[dict]]:

@@ -192,23 +193,8 @@ def dl(
     dict.
   """
   cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
-  clustered = defaultdict(list)
-  total = 0
-  for path in cloudpaths:
-    pth = path
-    byte_range = None
-    if isinstance(path, dict):
-      pth = path["path"]
-      byte_range = path["byte_range"]
-
-    epath = paths.extract(pth)
-    bucketpath = paths.asbucketpath(epath)
-    clustered[bucketpath].append({
-      "path": epath.path,
-      "start": (byte_range[0] if byte_range else None), # type: ignore
-      "end": (byte_range[1] if byte_range else None), # type: ignore
-    })
-    total += 1
+  clustered = find_common_buckets(cloudpaths)
+  total = sum([ len(bucket) for bucket in clustered.values() ])

   progress = kwargs.get("progress", False) and total > 1
   pbar = tqdm(total=total, desc="Downloading", disable=(not progress))
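The dl refactor above moves the bucket-clustering loop into paths.find_common_buckets and keeps only the aggregation here. For context, dl accepts plain path strings or dicts carrying a byte_range, and the clustering lets each bucket's batch share one CloudFiles instance. A hedged usage sketch (bucket names invented; dl is imported from the module it is defined in above):

from cloudfiles.cloudfiles import dl

# a single path returns bytes
binary = dl("gs://example-bucket/info")

# paths spanning several buckets, including a partial read, return a list of dicts
results = dl([
  "gs://example-bucket/info",
  { "path": "s3://other-bucket/data.bin", "byte_range": (0, 1024) },
])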
@@ -918,6 +904,60 @@ class CloudFiles:
   )
   return len(results)

+  def touch(
+    self,
+    paths:GetPathType,
+    progress:Optional[bool] = None,
+    total:Optional[int] = None,
+    nocopy:bool = False,
+  ):
+    """
+    Create a zero byte file if it doesn't exist.
+    """
+    paths = toiter(paths)
+    progress = nvl(progress, self.progress)
+    total = totalfn(paths, total)
+
+    if self.protocol == "file":
+      basepath = self.cloudpath.replace("file://", "")
+      for path in tqdm(paths, disable=(not progress), total=total):
+        pth = path
+        if isinstance(path, dict):
+          pth = path["path"]
+        touch(self.join(basepath, pth))
+      return
+
+    results = self.exists(paths, total=total, progress=progress)
+
+    dne = [
+      (fname, b'')
+      for fname, exists in results.items()
+      if not exists
+    ]
+
+    self.puts(dne, progress=progress)
+
+    # def thunk_copy(path):
+    #   with self._get_connection() as conn:
+    #     conn.copy_file(path, self._path.bucket, self.join(self._path.path, path))
+    #   return 1
+
+    # if not nocopy:
+    #   already_exists = (
+    #     fname
+    #     for fname, exists in results.items()
+    #     if exists
+    #   )
+
+    #   results = schedule_jobs(
+    #     fns=( partial(thunk_copy, path) for path in already_exists ),
+    #     progress=progress,
+    #     total=(total - len(dne)),
+    #     concurrency=self.num_threads,
+    #     green=self.green,
+    #     count_return=True,
+    #   )
+
   def list(
     self, prefix:str = "", flat:bool = False
   ) -> Generator[str,None,None]:
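The cloud branch of touch above reduces to one batched existence query followed by a batched upload of empty payloads for the misses, which keeps it fast over high latency object stores. A standalone sketch of the same pattern, using only the CloudFiles calls exercised in the diff:

from cloudfiles import CloudFiles

def touch_missing(cloudpath, filenames):
    """Create zero byte objects for any filename that doesn't exist yet."""
    cf = CloudFiles(cloudpath)
    existence = cf.exists(filenames)  # dict of { filename: bool } for list input
    missing = [ (name, b'') for name, ok in existence.items() if not ok ]
    cf.puts(missing)                  # parallel upload of (path, content) pairs
    return len(missing)

# e.g. touch_missing("mem://example-bucket", ["a", "b", "c"]) -> 3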
@@ -952,6 +992,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from this CloudFiles storage

@@ -968,7 +1009,7 @@ class CloudFiles:
     - gs->gs: Uses GCS copy API to minimize data movement
     - s3->s3: Uses boto s3 copy API to minimize data movement

-
+    cf_dest: another CloudFiles instance or cloudpath
     paths: if None transfer all files from src, else if
       an iterable, transfer only these files.

@@ -996,7 +1037,8 @@ class CloudFiles:
     return cf_dest.transfer_from(
       self, paths, block_size,
       reencode, content_type,
-      allow_missing,
+      allow_missing,
+      progress,
     )

   def transfer_from(

@@ -1007,6 +1049,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from the source CloudFiles storage

@@ -1053,7 +1096,15 @@ class CloudFiles:
     total = totalfn(paths, None)

-    with tqdm(desc="Transferring", total=total, disable=(not self.progress)) as pbar:
+    disable = progress
+    if disable is None:
+      disable = self.progress
+    if disable is None:
+      disable = False
+    else:
+      disable = not disable
+
+    with tqdm(desc="Transferring", total=total, disable=disable) as pbar:
       if (
         cf_src.protocol == "file"
         and self.protocol == "file"

@@ -1210,6 +1261,9 @@ class CloudFiles:
       else:
         raise

+      if dest_path == '':
+        dest_path = src_path
+
       to_upload.append({
         "path": dest_path,
        "content": handle,
@@ -1261,6 +1315,99 @@ class CloudFiles:
   )
   return len(results)

+  def move(self, src:str, dest:str):
+    """Move (rename) src to dest.
+
+    src and dest do not have to be on the same filesystem.
+    """
+    epath = paths.extract(dest)
+    full_cloudpath = paths.asprotocolpath(epath)
+    dest_cloudpath = paths.dirname(full_cloudpath)
+    base_dest = paths.basename(full_cloudpath)
+
+    return self.moves(dest_cloudpath, [
+      (src, base_dest)
+    ], block_size=1, progress=False)
+
+  def moves(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str, str]]],
+    block_size:int = 64,
+    total:Optional[int] = None,
+    progress:Optional[bool] = None,
+  ):
+    """
+    Move (rename) files.
+
+    pairs: [ (src, dest), (src, dest), ... ]
+    """
+    if isinstance(cf_dest, str):
+      cf_dest = CloudFiles(
+        cf_dest, progress=False,
+        green=self.green, num_threads=self.num_threads,
+      )
+
+    total = totalfn(paths, total)
+
+    disable = not (self.progress if progress is None else progress)
+
+    if self.protocol == "file" and cf_dest.protocol == "file":
+      self.__moves_file_to_file(
+        cf_dest, paths, total,
+        disable, block_size
+      )
+      return
+
+    pbar = tqdm(total=total, disable=disable, desc="Moving")
+
+    with pbar:
+      for subpairs in sip(paths, block_size):
+        subpairs = [
+          ((pair, pair) if isinstance(pair, str) else pair)
+          for pair in subpairs
+        ]
+
+        self.transfer_to(cf_dest, paths=(
+          {
+            "path": src,
+            "dest_path": dest,
+          }
+          for src, dest in subpairs
+        ), progress=False)
+        self.delete(( src for src, dest in subpairs ), progress=False)
+        pbar.update(len(subpairs))
+
+  def __moves_file_to_file(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str,str]]],
+    total:Optional[int],
+    disable:bool,
+    block_size:int,
+  ):
+    for pair in tqdm(paths, total=total, disable=disable, desc="Moving"):
+      if isinstance(pair, str):
+        src = pair
+        dest = pair
+      else:
+        (src, dest) = pair
+
+      src = self.join(self.cloudpath, src).replace("file://", "")
+      dest = cf_dest.join(cf_dest.cloudpath, dest).replace("file://", "")
+
+      if os.path.isdir(dest):
+        dest = cf_dest.join(dest, os.path.basename(src))
+      else:
+        mkdir(os.path.dirname(dest))
+
+      src, encoding = FileInterface.get_encoded_file_path(src)
+      _, dest_ext = os.path.splitext(dest)
+      dest_ext_compress = FileInterface.get_extension(encoding)
+      if dest_ext_compress != dest_ext:
+        dest += dest_ext_compress
+      shutil.move(src, dest)
+
   def join(self, *paths:str) -> str:
     """
     Convenience method for joining path strings
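A design note on the block above: object stores have no rename primitive, so moves is copy-then-delete in blocks (transfer_to with explicit dest_path entries, then a batched delete of the sources), while the file://-to-file:// case short-circuits into shutil.move. Usage follows the pairs convention from the docstring (bucket names invented):

from cloudfiles import CloudFiles

cf = CloudFiles("gs://example-bucket")

# rename within the same bucket
cf.moves("gs://example-bucket/", [ ("logs/old.txt", "logs/new.txt") ])

# bare strings keep their names; handy for moving between buckets
cf.moves("s3://other-bucket/", [ "a.txt", "b.txt" ])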
@@ -1439,6 +1586,13 @@ class CloudFile:
     reencode=reencode,
   )

+  def touch(self):
+    return self.cf.touch(self.filename)
+
+  def move(self, dest):
+    """Move (rename) this file to dest."""
+    return self.cf.move(self.filename, dest)
+
   def __len__(self):
     return self.size()
cloudfiles/interfaces.py
CHANGED
@@ -24,7 +24,12 @@ from .compression import COMPRESSION_TYPES
 from .connectionpools import S3ConnectionPool, GCloudBucketPool, MemoryPool, MEMORY_DATA
 from .exceptions import MD5IntegrityError, CompressionError
 from .lib import mkdir, sip, md5, validate_s3_multipart_etag
-from .secrets import http_credentials, CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
+from .secrets import (
+  http_credentials,
+  cave_credentials,
+  CLOUD_FILES_DIR,
+  CLOUD_FILES_LOCK_DIR,
+)

 COMPRESSION_EXTENSIONS = ('.gz', '.br', '.zstd','.bz2','.xz')
 GZIP_TYPES = (True, 'gzip', 1)
@@ -469,6 +474,14 @@ class MemoryInterface(StorageInterface):

     return None

+  def copy_file(self, src_path, dest_bucket, dest_key):
+    key = self.get_path_to_file(src_path)
+    with MEM_BUCKET_POOL_LOCK:
+      pool = MEM_POOL[MemoryPoolParams(dest_bucket)]
+      dest_bucket = pool.get_connection(None, None)
+    dest_bucket[dest_key] = self._data[key]
+    return True
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     return path in self._data or any(( (path + ext in self._data) for ext in COMPRESSION_EXTENSIONS ))
@@ -731,6 +744,9 @@ class HttpInterface(StorageInterface):
     if secrets and 'user' in secrets and 'password' in secrets:
       self.session.auth = (secrets['user'], secrets['password'])

+  def default_headers(self):
+    return {}
+
   def get_path_to_file(self, file_path):
     return posixpath.join(self._path.host, self._path.path, file_path)
@@ -749,7 +765,8 @@ class HttpInterface(StorageInterface):
   @retry
   def head(self, file_path):
     key = self.get_path_to_file(file_path)
-    with self.session.head(key) as resp:
+    headers = self.default_headers()
+    with self.session.head(key, headers=headers) as resp:
       resp.raise_for_status()
       return resp.headers
@@ -761,13 +778,14 @@ class HttpInterface(StorageInterface):
   def get_file(self, file_path, start=None, end=None, part_size=None):
     key = self.get_path_to_file(file_path)

+    headers = self.default_headers()
     if start is not None or end is not None:
       start = int(start) if start is not None else 0
       end = int(end - 1) if end is not None else ''
-      headers = { "Range": f"bytes={start}-{end}" }
-      resp = self.session.get(key, headers=headers)
-    else:
-      resp = self.session.get(key)
+      headers["Range"] = f"bytes={start}-{end}"
+
+    resp = self.session.get(key, headers=headers)
+
     if resp.status_code in (404, 403):
       return (None, None, None, None)
     resp.close()
@@ -788,7 +806,8 @@ class HttpInterface(StorageInterface):
   @retry
   def exists(self, file_path):
     key = self.get_path_to_file(file_path)
-    with self.session.get(key, stream=True) as resp:
+    headers = self.default_headers()
+    with self.session.get(key, stream=True, headers=headers) as resp:
       return resp.ok

   def files_exist(self, file_paths):
@@ -805,11 +824,15 @@ class HttpInterface(StorageInterface):
     if prefix and prefix[-1] != '/':
       prefix += '/'

+    headers = self.default_headers()
+
    @retry
    def request(token):
+      nonlocal headers
      results = self.session.get(
        f"https://storage.googleapis.com/storage/v1/b/{bucket}/o",
        params={ "prefix": prefix, "pageToken": token },
+        headers=headers,
      )
      results.raise_for_status()
      results.close()
@@ -832,12 +855,13 @@ class HttpInterface(StorageInterface):
     baseurl = posixpath.join(self._path.host, self._path.path)

     directories = ['']
+    headers = self.default_headers()

     while directories:
       directory = directories.pop()
       url = posixpath.join(baseurl, directory)

-      resp = requests.get(url)
+      resp = requests.get(url, headers=headers)
       resp.raise_for_status()

       if 'text/html' not in resp.headers["Content-Type"]:
@@ -1200,3 +1224,16 @@ class S3Interface(StorageInterface):
     with S3_BUCKET_POOL_LOCK:
       pool = S3_POOL[S3ConnectionPoolParams(service, self._path.bucket, self._request_payer)]
     pool.release_connection(self._conn)
+
+class CaveInterface(HttpInterface):
+  """
+  CAVE is an internal system that powers proofreading
+  systems in Seung Lab. If you have no idea what this
+  is, don't worry about it.
+  see: https://github.com/CAVEconnectome
+  """
+  def default_headers(self):
+    cred = cave_credentials()
+    return {
+      "Authorization": f"Bearer {cred['token']}",
+    }
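The default_headers hook introduced in this file is a small template-method pattern: the base HttpInterface returns {} and threads that dict through head, get_file, exists, and list, so an authenticating subclass like CaveInterface only has to override one method. A hedged sketch of another subclass using the same hook (the header scheme here is invented for illustration):

from cloudfiles.interfaces import HttpInterface

class ApiKeyInterface(HttpInterface):
    """Hypothetical interface injecting a static API key into every request."""
    def default_headers(self):
        # picked up automatically by every HTTP call in HttpInterface
        return { "X-Api-Key": "example-key" }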
cloudfiles/lib.py
CHANGED
@@ -53,8 +53,11 @@ def mkdir(path):
   return path

 def touch(path):
-  mkdir(os.path.dirname(path))
-  open(path, 'a').close()
+  if os.path.exists(path):
+    os.utime(path)
+  else:
+    mkdir(os.path.dirname(path))
+    open(path, 'a').close()

 def nvl(*args):
   """Return the leftmost argument that is not None."""
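This change brings lib.touch closer to POSIX touch: an existing file only has its modification time refreshed via os.utime, while a missing file still gets its parent directories created and a zero byte file written. A quick demonstration of both branches (temp paths generated at runtime):

import os, tempfile
from cloudfiles.lib import touch

path = os.path.join(tempfile.mkdtemp(), "nested", "example.txt")
touch(path)                      # creates nested/ and a zero byte file
before = os.path.getmtime(path)
touch(path)                      # existing file: mtime refreshed, size unchanged
assert os.path.getmtime(path) >= before
assert os.path.getsize(path) == 0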
cloudfiles/paths.py
CHANGED
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 import orjson
 import os.path
 import posixpath

@@ -8,9 +8,10 @@ import sys
 import urllib.parse

 from typing import Tuple, Optional
+from .typing import GetPathType

 from .exceptions import UnsupportedProtocolError
-from .lib import yellow, toabs, jsonify, mkdir
+from .lib import yellow, toabs, jsonify, mkdir, toiter
 from .secrets import CLOUD_FILES_DIR

 ExtractedPath = namedtuple('ExtractedPath',

@@ -26,7 +27,8 @@ ALIASES_FROM_FILE = None
 ALIASES = {}
 BASE_ALLOWED_PROTOCOLS = [
   'gs', 'file', 's3',
-  'http', 'https', 'mem'
+  'http', 'https', 'mem',
+  'middleauth+https', 'ngauth+https',
 ]
 ALLOWED_PROTOCOLS = list(BASE_ALLOWED_PROTOCOLS)
 ALLOWED_FORMATS = [

@@ -69,7 +71,13 @@ def cloudpath_error(cloudpath):
 def mkregexp():
   fmt_capture = r'|'.join(ALLOWED_FORMATS)
   fmt_capture = "(?:(?P<fmt>{})://)".format(fmt_capture)
-  proto_capture = r'|'.join(ALLOWED_PROTOCOLS)
+
+  allowed_protos = [
+    p.replace('+', r'\+')
+    for p in ALLOWED_PROTOCOLS
+  ]
+
+  proto_capture = r'|'.join(allowed_protos)
   proto_capture = "(?:(?P<proto>{})://)".format(proto_capture)
   regexp = "{}?{}?".format(fmt_capture, proto_capture)
   return regexp
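The mkregexp change exists because the new protocols contain a literal +, which is a quantifier in regular expressions; without escaping, the pattern piece middleauth+https would mean "middleaut, one or more h, then https" instead of the literal protocol name. A tiny stdlib illustration:

import re

protocols = ["middleauth+https", "ngauth+https", "gs"]

unescaped = "|".join(protocols)
escaped = "|".join(p.replace("+", r"\+") for p in protocols)

print(re.match(unescaped, "middleauth+https"))  # None: '+' acted as a quantifier
print(re.match(escaped, "middleauth+https"))    # matches the literal protocol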
@@ -292,8 +300,12 @@ def extract_format_protocol(cloudpath:str, allow_defaults=True) -> tuple:
   proto = m.group('proto')
   endpoint = None

-  if proto in ('http', 'https'):
-    cloudpath = proto + "://" + cloudpath
+  tmp_proto = None
+  if proto is not None:
+    tmp_proto = proto.replace("middleauth+", "").replace("ngauth+", "")
+
+  if tmp_proto in ('http', 'https'):
+    cloudpath = tmp_proto + "://" + cloudpath
     parse = urllib.parse.urlparse(cloudpath)
     endpoint = parse.scheme + "://" + parse.netloc
     cloudpath = cloudpath.replace(endpoint, '', 1)
@@ -379,3 +391,30 @@ def to_https_protocol(cloudpath):
     cloudpath = cloudpath.replace(f"{alias}://", host, 1)

   return cloudpath.replace("s3://", "", 1)
+
+def find_common_buckets(cloudpaths:GetPathType):
+  cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
+  clustered = defaultdict(list)
+
+  for path in cloudpaths:
+    pth = path
+    byte_range = None
+    if isinstance(path, dict):
+      pth = path["path"]
+      byte_range = path["byte_range"]
+
+    epath = extract(pth)
+    if epath.protocol == "file":
+      path = os.sep.join(asfilepath(epath).split(os.sep)[2:])
+      bucketpath = "file://" + os.sep.join(asfilepath(epath).split(os.sep)[:2])
+    else:
+      path = epath.path
+      bucketpath = asbucketpath(epath)
+
+    clustered[bucketpath].append({
+      "path": path,
+      "start": (byte_range[0] if byte_range else None), # type: ignore
+      "end": (byte_range[1] if byte_range else None), # type: ignore
+    })
+
+  return clustered
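find_common_buckets generalizes the clustering that dl used to do inline: paths are grouped by their bucket (or, for file paths, the first two path components) so a caller can open one connection pool per bucket. A hedged sketch of the output shape (paths invented):

from cloudfiles.paths import find_common_buckets

clustered = find_common_buckets([
  "gs://bucket-a/x/1.bin",
  "gs://bucket-a/x/2.bin",
  { "path": "s3://bucket-b/y.bin", "byte_range": (0, 100) },
])
# clustered is a defaultdict(list) keyed by bucket path, roughly:
# { "gs://bucket-a": [ {"path": "x/1.bin", "start": None, "end": None}, ... ],
#   "s3://bucket-b": [ {"path": "y.bin", "start": 0, "end": 100} ] }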
cloudfiles/resumable_tools.py
CHANGED
@@ -39,6 +39,9 @@ class ResumableFileSet:
     self.conn = sqlite3.connect(db_path)
     self.lease_msec = int(lease_msec)

+    self._total = 0
+    self._total_dirty = True
+
   def __del__(self):
     self.conn.close()

@@ -46,6 +49,7 @@ class ResumableFileSet:
     cur = self.conn.cursor()
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
     cur.close()

   def create(self, src, dest, reencode=None):

@@ -53,6 +57,7 @@ class ResumableFileSet:

     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")

     cur.execute(f"""
       CREATE TABLE xfermeta (

@@ -78,6 +83,18 @@ class ResumableFileSet:
     """)
     cur.execute("CREATE INDEX resumableidxfin ON filelist(finished,lease)")
     cur.execute("CREATE INDEX resumableidxfile ON filelist(filename)")
+
+    cur.execute(f"""
+      CREATE TABLE stats (
+        id {INTEGER} PRIMARY KEY {AUTOINC},
+        key TEXT NOT NULL,
+        value {INTEGER}
+      )
+    """)
+    cur.execute(
+      "INSERT INTO stats(id, key, value) VALUES (?,?,?)",
+      [1, 'finished', 0]
+    )
     cur.close()

   def insert(self, fname_iter):

@@ -91,7 +108,9 @@ class ResumableFileSet:
       cur.execute(f"INSERT INTO filelist(filename,finished,lease) VALUES {bindlist}", filenames)
       cur.execute("commit")

-    cur.close()
+    cur.close()
+
+    self._total_dirty = True

   def metadata(self):
     cur = self.conn.cursor()

@@ -111,6 +130,7 @@ class ResumableFileSet:
     for filenames in sip(fname_iter, SQLITE_MAX_PARAMS):
       bindlist = ",".join([f"{BIND}"] * len(filenames))
       cur.execute(f"UPDATE filelist SET finished = 1 WHERE filename in ({bindlist})", filenames)
+      cur.execute(f"UPDATE stats SET value = value + {len(filenames)} WHERE id = 1")
     cur.execute("commit")
     cur.close()

@@ -120,7 +140,7 @@ class ResumableFileSet:
     N = 0

     while True:
-      ts = now_msec()
+      ts = now_msec()
       cur.execute(f"""SELECT filename FROM filelist WHERE finished = 0 AND lease <= {ts} LIMIT {int(block_size)}""")
       rows = cur.fetchmany(block_size)
       N += len(rows)
@@ -140,31 +160,46 @@ class ResumableFileSet:

     cur.close()

-  def total(self):
+  def _scalar_query(self, sql:str) -> int:
     cur = self.conn.cursor()
-    cur.execute(f"SELECT max(id) FROM filelist")
+    cur.execute(sql)
     res = cur.fetchone()
     cur.close()
     return int(res[0])

+  def total(self):
+    """Returns the total number of tasks (both processed and unprocessed)."""
+    if not self._total_dirty:
+      return self._total
+
+    self._total = self._scalar_query(f"SELECT max(id) FROM filelist")
+    self._total_dirty = False
+    return self._total
+
+  def finished(self):
+    return self._scalar_query(f"SELECT value FROM stats WHERE id = 1")
+
   def remaining(self):
-    cur = self.conn.cursor()
-    cur.execute(f"SELECT count(filename) FROM filelist WHERE finished = 0")
-    res = cur.fetchone()
-    cur.close()
-    return int(res[0])
+    return self.total() - self.finished()
+
+  def num_leased(self):
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease > {ts}"
+    )

   def available(self):
-    ts = int(now_msec())
-    cur = self.conn.cursor()
-    cur.execute(f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease <= {ts}")
-    res = cur.fetchone()
-    cur.close()
-    return int(res[0])
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease <= {ts}"
+    )

   def release(self):
+    cur = self.conn.cursor()
     cur.execute(f"UPDATE filelist SET lease = 0")
     cur.execute("commit")
+    cur.close()

   def __len__(self):
     return self.remaining()
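A note on the refactor above: the stats table plus the cached _total make progress queries cheap — finished() reads a single row, total() caches max(id) and only requeries after inserts, and remaining() is just their difference — instead of count(*) scans over a potentially huge filelist. A hedged sketch of how the counters relate (the constructor arguments are assumed from the fields __init__ sets; db path invented):

from cloudfiles.resumable_tools import ResumableFileSet

# assumed signature: positional db_path plus a lease_msec keyword
rfs = ResumableFileSet("xfer.db", lease_msec=60000)
rfs.create("file:///tmp/src", "gs://example-bucket/dest")
rfs.insert(( f"file_{i}" for i in range(1000) ))

assert rfs.total() == 1000
assert rfs.finished() == 0
assert rfs.remaining() == rfs.total() - rfs.finished()  # the maintained invariant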
cloudfiles/secrets.py
CHANGED
@@ -137,6 +137,24 @@ def aws_credentials(bucket = '', service = 'aws', skip_files=False):
   AWS_CREDENTIALS_CACHE[service][bucket] = aws_credentials
   return aws_credentials

+CAVE_CREDENTIALS = None
+def cave_credentials():
+  global CAVE_CREDENTIALS
+  default_file_path = 'cave-secret.json'
+  path = secretpath(default_file_path)
+
+  if CAVE_CREDENTIALS:
+    return CAVE_CREDENTIALS
+
+  if os.path.exists(path):
+    with open(path, 'rt') as f:
+      CAVE_CREDENTIALS = json.loads(f.read())
+  else:
+    CAVE_CREDENTIALS = None
+
+  return CAVE_CREDENTIALS
+
+
 HTTP_CREDENTIALS = None
 def http_credentials():
   global HTTP_CREDENTIALS
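cave_credentials follows the same memoize-then-read-disk pattern as http_credentials below it: the secret is loaded once from cave-secret.json (resolved by secretpath into the configured secrets directory) and cached in a module global. Based on how CaveInterface consumes cred['token'], the secret file presumably looks like:

{ "token": "<your CAVE API token>" }

The exact on-disk location depends on how secretpath resolves the secrets directory in your environment.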
cloudfiles_cli/cloudfiles_cli.py
CHANGED
@@ -27,7 +27,7 @@ import cloudfiles.paths
 from cloudfiles import CloudFiles
 from cloudfiles.resumable_tools import ResumableTransfer
 from cloudfiles.compression import transcode
-from cloudfiles.paths import extract, get_protocol
+from cloudfiles.paths import extract, get_protocol, find_common_buckets
 from cloudfiles.lib import (
   mkdir, toabs, sip, toiter,
   first, red, green,
@@ -184,10 +184,6 @@ def cp(

   If source is "-" read newline delimited filenames from stdin.
   If destination is "-" output to stdout.
-
-  Note that for gs:// to gs:// transfers, the gsutil
-  tool is more efficient because the files never leave
-  Google's network.
   """
   use_stdout = (destination == '-')
   if len(source) > 1 and not ispathdir(destination) and not use_stdout:
@@ -330,6 +326,163 @@ def _cp_stdout(src, no_sign_request, paths):
     content = res["content"].decode("utf8")
     sys.stdout.write(content)

+@main.command()
+@click.argument("source", nargs=-1)
+@click.argument("destination", nargs=1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
+@click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def mv(
+  ctx, source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+):
+  """
+  Move one or more files from a source to destination.
+
+  If source is "-" read newline delimited filenames from stdin.
+  If destination is "-" output to stdout.
+  """
+  if len(source) > 1 and not ispathdir(destination):
+    print("cloudfiles: destination must be a directory for multiple source files.")
+    return
+
+  ctx.ensure_object(dict)
+  parallel = int(ctx.obj.get("parallel", 1))
+
+  for src in source:
+    _mv_single(
+      src, destination,
+      progress, block_size,
+      part_bytes, no_sign_request,
+      parallel
+    )
+
+def _mv_single(
+  source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+  parallel
+):
+  use_stdin = (source == '-')
+
+  nsrc = normalize_path(source)
+  ndest = normalize_path(destination)
+
+  issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
+  isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())
+
+  ensrc = cloudfiles.paths.extract(nsrc)
+  endest = cloudfiles.paths.extract(ndest)
+
+  if ensrc.protocol == "file" and endest.protocol == "file" and issrcdir:
+    shutil.move(nsrc.replace("file://", ""), ndest.replace("file://", ""))
+    return
+
+  recursive = issrcdir
+
+  # For more information see:
+  # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
+  # Try to follow cp rules. If the directory exists,
+  # copy the base source directory into the dest directory
+  # If the directory does not exist, then we copy into
+  # the dest directory.
+  # Both x* and x** should not copy the base directory
+  if recursive and nsrc[-1] != "*":
+    if isdestdir:
+      if nsrc[-1] == '/':
+        nsrc = nsrc[:-1]
+      ndest = cloudpathjoin(ndest, os.path.basename(nsrc))
+
+  # The else clause here is to handle single file transfers
+  srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
+  many, flat, prefix = get_mfp(nsrc, recursive)
+
+  if issrcdir and not many:
+    print(f"cloudfiles: {source} is a directory (not copied).")
+    return
+
+  xferpaths = os.path.basename(nsrc)
+  if use_stdin:
+    xferpaths = sys.stdin.readlines()
+    xferpaths = [ x.replace("\n", "") for x in xferpaths ]
+    prefix = os.path.commonprefix(xferpaths)
+    xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
+    srcpath = cloudpathjoin(srcpath, prefix)
+  elif many:
+    xferpaths = CloudFiles(
+      srcpath, no_sign_request=no_sign_request
+    ).list(prefix=prefix, flat=flat)
+
+  destpath = ndest
+  if isinstance(xferpaths, str):
+    destpath = ndest if isdestdir else os.path.dirname(ndest)
+  elif not isdestdir:
+    if os.path.exists(ndest.replace("file://", "")):
+      print(f"cloudfiles: {ndest} is not a directory (not copied).")
+      return
+
+  if not isinstance(xferpaths, str):
+    if parallel == 1:
+      _mv(srcpath, destpath, progress, block_size, part_bytes, no_sign_request, xferpaths)
+      return
+
+    total = None
+    try:
+      total = len(xferpaths)
+    except TypeError:
+      pass
+
+    fn = partial(_mv, srcpath, destpath, False, block_size, part_bytes, no_sign_request)
+
+    with tqdm(desc="Moving", total=total, disable=(not progress)) as pbar:
+      with pathos.pools.ProcessPool(parallel) as executor:
+        for _ in executor.imap(fn, sip(xferpaths, block_size)):
+          pbar.update(block_size)
+  else:
+    cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
+    if not cfsrc.exists(xferpaths):
+      print(f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}")
+      return
+
+    cfdest = CloudFiles(
+      destpath,
+      progress=progress,
+      composite_upload_threshold=part_bytes,
+    )
+
+    cfsrc.move(xferpaths, ndest)
+
+def _mv(src, dst, progress, block_size, part_bytes, no_sign_request, paths):
+  cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
+  cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
+  cfsrc.moves(
+    cfdest, paths=paths, block_size=block_size
+  )
+
+@main.command()
+@click.argument("sources", nargs=-1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def touch(
+  ctx, sources,
+  progress, no_sign_request,
+):
+  sources = list(map(normalize_path, sources))
+  sources = [ src.replace("precomputed://", "") for src in sources ]
+  pbar = tqdm(total=len(sources), desc="Touch", disable=(not progress))
+
+  clustered = find_common_buckets(sources)
+
+  with pbar:
+    for bucket, items in clustered.items():
+      cf = CloudFiles(bucket, no_sign_request=no_sign_request, progress=False)
+      cf.touch(items)
+      pbar.update(len(items))

 @main.group("xfer")
 def xfergroup():
   """
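Hedged shell examples corresponding to the new commands (bucket names invented; -p is the existing global parallelism flag this CLI already uses with cp):

# rename a file across buckets
cloudfiles mv s3://example-bkt/file.txt gs://other-bkt/renamed.txt

# move a whole prefix with 4 processes, showing progress
cloudfiles -p 4 mv --progress s3://example-bkt/logs/ gs://other-bkt/logs/

# create zero byte placeholders; touch groups them by bucket via find_common_buckets
cloudfiles touch gs://example-bkt/a gs://example-bkt/b s3://other-bkt/c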
cloud_files-4.26.0.dist-info/RECORD
DELETED

@@ -1,26 +0,0 @@
-cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
-cloudfiles/buckets.py,sha256=eRAYdDfvVpNyJyK5ryDRMwgNJUeEuFBJ6doWU2JkAcA,74
-cloudfiles/cloudfiles.py,sha256=YUf_-7DS8-2mCKOWT6mMKxl1glxMTEGomhpCnebtsy8,44801
-cloudfiles/compression.py,sha256=pqYdpu5vfFv-094BpfZ2pgRjVu7ESM9pAZC09P6E8bY,6150
-cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
-cloudfiles/exceptions.py,sha256=H2IcMlZoy2Bsn-6wCPwyLDjg66LZCyxtcf3s_p21FDw,770
-cloudfiles/gcs.py,sha256=_njJ7TpqwrHCjPHRGkBN5alCrCWKM2m9qdy5DhxMZ7U,3718
-cloudfiles/interfaces.py,sha256=Qqhjv2GIVw3ibaTgPpiGK97i7aBDgU9O0bBuOiLI2KM,36117
-cloudfiles/lib.py,sha256=fEqL5APu_WQhl2yxqQbwE7msHdu7U8pstAJw6LgoKO0,5142
-cloudfiles/paths.py,sha256=WHuMbVtKk9nm9akfNF9dgH94awnrWXVIo5mbCvtc0LQ,10302
-cloudfiles/resumable_tools.py,sha256=pK-VcoPjQ2BjGjvlvH4dDCBf6lNsqHG-weiBgxVFbzA,5838
-cloudfiles/scheduler.py,sha256=DqDANmOpB3NdzFgJDNMMibRIkCrXQqIh2XGL8GWoc9c,3668
-cloudfiles/secrets.py,sha256=3BSV2Hn8FGGn4QCs5FP3eGs4WEs5cIXRBsXuF0eIgIY,4918
-cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
-cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
-cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
-cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
-cloudfiles_cli/cloudfiles_cli.py,sha256=eETIOK4QyztQcpA4ZRny21SobLtcrPDlzZ_JaKBmmmA,28449
-cloud_files-4.26.0.dist-info/AUTHORS,sha256=7E2vC894bbLPO_kvUuEB2LFZZbIxZn23HabxH7x0Hgo,266
-cloud_files-4.26.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
-cloud_files-4.26.0.dist-info/METADATA,sha256=-Rsfl3gNmmS8zSMTI7FHTYZ8TEuhzWgF0UKhxoBCdRk,26804
-cloud_files-4.26.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
-cloud_files-4.26.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
-cloud_files-4.26.0.dist-info/pbr.json,sha256=Q1hsyLUlpIPjOyXcpmmGewWq1Difl_oiqt8EjJXRGOE,46
-cloud_files-4.26.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
-cloud_files-4.26.0.dist-info/RECORD,,

cloud_files-4.26.0.dist-info/pbr.json
DELETED

@@ -1 +0,0 @@
-{"git_version": "3ae7c76", "is_release": true}

cloudfiles/buckets.py
DELETED

{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/LICENSE: file without changes
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/entry_points.txt: file without changes
{cloud_files-4.26.0.dist-info → cloud_files-4.28.0.dist-info}/top_level.txt: file without changes