cloud-files 4.27.0__tar.gz → 4.28.1__tar.gz
This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {cloud-files-4.27.0 → cloud_files-4.28.1}/AUTHORS +1 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/ChangeLog +12 -1
- {cloud-files-4.27.0 → cloud_files-4.28.1}/PKG-INFO +11 -1
- {cloud-files-4.27.0 → cloud_files-4.28.1}/README.md +10 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/automated_test.py +89 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloud_files.egg-info/PKG-INFO +11 -1
- cloud_files-4.28.1/cloud_files.egg-info/pbr.json +1 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/cloudfiles.py +180 -24
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/interfaces.py +8 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/lib.py +5 -2
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/paths.py +30 -2
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/resumable_tools.py +50 -15
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles_cli/cloudfiles_cli.py +158 -5
- cloud-files-4.27.0/cloud_files.egg-info/pbr.json +0 -1
- {cloud-files-4.27.0 → cloud_files-4.28.1}/.github/workflows/test-suite.yml +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/LICENSE +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/MANIFEST.in +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloud_files.egg-info/SOURCES.txt +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloud_files.egg-info/dependency_links.txt +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloud_files.egg-info/entry_points.txt +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloud_files.egg-info/not-zip-safe +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloud_files.egg-info/requires.txt +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloud_files.egg-info/top_level.txt +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/__init__.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/compression.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/connectionpools.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/exceptions.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/gcs.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/scheduler.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/secrets.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/threaded_queue.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles/typing.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles_cli/LICENSE +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/cloudfiles_cli/__init__.py +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/requirements.txt +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/setup.cfg +0 -0
- {cloud-files-4.27.0 → cloud_files-4.28.1}/setup.py +0 -0
AUTHORS
@@ -1,6 +1,7 @@
 Manuel Castro <macastro@princeton.edu>
 Nico Kemnitz <nkemnitz@princeton.edu>
 V24 <55334829+umarfarouk98@users.noreply.github.com>
+William Silversmith <william.silvermsith@gmail.com>
 William Silversmith <william.silversmith@gmail.com>
 madiganz <madiganz@users.noreply.github.com>
 ranlu <ranlu@users.noreply.github.com>
ChangeLog
@@ -1,9 +1,20 @@
 CHANGES
 =======
 
-4.27.0
+4.28.1
 ------
 
+* fix(CloudFile.join): add definition of join to CloudFile
+
+4.28.0
+------
+
+* feat: add cf.move(s), cf.touch methods and cli mv, touch commands (#107)
+* fix: add drop table stats
+* perf: add in stats table for faster xfer performance
+* feat: import improvements to ResumableFileSet from transcoder
+* fix: release in xfer
+* fix: leasing was broken
 * feat: add middleauth+https paths indicate CAVE interface (#106)
 
 4.26.0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.27.0
+Version: 4.28.1
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -88,6 +88,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded
 
+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()
@@ -464,6 +470,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers
README.md
@@ -38,6 +38,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded
 
+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()
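The README additions above are terse; a slightly fuller sketch of how the new calls compose (bucket and file names are placeholders, not from the package):

    from cloudfiles import CloudFiles

    cf = CloudFiles("gs://bucket")
    cf.touch("log.txt")  # create a zero byte file if it doesn't exist
    cf.move("log.txt", "gs://bucket/logs/log.txt")  # single rename; dest may be a full cloudpath
    cf.moves("gs://bucket/logs/", [ ("a", "b"), ("c", "d") ])  # batched renames under one destination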
@@ -414,6 +420,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers
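For orientation, the new CLI verbs map onto the library calls documented earlier in the README; a rough equivalence sketch (paths are placeholders):

    from cloudfiles import CloudFile, CloudFiles

    # cloudfiles mv s3://bkt/file.txt gs://bkt2/
    CloudFile("s3://bkt/file.txt").move("gs://bkt2/file.txt")

    # cloudfiles touch s3://bkt/empty.txt
    CloudFiles("s3://bkt").touch("empty.txt")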
automated_test.py
@@ -1151,3 +1151,92 @@ def test_lock_clearing():
   assert len(lst) == 0
 
 
+@pytest.mark.parametrize("protocol", ('mem', 'file', 's3'))
+def test_move(s3, protocol):
+  from cloudfiles import CloudFiles
+
+  url = compute_url(protocol, "move")
+
+  cf = CloudFiles(url)
+  cf.puts([
+    ('hello', b'world'),
+    ('lamp', b'emporium'),
+  ])
+  cf.move("hello", f"{url}/hola")
+
+  assert all(cf.exists(["hola"]).values()) == True
+  assert all(cf.exists(["hello"]).values()) == False
+
+  cf.puts([
+    ('hello', b'world'),
+    ('lamp', b'emporium'),
+  ])
+
+  cf.delete("hola")
+
+  cf.moves(f"{url}", [
+    ("hello", f"hola"),
+    ("lamp", f"lampara"),
+  ])
+
+  assert all(cf.exists(["hola", "lampara"]).values()) == True
+  assert all(cf.exists(["hello", "lamp"]).values()) == False
+
+  cf.delete([ "hola", "hello", "lamp", "lampara" ])
+
+@pytest.mark.parametrize("protocol", ["file", "s3"])
+def test_cli_move_python(s3, protocol):
+  from cloudfiles_cli.cloudfiles_cli import _mv_single
+  from cloudfiles import CloudFiles, exceptions
+
+  test_dir = compute_url(protocol, "cli_mv_python")
+  test_dir2 = compute_url(protocol, "cli_mv_python2")
+  cf = CloudFiles(test_dir)
+
+  N = 100
+
+  def mkfiles():
+    cf.delete(cf.list())
+    for i in range(N):
+      cf[str(i)] = b"hello world"
+
+  def run_mv(src, dest):
+    _mv_single(
+      src, dest,
+      progress=False, block_size=5,
+      part_bytes=int(100e6), no_sign_request=True,
+      parallel=1
+    )
+
+  mkfiles()
+  run_mv(test_dir, test_dir2)
+  assert sorted(list(cf)) == []
+
+  cf2 = CloudFiles(test_dir2)
+  print(sorted(list(cf2)))
+  assert sorted(list(cf2)) == sorted([ f'{i}' for i in range(N) ])
+
+  mkfiles()
+  run_mv(f"{test_dir}/*", f"{test_dir}/move/")
+  assert sorted(list(cf.list(prefix="move"))) == sorted([ f'move/{i}' for i in range(N) ])
+
+  mkfiles()
+  run_mv(f"{test_dir}/1", f"{test_dir}/move/1")
+  assert cf.exists("move/1") == True
+  assert cf.exists("1") == False
+
+@pytest.mark.parametrize("protocol", ["file", "mem", "s3"])
+def test_touch(s3, protocol):
+  from cloudfiles import CloudFiles
+
+  url = compute_url(protocol, "touch")
+
+  cf = CloudFiles(url)
+
+  cf.touch([ str(i) for i in range(20) ])
+
+  assert sorted(list(cf)) == sorted([ str(i) for i in range(20) ])
+
+  cf.touch([ str(i) for i in range(20) ])
+
+  assert sorted(list(cf)) == sorted([ str(i) for i in range(20) ])
cloud_files.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cloud-files
-Version: 4.27.0
+Version: 4.28.1
 Summary: Fast access to cloud storage and local FS.
 Home-page: https://github.com/seung-lab/cloud-files/
 Author: William Silversmith
@@ -88,6 +88,12 @@ cf.delete(paths, parallel=2) # threaded + two processes
 boolean = cf.exists('filename')
 results = cf.exists([ 'filename_1', ... ]) # threaded
 
+cf.move("a", "gs://bucket/b")
+cf.moves("gs://bucket/", [ ("a", "b") ])
+
+cf.touch("example")
+cf.touch([ "example", "example2" ])
+
 # for single files
 cf = CloudFile("gs://bucket/file1")
 info = cf.head()
@@ -464,6 +470,10 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
 cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
 # decompress
 cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
+# move or rename files
+cloudfiles mv s3://bkt/file.txt gs://bkt2/
+# create an empty file if not existing
+cloudfiles touch s3://bkt/empty.txt
 # pass from stdin (use "-" for source argument)
 find some_dir | cloudfiles cp - s3://bkt/
 # resumable transfers
cloud_files-4.28.1/cloud_files.egg-info/pbr.json (new file)
@@ -0,0 +1 @@
+{"git_version": "d352eec", "is_release": true}
cloudfiles/cloudfiles.py
@@ -2,7 +2,7 @@ from typing import (
   Any, Dict, Optional,
   Union, List, Tuple,
   Callable, Generator,
-  Sequence, cast
+  Sequence, cast, BinaryIO
 )
 
 from queue import Queue
@@ -29,10 +29,10 @@ from . import compression, paths, gcs
 from .exceptions import UnsupportedProtocolError, MD5IntegrityError, CRC32CIntegrityError
 from .lib import (
   mkdir, totalfn, toiter, scatter, jsonify, nvl,
-  duplicates, first, sip,
+  duplicates, first, sip, touch,
   md5, crc32c, decode_crc32c_b64
 )
-from .paths import ALIASES
+from .paths import ALIASES, find_common_buckets
 from .secrets import CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
 from .threaded_queue import ThreadedQueue, DEFAULT_THREADS
 from .typing import (
@@ -182,7 +182,7 @@ def path_to_byte_range_tags(path):
   if isinstance(path, str):
     return (path, None, None, None)
   return (path['path'], path.get('start', None), path.get('end', None), path.get('tags', None))
-
+
 def dl(
   cloudpaths:GetPathType, raw:bool=False, **kwargs
 ) -> Union[bytes,List[dict]]:
@@ -193,23 +193,8 @@ def dl(
   dict.
   """
   cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
-  clustered = defaultdict(list)
-  total = 0
-  for path in cloudpaths:
-    pth = path
-    byte_range = None
-    if isinstance(path, dict):
-      pth = path["path"]
-      byte_range = path["byte_range"]
-
-    epath = paths.extract(pth)
-    bucketpath = paths.asbucketpath(epath)
-    clustered[bucketpath].append({
-      "path": epath.path,
-      "start": (byte_range[0] if byte_range else None), # type: ignore
-      "end": (byte_range[1] if byte_range else None), # type: ignore
-    })
-    total += 1
+  clustered = find_common_buckets(cloudpaths)
+  total = sum([ len(bucket) for bucket in clustered.values() ])
 
   progress = kwargs.get("progress", False) and total > 1
   pbar = tqdm(total=total, desc="Downloading", disable=(not progress))
@@ -919,6 +904,60 @@ class CloudFiles:
     )
     return len(results)
 
+  def touch(
+    self,
+    paths:GetPathType,
+    progress:Optional[bool] = None,
+    total:Optional[int] = None,
+    nocopy:bool = False,
+  ):
+    """
+    Create a zero byte file if it doesn't exist.
+    """
+    paths = toiter(paths)
+    progress = nvl(progress, self.progress)
+    total = totalfn(paths, total)
+
+    if self.protocol == "file":
+      basepath = self.cloudpath.replace("file://", "")
+      for path in tqdm(paths, disable=(not progress), total=total):
+        pth = path
+        if isinstance(path, dict):
+          pth = path["path"]
+        touch(self.join(basepath, pth))
+      return
+
+    results = self.exists(paths, total=total, progress=progress)
+
+    dne = [
+      (fname, b'')
+      for fname, exists in results.items()
+      if not exists
+    ]
+
+    self.puts(dne, progress=progress)
+
+    # def thunk_copy(path):
+    #   with self._get_connection() as conn:
+    #     conn.copy_file(path, self._path.bucket, self.join(self._path.path, path))
+    #   return 1
+
+    # if not nocopy:
+    #   already_exists = (
+    #     fname
+    #     for fname, exists in results.items()
+    #     if exists
+    #   )
+
+    #   results = schedule_jobs(
+    #     fns=( partial(thunk_copy, path) for path in already_exists ),
+    #     progress=progress,
+    #     total=(total - len(dne)),
+    #     concurrency=self.num_threads,
+    #     green=self.green,
+    #     count_return=True,
+    #   )
+
   def list(
     self, prefix:str = "", flat:bool = False
   ) -> Generator[str,None,None]:
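The method above takes two paths: for the file protocol it calls the filesystem touch() helper from lib.py directly, while object stores (which have no cheap metadata-only update in this interface) get an exists() sweep followed by uploading b'' for the missing keys. A quick sketch against the in-memory protocol (bucket name is arbitrary):

    from cloudfiles import CloudFiles

    cf = CloudFiles("mem://bucket/demo")
    cf.touch([ "a", "b", "c" ])  # uploads b'' for each missing key
    assert sorted(cf.list()) == [ "a", "b", "c" ]
    cf.touch("a")  # existing keys are left alone (the copy-based refresh is commented out)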
@@ -953,6 +992,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from this CloudFiles storage
@@ -969,7 +1009,7 @@ class CloudFiles:
     - gs->gs: Uses GCS copy API to minimize data movement
     - s3->s3: Uses boto s3 copy API to minimize data movement
 
-
+    cf_dest: another CloudFiles instance or cloudpath
     paths: if None transfer all files from src, else if
       an iterable, transfer only these files.
 
@@ -997,7 +1037,8 @@ class CloudFiles:
     return cf_dest.transfer_from(
       self, paths, block_size,
       reencode, content_type,
-      allow_missing,
+      allow_missing,
+      progress,
     )
 
   def transfer_from(
@@ -1008,6 +1049,7 @@ class CloudFiles:
     reencode:Optional[str] = None,
     content_type:Optional[str] = None,
     allow_missing:bool = False,
+    progress:Optional[bool] = None,
   ) -> None:
     """
     Transfer all files from the source CloudFiles storage
@@ -1054,7 +1096,15 @@ class CloudFiles:
 
     total = totalfn(paths, None)
 
-
+    disable = progress
+    if disable is None:
+      disable = self.progress
+    if disable is None:
+      disable = False
+    else:
+      disable = not disable
+
+    with tqdm(desc="Transferring", total=total, disable=disable) as pbar:
       if (
         cf_src.protocol == "file"
         and self.protocol == "file"
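The added block resolves a three-way progress setting: the explicit argument wins, then the instance default, then a hard default. A compact restatement of the intent (a sketch, not the library's code):

    def resolve_disable(progress, instance_progress):
      # explicit argument first, then the instance setting
      resolved = progress if progress is not None else instance_progress
      # tqdm takes "disable", the negation of "show progress"
      return False if resolved is None else not resolved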
@@ -1211,6 +1261,9 @@ class CloudFiles:
       else:
         raise
 
+      if dest_path == '':
+        dest_path = src_path
+
       to_upload.append({
         "path": dest_path,
         "content": handle,
@@ -1262,6 +1315,99 @@ class CloudFiles:
     )
     return len(results)
 
+  def move(self, src:str, dest:str):
+    """Move (rename) src to dest.
+
+    src and dest do not have to be on the same filesystem.
+    """
+    epath = paths.extract(dest)
+    full_cloudpath = paths.asprotocolpath(epath)
+    dest_cloudpath = paths.dirname(full_cloudpath)
+    base_dest = paths.basename(full_cloudpath)
+
+    return self.moves(dest_cloudpath, [
+      (src, base_dest)
+    ], block_size=1, progress=False)
+
+  def moves(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str, str]]],
+    block_size:int = 64,
+    total:Optional[int] = None,
+    progress:Optional[bool] = None,
+  ):
+    """
+    Move (rename) files.
+
+    pairs: [ (src, dest), (src, dest), ... ]
+    """
+    if isinstance(cf_dest, str):
+      cf_dest = CloudFiles(
+        cf_dest, progress=False,
+        green=self.green, num_threads=self.num_threads,
+      )
+
+    total = totalfn(paths, total)
+
+    disable = not (self.progress if progress is None else progress)
+
+    if self.protocol == "file" and cf_dest.protocol == "file":
+      self.__moves_file_to_file(
+        cf_dest, paths, total,
+        disable, block_size
+      )
+      return
+
+    pbar = tqdm(total=total, disable=disable, desc="Moving")
+
+    with pbar:
+      for subpairs in sip(paths, block_size):
+        subpairs = [
+          ((pair, pair) if isinstance(pair, str) else pair)
+          for pair in subpairs
+        ]
+
+        self.transfer_to(cf_dest, paths=(
+          {
+            "path": src,
+            "dest_path": dest,
+          }
+          for src, dest in subpairs
+        ), progress=False)
+        self.delete(( src for src, dest in subpairs ), progress=False)
+        pbar.update(len(subpairs))
+
+  def __moves_file_to_file(
+    self,
+    cf_dest:Any,
+    paths:Union[Sequence[str], Sequence[Tuple[str,str]]],
+    total:Optional[int],
+    disable:bool,
+    block_size:int,
+  ):
+    for pair in tqdm(paths, total=total, disable=disable, desc="Moving"):
+      if isinstance(pair, str):
+        src = pair
+        dest = pair
+      else:
+        (src, dest) = pair
+
+      src = self.join(self.cloudpath, src).replace("file://", "")
+      dest = cf_dest.join(cf_dest.cloudpath, dest).replace("file://", "")
+
+      if os.path.isdir(dest):
+        dest = cf_dest.join(dest, os.path.basename(src))
+      else:
+        mkdir(os.path.dirname(dest))
+
+      src, encoding = FileInterface.get_encoded_file_path(src)
+      _, dest_ext = os.path.splitext(dest)
+      dest_ext_compress = FileInterface.get_extension(encoding)
+      if dest_ext_compress != dest_ext:
+        dest += dest_ext_compress
+      shutil.move(src, dest)
+
   def join(self, *paths:str) -> str:
     """
     Convenience method for joining path strings
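Note that move() is a thin adapter over moves(): the destination is split into its directory and basename with paths.dirname/paths.basename, and a single (src, dest) pair is forwarded. In other words (placeholder paths):

    cf = CloudFiles("gs://bucket/a")
    cf.move("x", "gs://bucket/b/y")
    # behaves the same as:
    cf.moves("gs://bucket/b", [ ("x", "y") ], block_size=1, progress=False)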
@@ -1440,6 +1586,16 @@ class CloudFile:
       reencode=reencode,
     )
 
+  def join(self, *args):
+    return self.cf.join(*args)
+
+  def touch(self):
+    return self.cf.touch(self.filename)
+
+  def move(self, dest):
+    """Move (rename) this file to dest."""
+    return self.cf.move(self.filename, dest)
+
   def __len__(self):
     return self.size()
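With these three delegating methods the single-file CloudFile wrapper mirrors the plural CloudFiles API. Usage sketch (path is a placeholder):

    from cloudfiles import CloudFile

    cf = CloudFile("gs://bucket/file1")
    cf.touch()                    # delegates to CloudFiles.touch(self.filename)
    cf.move("gs://bucket/file2")  # delegates to CloudFiles.move(self.filename, dest)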
cloudfiles/interfaces.py
@@ -474,6 +474,14 @@ class MemoryInterface(StorageInterface):
 
     return None
 
+  def copy_file(self, src_path, dest_bucket, dest_key):
+    key = self.get_path_to_file(src_path)
+    with MEM_BUCKET_POOL_LOCK:
+      pool = MEM_POOL[MemoryPoolParams(dest_bucket)]
+      dest_bucket = pool.get_connection(None, None)
+      dest_bucket[dest_key] = self._data[key]
+    return True
+
   def exists(self, file_path):
     path = self.get_path_to_file(file_path)
     return path in self._data or any(( (path + ext in self._data) for ext in COMPRESSION_EXTENSIONS ))
cloudfiles/lib.py
@@ -53,8 +53,11 @@ def mkdir(path):
   return path
 
 def touch(path):
-  mkdir(os.path.dirname(path))
-  open(path, 'a').close()
+  if os.path.exists(path):
+    os.utime(path)
+  else:
+    mkdir(os.path.dirname(path))
+    open(path, 'a').close()
 
 def nvl(*args):
   """Return the leftmost argument that is not None."""
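The rewritten helper now matches POSIX touch semantics: refresh the mtime of an existing file, otherwise create the parent directory and an empty file. A quick check (the temporary path is illustrative):

    import os, time
    from cloudfiles.lib import touch

    touch("/tmp/cf_demo/example.txt")  # creates the directory and an empty file
    before = os.path.getmtime("/tmp/cf_demo/example.txt")
    time.sleep(0.01)
    touch("/tmp/cf_demo/example.txt")  # second call only refreshes the mtime
    assert os.path.getmtime("/tmp/cf_demo/example.txt") >= before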
cloudfiles/paths.py
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 import orjson
 import os.path
 import posixpath
@@ -8,9 +8,10 @@ import sys
 import urllib.parse
 
 from typing import Tuple, Optional
+from .typing import GetPathType
 
 from .exceptions import UnsupportedProtocolError
-from .lib import yellow, toabs, jsonify, mkdir
+from .lib import yellow, toabs, jsonify, mkdir, toiter
 from .secrets import CLOUD_FILES_DIR
 
 ExtractedPath = namedtuple('ExtractedPath',
@@ -390,3 +391,30 @@ def to_https_protocol(cloudpath):
     cloudpath = cloudpath.replace(f"{alias}://", host, 1)
 
   return cloudpath.replace("s3://", "", 1)
+
+def find_common_buckets(cloudpaths:GetPathType):
+  cloudpaths, is_multiple = toiter(cloudpaths, is_iter=True)
+  clustered = defaultdict(list)
+
+  for path in cloudpaths:
+    pth = path
+    byte_range = None
+    if isinstance(path, dict):
+      pth = path["path"]
+      byte_range = path["byte_range"]
+
+    epath = extract(pth)
+    if epath.protocol == "file":
+      path = os.sep.join(asfilepath(epath).split(os.sep)[2:])
+      bucketpath = "file://" + os.sep.join(asfilepath(epath).split(os.sep)[:2])
+    else:
+      path = epath.path
+      bucketpath = asbucketpath(epath)
+
+    clustered[bucketpath].append({
+      "path": path,
+      "start": (byte_range[0] if byte_range else None), # type: ignore
+      "end": (byte_range[1] if byte_range else None), # type: ignore
+    })
+
+  return clustered
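find_common_buckets groups mixed path inputs by bucket so that each bucket can be serviced by a single CloudFiles connection; for the file protocol the first two path components stand in for a bucket. Roughly, for illustration (placeholder buckets):

    from cloudfiles.paths import find_common_buckets

    clustered = find_common_buckets([ "gs://bucket_a/x", "gs://bucket_a/y", "s3://bucket_b/z" ])
    # approximately:
    # { "gs://bucket_a": [ {"path": "x", "start": None, "end": None},
    #                      {"path": "y", "start": None, "end": None} ],
    #   "s3://bucket_b": [ {"path": "z", "start": None, "end": None} ] }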
cloudfiles/resumable_tools.py
@@ -39,6 +39,9 @@ class ResumableFileSet:
     self.conn = sqlite3.connect(db_path)
     self.lease_msec = int(lease_msec)
 
+    self._total = 0
+    self._total_dirty = True
+
   def __del__(self):
     self.conn.close()
 
@@ -46,6 +49,7 @@ class ResumableFileSet:
     cur = self.conn.cursor()
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
     cur.close()
 
   def create(self, src, dest, reencode=None):
@@ -53,6 +57,7 @@ class ResumableFileSet:
 
     cur.execute("""DROP TABLE IF EXISTS filelist""")
     cur.execute("""DROP TABLE IF EXISTS xfermeta""")
+    cur.execute("""DROP TABLE IF EXISTS stats""")
 
     cur.execute(f"""
       CREATE TABLE xfermeta (
@@ -78,6 +83,18 @@ class ResumableFileSet:
     """)
     cur.execute("CREATE INDEX resumableidxfin ON filelist(finished,lease)")
     cur.execute("CREATE INDEX resumableidxfile ON filelist(filename)")
+
+    cur.execute(f"""
+      CREATE TABLE stats (
+        id {INTEGER} PRIMARY KEY {AUTOINC},
+        key TEXT NOT NULL,
+        value {INTEGER}
+      )
+    """)
+    cur.execute(
+      "INSERT INTO stats(id, key, value) VALUES (?,?,?)",
+      [1, 'finished', 0]
+    )
     cur.close()
 
   def insert(self, fname_iter):
@@ -91,7 +108,9 @@ class ResumableFileSet:
       cur.execute(f"INSERT INTO filelist(filename,finished,lease) VALUES {bindlist}", filenames)
     cur.execute("commit")
 
-    cur.close()
+    cur.close()
+
+    self._total_dirty = True
 
   def metadata(self):
     cur = self.conn.cursor()
@@ -111,6 +130,7 @@ class ResumableFileSet:
     for filenames in sip(fname_iter, SQLITE_MAX_PARAMS):
       bindlist = ",".join([f"{BIND}"] * len(filenames))
       cur.execute(f"UPDATE filelist SET finished = 1 WHERE filename in ({bindlist})", filenames)
+      cur.execute(f"UPDATE stats SET value = value + {len(filenames)} WHERE id = 1")
     cur.execute("commit")
     cur.close()
 
@@ -120,7 +140,7 @@ class ResumableFileSet:
     N = 0
 
     while True:
-      ts = now_msec()
+      ts = now_msec()
       cur.execute(f"""SELECT filename FROM filelist WHERE finished = 0 AND lease <= {ts} LIMIT {int(block_size)}""")
       rows = cur.fetchmany(block_size)
      N += len(rows)
@@ -140,31 +160,46 @@ class ResumableFileSet:
 
     cur.close()
 
-  def
+  def _scalar_query(self, sql:str) -> int:
     cur = self.conn.cursor()
-    cur.execute(
+    cur.execute(sql)
     res = cur.fetchone()
     cur.close()
     return int(res[0])
 
+  def total(self):
+    """Returns the total number of tasks (both processed and unprocessed)."""
+    if not self._total_dirty:
+      return self._total
+
+    self._total = self._scalar_query(f"SELECT max(id) FROM filelist")
+    self._total_dirty = False
+    return self._total
+
+  def finished(self):
+    return self._scalar_query(f"SELECT value FROM stats WHERE id = 1")
+
   def remaining(self):
-
-
-
-
-    return
+    return self.total() - self.finished()
+
+  def num_leased(self):
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease > {ts}"
+    )
 
   def available(self):
-
-
-
-
-    cur.close()
-    return int(res[0])
+    ts = int(now_msec())
+    return self._scalar_query(
+      f"SELECT count(filename) FROM filelist WHERE finished = 0 AND lease <= {ts}"
    )
 
   def release(self):
+    cur = self.conn.cursor()
     cur.execute(f"UPDATE filelist SET lease = 0")
     cur.execute("commit")
+    cur.close()
+
 
   def __len__(self):
     return self.remaining()
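The point of the new stats table is to make remaining() two O(1) scalar reads (max rowid plus a running counter) instead of a COUNT(*) scan over a potentially huge filelist. A distilled sketch of the pattern in plain sqlite3 (schema follows the diff, values are illustrative):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    cur.execute("CREATE TABLE filelist (id INTEGER PRIMARY KEY, filename TEXT, finished INTEGER, lease INTEGER)")
    cur.execute("CREATE TABLE stats (id INTEGER PRIMARY KEY, key TEXT NOT NULL, value INTEGER)")
    cur.execute("INSERT INTO stats(id, key, value) VALUES (?,?,?)", [1, 'finished', 0])
    cur.executemany(
      "INSERT INTO filelist(filename, finished, lease) VALUES (?, 0, 0)",
      [ (f"file_{i}",) for i in range(10) ],
    )
    # marking a batch finished bumps the counter in the same transaction
    batch = [ "file_0", "file_1" ]
    binds = ",".join("?" * len(batch))
    cur.execute(f"UPDATE filelist SET finished = 1 WHERE filename in ({binds})", batch)
    cur.execute(f"UPDATE stats SET value = value + {len(batch)} WHERE id = 1")

    total = cur.execute("SELECT max(id) FROM filelist").fetchone()[0]
    finished = cur.execute("SELECT value FROM stats WHERE id = 1").fetchone()[0]
    assert total - finished == 8  # remaining, without scanning filelist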
cloudfiles_cli/cloudfiles_cli.py
@@ -27,7 +27,7 @@ import cloudfiles.paths
 from cloudfiles import CloudFiles
 from cloudfiles.resumable_tools import ResumableTransfer
 from cloudfiles.compression import transcode
-from cloudfiles.paths import extract, get_protocol
+from cloudfiles.paths import extract, get_protocol, find_common_buckets
 from cloudfiles.lib import (
   mkdir, toabs, sip, toiter,
   first, red, green,
@@ -184,10 +184,6 @@ def cp(
 
   If source is "-" read newline delimited filenames from stdin.
   If destination is "-" output to stdout.
-
-  Note that for gs:// to gs:// transfers, the gsutil
-  tool is more efficient because the files never leave
-  Google's network.
   """
   use_stdout = (destination == '-')
   if len(source) > 1 and not ispathdir(destination) and not use_stdout:
@@ -330,6 +326,163 @@ def _cp_stdout(src, no_sign_request, paths):
     content = res["content"].decode("utf8")
     sys.stdout.write(content)
 
+@main.command()
+@click.argument("source", nargs=-1)
+@click.argument("destination", nargs=1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
+@click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def mv(
+  ctx, source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+):
+  """
+  Move one or more files from a source to destination.
+
+  If source is "-" read newline delimited filenames from stdin.
+  If destination is "-" output to stdout.
+  """
+  if len(source) > 1 and not ispathdir(destination):
+    print("cloudfiles: destination must be a directory for multiple source files.")
+    return
+
+  ctx.ensure_object(dict)
+  parallel = int(ctx.obj.get("parallel", 1))
+
+  for src in source:
+    _mv_single(
+      src, destination,
+      progress, block_size,
+      part_bytes, no_sign_request,
+      parallel
+    )
+
+def _mv_single(
+  source, destination,
+  progress, block_size,
+  part_bytes, no_sign_request,
+  parallel
+):
+  use_stdin = (source == '-')
+
+  nsrc = normalize_path(source)
+  ndest = normalize_path(destination)
+
+  issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
+  isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())
+
+  ensrc = cloudfiles.paths.extract(nsrc)
+  endest = cloudfiles.paths.extract(ndest)
+
+  if ensrc.protocol == "file" and endest.protocol == "file" and issrcdir:
+    shutil.move(nsrc.replace("file://", ""), ndest.replace("file://", ""))
+    return
+
+  recursive = issrcdir
+
+  # For more information see:
+  # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
+  # Try to follow cp rules. If the directory exists,
+  # copy the base source directory into the dest directory
+  # If the directory does not exist, then we copy into
+  # the dest directory.
+  # Both x* and x** should not copy the base directory
+  if recursive and nsrc[-1] != "*":
+    if isdestdir:
+      if nsrc[-1] == '/':
+        nsrc = nsrc[:-1]
+      ndest = cloudpathjoin(ndest, os.path.basename(nsrc))
+
+  # The else clause here is to handle single file transfers
+  srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
+  many, flat, prefix = get_mfp(nsrc, recursive)
+
+  if issrcdir and not many:
+    print(f"cloudfiles: {source} is a directory (not copied).")
+    return
+
+  xferpaths = os.path.basename(nsrc)
+  if use_stdin:
+    xferpaths = sys.stdin.readlines()
+    xferpaths = [ x.replace("\n", "") for x in xferpaths ]
+    prefix = os.path.commonprefix(xferpaths)
+    xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
+    srcpath = cloudpathjoin(srcpath, prefix)
+  elif many:
+    xferpaths = CloudFiles(
+      srcpath, no_sign_request=no_sign_request
+    ).list(prefix=prefix, flat=flat)
+
+  destpath = ndest
+  if isinstance(xferpaths, str):
+    destpath = ndest if isdestdir else os.path.dirname(ndest)
+  elif not isdestdir:
+    if os.path.exists(ndest.replace("file://", "")):
+      print(f"cloudfiles: {ndest} is not a directory (not copied).")
+      return
+
+  if not isinstance(xferpaths, str):
+    if parallel == 1:
+      _mv(srcpath, destpath, progress, block_size, part_bytes, no_sign_request, xferpaths)
+      return
+
+    total = None
+    try:
+      total = len(xferpaths)
+    except TypeError:
+      pass
+
+    fn = partial(_mv, srcpath, destpath, False, block_size, part_bytes, no_sign_request)
+
+    with tqdm(desc="Moving", total=total, disable=(not progress)) as pbar:
+      with pathos.pools.ProcessPool(parallel) as executor:
+        for _ in executor.imap(fn, sip(xferpaths, block_size)):
+          pbar.update(block_size)
+  else:
+    cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
+    if not cfsrc.exists(xferpaths):
+      print(f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}")
+      return
+
+    cfdest = CloudFiles(
+      destpath,
+      progress=progress,
+      composite_upload_threshold=part_bytes,
+    )
+
+    cfsrc.move(xferpaths, ndest)
+
+def _mv(src, dst, progress, block_size, part_bytes, no_sign_request, paths):
+  cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
+  cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
+  cfsrc.moves(
+    cfdest, paths=paths, block_size=block_size
+  )
+
+@main.command()
+@click.argument("sources", nargs=-1)
+@click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
+@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
+@click.pass_context
+def touch(
+  ctx, sources,
+  progress, no_sign_request,
+):
+  sources = list(map(normalize_path, sources))
+  sources = [ src.replace("precomputed://", "") for src in sources ]
+  pbar = tqdm(total=len(sources), desc="Touch", disable=(not progress))
+
+  clustered = find_common_buckets(sources)
+
+  with pbar:
+    for bucket, items in clustered.items():
+      cf = CloudFiles(bucket, no_sign_request=no_sign_request, progress=False)
+      cf.touch(items)
+      pbar.update(len(items))
+
 @main.group("xfer")
 def xfergroup():
   """
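Because mv and touch are ordinary click commands registered on the main group, they can be exercised in-process; a sketch using click's test runner (assuming main is importable as below, with local placeholder paths):

    from click.testing import CliRunner
    from cloudfiles_cli.cloudfiles_cli import main

    runner = CliRunner()
    assert runner.invoke(main, ["touch", "/tmp/cf_cli_demo/empty.txt"]).exit_code == 0
    assert runner.invoke(main, ["mv", "/tmp/cf_cli_demo/empty.txt", "/tmp/cf_cli_demo/renamed.txt"]).exit_code == 0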
cloud-files-4.27.0/cloud_files.egg-info/pbr.json (deleted)
@@ -1 +0,0 @@
-{"git_version": "e4b04bf", "is_release": true}
The remaining 23 files listed above are unchanged between the two versions.