dcicutils 8.9.0.0b0__tar.gz → 8.9.0.1b2__tar.gz

Files changed (82)
  1. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/PKG-INFO +6 -4
  2. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/command_utils.py +69 -1
  3. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/creds_utils.py +1 -1
  4. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ff_utils.py +4 -1
  5. dcicutils-8.9.0.1b2/dcicutils/file_utils.py +267 -0
  6. dcicutils-8.9.0.1b2/dcicutils/http_utils.py +39 -0
  7. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/misc_utils.py +82 -5
  8. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/portal_object_utils.py +24 -89
  9. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/portal_utils.py +249 -37
  10. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/schema_utils.py +1 -1
  11. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/scripts/view_portal_object.py +87 -5
  12. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/structured_data.py +59 -17
  13. dcicutils-8.9.0.1b2/dcicutils/submitr/ref_lookup_strategy.py +73 -0
  14. dcicutils-8.9.0.1b2/dcicutils/tmpfile_utils.py +76 -0
  15. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/zip_utils.py +27 -0
  16. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/pyproject.toml +8 -6
  17. dcicutils-8.9.0.0b0/dcicutils/file_utils.py +0 -58
  18. dcicutils-8.9.0.0b0/dcicutils/submitr/ref_lookup_strategy.py +0 -67
  19. dcicutils-8.9.0.0b0/dcicutils/tmpfile_utils.py +0 -36
  20. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/LICENSE.txt +0 -0
  21. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/README.rst +0 -0
  22. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/__init__.py +0 -0
  23. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/base.py +0 -0
  24. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/beanstalk_utils.py +0 -0
  25. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/bundle_utils.py +0 -0
  26. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/captured_output.py +0 -0
  27. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/cloudformation_utils.py +0 -0
  28. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/codebuild_utils.py +0 -0
  29. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/common.py +0 -0
  30. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/contribution_scripts.py +0 -0
  31. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/contribution_utils.py +0 -0
  32. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/data_readers.py +0 -0
  33. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/data_utils.py +0 -0
  34. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/datetime_utils.py +0 -0
  35. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/deployment_utils.py +0 -0
  36. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/diff_utils.py +0 -0
  37. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/docker_utils.py +0 -0
  38. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ecr_scripts.py +0 -0
  39. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ecr_utils.py +0 -0
  40. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ecs_utils.py +0 -0
  41. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_base.py +0 -0
  42. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_manager.py +0 -0
  43. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_scripts.py +0 -0
  44. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_utils.py +0 -0
  45. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/env_utils_legacy.py +0 -0
  46. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/es_utils.py +0 -0
  47. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/exceptions.py +0 -0
  48. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ff_mocks.py +0 -0
  49. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/function_cache_decorator.py +0 -0
  50. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/glacier_utils.py +0 -0
  51. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/jh_utils.py +0 -0
  52. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/kibana/dashboards.json +0 -0
  53. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/kibana/readme.md +0 -0
  54. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/lang_utils.py +0 -0
  55. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  56. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  57. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  58. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  59. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  60. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  61. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/license_utils.py +0 -0
  62. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/log_utils.py +0 -0
  63. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/obfuscation_utils.py +0 -0
  64. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/opensearch_utils.py +0 -0
  65. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/progress_bar.py +0 -0
  66. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/project_utils.py +0 -0
  67. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/qa_checkers.py +0 -0
  68. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/qa_utils.py +0 -0
  69. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/redis_tools.py +0 -0
  70. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/redis_utils.py +0 -0
  71. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/s3_utils.py +0 -0
  72. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/scripts/publish_to_pypi.py +0 -0
  73. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/scripts/run_license_checker.py +0 -0
  74. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/secrets_utils.py +0 -0
  75. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/sheet_utils.py +0 -0
  76. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/snapshot_utils.py +0 -0
  77. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/ssl_certificate_utils.py +0 -0
  78. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/submitr/progress_constants.py +0 -0
  79. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/task_utils.py +0 -0
  80. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/trace_utils.py +0 -0
  81. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/validation_utils.py +0 -0
  82. {dcicutils-8.9.0.0b0 → dcicutils-8.9.0.1b2}/dcicutils/variant_utils.py +0 -0

PKG-INFO

@@ -1,12 +1,12 @@
  Metadata-Version: 2.1
  Name: dcicutils
- Version: 8.9.0.0b0
+ Version: 8.9.0.1b2
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
  Home-page: https://github.com/4dn-dcic/utils
  License: MIT
  Author: 4DN-DCIC Team
  Author-email: support@4dnucleome.org
- Requires-Python: >=3.8,<3.12
+ Requires-Python: >=3.8,<3.13
  Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: Science/Research
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Topic :: Database :: Database Engines/Servers
  Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+ Requires-Dist: appdirs (>=1.4.4,<2.0.0)
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
- Requires-Dist: boto3 (>=1.28.57,<2.0.0)
- Requires-Dist: botocore (>=1.31.57,<2.0.0)
+ Requires-Dist: boto3 (>=1.34.93,<2.0.0)
+ Requires-Dist: botocore (>=1.34.93,<2.0.0)
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
  Requires-Dist: docker (>=4.4.4,<5.0.0)
  Requires-Dist: elasticsearch (==7.13.4)
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
  Requires-Dist: redis (>=4.5.1,<5.0.0)
  Requires-Dist: requests (>=2.21.0,<3.0.0)
  Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
+ Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
  Requires-Dist: structlog (>=19.2.0,<20.0.0)
  Requires-Dist: toml (>=0.10.1,<1)
  Requires-Dist: tqdm (>=4.66.2,<5.0.0)

dcicutils/command_utils.py

@@ -1,3 +1,4 @@
+ from __future__ import annotations
  import contextlib
  import functools
  import glob
@@ -7,7 +8,7 @@ import re
  import requests
  import subprocess

- from typing import Optional
+ from typing import Callable, Optional
  from .exceptions import InvalidParameterError
  from .lang_utils import there_are
  from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
@@ -384,3 +385,70 @@ def script_catch_errors():
          message = str(e)  # Note: We ignore the type, which isn't intended to be shown.
          PRINT(message)
          exit(1)
+
+
+ class Question:
+     """
+     Supports asking the user (via stdin) a yes/no question, possibly repeatedly; after
+     some maximum number of consecutive identical answers, asks them if they want to
+     automatically give that same answer to any/all subsequent questions.
+     Supports a static/global list of such Question instances, hashed (only) by the question text.
+     """
+     _static_instances = {}
+
+     @staticmethod
+     def instance(question: Optional[str] = None,
+                  max: Optional[int] = None, printf: Optional[Callable] = None) -> Question:
+         question = question if isinstance(question, str) else ""
+         if not (instance := Question._static_instances.get(question)):
+             Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
+         return instance
+
+     @staticmethod
+     def yes(question: Optional[str] = None,
+             max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
+         return Question.instance(question, max=max, printf=printf).ask()
+
+     def __init__(self, question: Optional[str] = None,
+                  max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
+         self._question = question if isinstance(question, str) else ""
+         self._max = max if isinstance(max, int) and max > 0 else None
+         self._print = printf if callable(printf) else print
+         self._yes_consecutive_count = 0
+         self._no_consecutive_count = 0
+         self._yes_automatic = False
+         self._no_automatic = False
+
+     def ask(self, question: Optional[str] = None) -> bool:
+
+         def question_automatic(value: str) -> bool:
+             nonlocal self
+             RARROW = "▶"
+             LARROW = "◀"
+             if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
+                          f" Do you want to answer {value} to all such questions?"
+                          f" {LARROW}{LARROW}{LARROW}"):
+                 return True
+             self._yes_consecutive_count = 0
+             self._no_consecutive_count = 0
+
+         if self._yes_automatic:
+             return True
+         elif self._no_automatic:
+             return False
+         elif yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
+             self._yes_consecutive_count += 1
+             self._no_consecutive_count = 0
+             if (self._no_consecutive_count == 0) and self._max and (self._yes_consecutive_count >= self._max):
+                 # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
+                 if question_automatic("YES"):
+                     self._yes_automatic = True
+             return True
+         else:
+             self._no_consecutive_count += 1
+             self._yes_consecutive_count = 0
+             if (self._yes_consecutive_count == 0) and self._max and (self._no_consecutive_count >= self._max):
+                 # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
+                 if question_automatic("NO"):
+                     self._no_automatic = True
+             return False
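
For orientation, a minimal usage sketch of the new Question helper (the prompt text, file names, and max value here are hypothetical; Question instances are keyed by the question text, so repeating the same text accumulates the consecutive-answer count):

    from dcicutils.command_utils import Question

    # Hypothetical loop: the same yes/no question is asked once per file; after 3
    # consecutive identical answers the user is offered the option to apply that
    # answer automatically to all remaining occurrences of this question.
    for path in ["a.fastq", "b.fastq", "c.fastq", "d.fastq"]:
        if Question.yes("Overwrite the existing copy of this file?", max=3):
            print(f"Overwriting {path} ...")
        else:
            print(f"Skipping {path}.")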

dcicutils/creds_utils.py

@@ -170,7 +170,7 @@ class KeyManager:
                  raise ValueError(f"A KeyManager named {name!r} has already been defined.")
              key_manager_class._init_class_variables()
              key_manager_class._REGISTERED = True
-             _KEY_MANAGERS[name] = cls
+             _KEY_MANAGERS[name] = key_manager_class
              return key_manager_class
          return _register_class


dcicutils/ff_utils.py

@@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
      used to create the generator.
      Should NOT be used directly
      """
+     def get_es_host_local() -> Optional[str]:
+         return os.environ.get("ES_HOST_LOCAL", None)
      health = get_health_page(key=auth)
      if es_client is None:
-         es_url = health['elasticsearch']
+         if not (es_url := get_es_host_local()):
+             es_url = health['elasticsearch']
          es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
      namespace_star = health.get('namespace', '') + '*'
      # match all given uuids to _id fields
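
The practical effect of this change: if an ES_HOST_LOCAL environment variable is set, _get_es_metadata uses it instead of the ElasticSearch URL advertised by the portal health page, for example to point at a locally tunneled cluster. A small sketch (the URL is purely illustrative):

    import os

    # Hypothetical local tunnel to the ElasticSearch cluster; when this is set,
    # the code above skips health['elasticsearch'] and uses this value instead.
    os.environ["ES_HOST_LOCAL"] = "https://localhost:9200"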

dcicutils/file_utils.py (new file)

@@ -0,0 +1,267 @@
+ import glob
+ import hashlib
+ import io
+ import os
+ import pathlib
+ from datetime import datetime
+ import random
+ import string
+ from tempfile import gettempdir as get_temporary_directory
+ from typing import List, Optional, Union
+ from uuid import uuid4 as uuid
+
+ HOME_DIRECTORY = str(pathlib.Path().home())
+
+
+ def search_for_file(file: str,
+                     location: Union[str, pathlib.PosixPath, Optional[List[Union[str, pathlib.PosixPath]]]] = None,
+                     recursive: bool = False,
+                     single: bool = False,
+                     order: bool = True) -> Union[List[str], Optional[str]]:
+     """
+     Searches for the existence of the given file name, first directly in the given directory or list
+     of directories, if specified, and if not then just in the current (working) directory; if the
+     given recursive flag is True then also searches all sub-directories of these directories;
+     returns the full path name to the file if found. If the single flag is True then just the
+     first file which is found is returned (as a string), or None if none; if the single flag
+     is False, then all matched files are returned in a list, or an empty list if none.
+     """
+     def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
+         def order_by(path: str):
+             return len(path.split(os.path.sep)), path
+         return sorted(paths, key=order_by)
+
+     if not (file and isinstance(file, (str, pathlib.PosixPath))):
+         return None if single is True else []
+     if os.path.isabs(file):
+         if os.path.exists(file):
+             return str(file) if single is True else [str(file)]
+         return None if single is True else []
+     files_found = []
+     if not location:
+         location = ["."]
+     elif isinstance(location, (str, pathlib.PosixPath)):
+         location = [location]
+     elif not isinstance(location, list):
+         location = []
+     location_pruned = []
+     for directory in location:
+         if not isinstance(directory, str):
+             if not isinstance(directory, pathlib.PosixPath):
+                 continue
+             directory = str(directory)
+         if not (directory := directory.strip()):
+             continue
+         if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
+             # Actually, allow a file rather than a directory; assume its parent directory was intended.
+             if not (directory := os.path.dirname(directory)):
+                 continue
+         if directory not in location_pruned:
+             location_pruned.append(directory)
+     location = location_pruned
+     for directory in location:
+         if os.path.exists(os.path.join(directory, file)):
+             file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
+             if single is True:
+                 return file_found
+             if file_found not in files_found:
+                 files_found.append(file_found)
+     if recursive is True:
+         for directory in location:
+             if not directory.endswith("/**") and not file.startswith("**/"):
+                 path = f"{directory}/**/{file}"
+             else:
+                 path = f"{directory}/{file}"
+             files = glob.glob(path, recursive=True if recursive is True else False)
+             if files:
+                 for file_found in files:
+                     file_found = os.path.abspath(file_found)
+                     if single is True:
+                         return file_found
+                     if file_found not in files_found:
+                         files_found.append(file_found)
+     if single is True:
+         return files_found[0] if files_found else None
+     elif order is True:
+         return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
+     else:
+         return files_found
+
+
+ def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
+     """
+     Normalizes the given path value and returns the result; does things like remove redundant
+     consecutive directory separators and redundant parent paths. If the given absolute argument
+     is True then converts the path to an absolute path. If the given expand_home argument is False
+     and if the path can reasonably be represented with a home directory indicator (i.e. "~"), then
+     converts it to such. If the expand_home argument is True and the path starts with the home directory
+     indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
+     given path value is not actually even a string (or pathlib.Path) then returns an empty string.
+     """
+     if isinstance(value, pathlib.Path):
+         value = str(value)
+     elif not isinstance(value, str):
+         return ""
+     if not (value := value.strip()) or not (value := os.path.normpath(value)):
+         return ""
+     if expand_home is True:
+         value = os.path.expanduser(value)
+     elif (expand_home is False) and (os.name == "posix"):
+         if value.startswith(home := HOME_DIRECTORY + os.sep):
+             value = "~/" + value[len(home):]
+         elif value == HOME_DIRECTORY:
+             value = "~"
+     if absolute is True:
+         value = os.path.abspath(value)
+     return value
+
+
+ def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
+     try:
+         return os.path.getsize(file) if isinstance(file, str) else None
+     except Exception:
+         if raise_exception is True:
+             raise
+         return None
+
+
+ def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
+     try:
+         return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
+     except Exception:
+         if raise_exception is True:
+             raise
+         return None
+
+
+ def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
+     """
+     Returns True iff the contents of the two given files are exactly the same.
+     """
+     try:
+         with open(filea, "rb") as fa:
+             with open(fileb, "rb") as fb:
+                 chunk_size = 4096
+                 while True:
+                     chunka = fa.read(chunk_size)
+                     chunkb = fb.read(chunk_size)
+                     if chunka != chunkb:
+                         return False
+                     if not chunka:
+                         break
+         return True
+     except Exception:
+         if raise_exception is True:
+             raise
+         return False
+
+
+ def compute_file_md5(file: str, raise_exception: bool = True) -> str:
+     """
+     Returns the md5 checksum for the given file.
+     """
+     if not isinstance(file, str):
+         return ""
+     try:
+         md5 = hashlib.md5()
+         with open(file, "rb") as file:
+             for chunk in iter(lambda: file.read(4096), b""):
+                 md5.update(chunk)
+         return md5.hexdigest()
+     except Exception:
+         if raise_exception is True:
+             raise
+         return ""
+
+
+ def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
+     """
+     Returns the AWS S3 "etag" for the given file; this value is md5-like but
+     not the same as a normal md5. We use this to check that a file in S3
+     appears to be exactly the same file as a local file.
+     """
+     try:
+         with io.open(file, "rb") as f:
+             return _compute_file_etag(f)
+     except Exception:
+         if raise_exception is True:
+             raise
+         return None
+
+
+ def _compute_file_etag(f: io.BufferedReader) -> str:
+     # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
+     MULTIPART_THRESHOLD = 8388608
+     MULTIPART_CHUNKSIZE = 8388608
+     # BUFFER_SIZE = 1048576
+     # Verify some assumptions are correct
+     # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
+     # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
+     # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
+     hash = hashlib.md5()
+     read = 0
+     chunks = None
+     while True:
+         # Read some more; if we are at the end, stop reading.
+         bits = f.read(1048576)
+         if len(bits) == 0:
+             break
+         read += len(bits)
+         hash.update(bits)
+         if chunks is None:
+             # We're handling a multi-part upload, so switch to calculating
+             # hashes of each chunk.
+             if read >= MULTIPART_THRESHOLD:
+                 chunks = b''
+         if chunks is not None:
+             if (read % MULTIPART_CHUNKSIZE) == 0:
+                 # Done with a chunk; add it to the list of hashes to hash later.
+                 chunks += hash.digest()
+                 hash = hashlib.md5()
+     if chunks is None:
+         # Normal upload, just output the MD5 hash.
+         etag = hash.hexdigest()
+     else:
+         # Multipart upload, need to output the hash of the hashes.
+         if (read % MULTIPART_CHUNKSIZE) != 0:
+             # Add the last part if we have a partial chunk.
+             chunks += hash.digest()
+         etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
+     return etag
+
+
+ def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
+                        nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+     """
+     Writes some random content to the given file (name/path). If the given file is None then writes
+     to a temporary file. In either case, returns the file written to. The number of bytes written is
+     1024 by default but can be specified with the nbytes argument; defaults to writing ASCII text but
+     if the binary argument is True then writes binary data instead; if not binary the content is in
+     lines of 80 characters each; use the line_length argument in this case to change the line length.
+     """
+     if not isinstance(nbytes, int) or nbytes < 0:
+         nbytes = 0
+     if not isinstance(file, str) or not file:
+         if not isinstance(prefix, str):
+             prefix = ""
+         if not isinstance(suffix, str):
+             suffix = ""
+         file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
+         file = os.path.join(get_temporary_directory(), file)
+     with open(file, "wb" if binary is True else "w") as f:
+         if binary is True:
+             f.write(os.urandom(nbytes))
+         else:
+             if (not isinstance(line_length, int)) or (line_length < 1):
+                 line_length = 80
+             line_length += 1
+             nlines = nbytes // line_length
+             nremainder = nbytes % line_length
+             for n in range(nlines):
+                 f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
+                 f.write("\n")
+             if nremainder > 1:
+                 f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
+             if nremainder > 0:
+                 f.write("\n")
+     return file
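
A brief usage sketch for the new file_utils module (the file names and directories are hypothetical; behavior follows the docstrings above):

    from dcicutils.file_utils import (
        are_files_equal, compute_file_md5, create_random_file, normalize_path, search_for_file
    )

    # Find the first config.json in the current directory or any sub-directory (or None).
    config = search_for_file("config.json", location=".", recursive=True, single=True)

    # Collapse a path under the caller's home directory to its "~" form (result depends on $HOME).
    print(normalize_path("/home/someuser/work/../data", expand_home=False))

    # Create a 1 KB temporary file of random ASCII text and fingerprint it.
    path = create_random_file(nbytes=1024)
    print(compute_file_md5(path))
    print(are_files_equal(path, path))  # True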

dcicutils/http_utils.py (new file)

@@ -0,0 +1,39 @@
+ from contextlib import contextmanager
+ import requests
+ from typing import Callable, Optional
+ from dcicutils.tmpfile_utils import temporary_file
+
+
+ @contextmanager
+ def download(url: str, suffix: Optional[str] = None, binary: bool = True,
+              progress: Optional[Callable] = None) -> Optional[str]:
+     """
+     Context manager to download the given URL into a temporary file and yield the file
+     path to it. An optional file suffix may be specified for this temporary file name.
+     Defaults to binary file mode; if not desired then pass False as the binary argument.
+     """
+     with temporary_file(suffix=suffix) as file:
+         download_to(url, file, binary=binary, progress=progress)
+         yield file
+
+
+ def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
+     """
+     Download the given URL into the given file. Defaults to binary
+     file mode; if not desired then pass False as the binary argument.
+     """
+     if not callable(progress):
+         progress = None
+     response = requests.get(url, stream=True)
+     if progress:
+         nbytes = 0
+         nbytes_total = None
+         if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
+             nbytes_total = int(content_length)
+     with open(file, "wb" if binary is True else "w") as f:
+         for chunk in response.iter_content(chunk_size=8192):
+             if chunk:
+                 f.write(chunk)
+                 if progress:
+                     nbytes += len(chunk)
+                     progress(nbytes, nbytes_total)
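
A usage sketch for the new download context manager (the URL is illustrative; the temporary file exists only inside the with block):

    from dcicutils.http_utils import download

    def progress(nbytes, nbytes_total):
        # nbytes_total is None when the server sends no Content-Length header.
        if nbytes_total:
            print(f"\rDownloaded {nbytes} of {nbytes_total} bytes", end="")

    with download("https://example.com/somefile.tar.gz", suffix=".tar.gz", progress=progress) as path:
        print(f"\nDownloaded to temporary file: {path}")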

dcicutils/misc_utils.py

@@ -3,6 +3,7 @@ This file contains functions that might be generally useful.
  """

  from collections import namedtuple
+ import appdirs
  import contextlib
  import datetime
  import functools
@@ -13,10 +14,12 @@ import json
  import logging
  import math
  import os
+ import platform
  import pytz
  import re
  import rfc3986.validators
  import rfc3986.exceptions
+ import shortuuid
  import time
  import uuid
  import warnings
@@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False):

  def remove_empty_properties(data: Optional[Union[list, dict]],
                              isempty: Optional[Callable] = None,
-                             isempty_array_element: Optional[Callable] = None) -> None:
+                             isempty_array_element: Optional[Callable] = None,
+                             raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
      def _isempty(value: Any) -> bool:  # noqa
          return isempty(value) if callable(isempty) else value in [None, "", {}, []]
      if isinstance(data, dict):
@@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]],
              if _isempty(value := data[key]):
                  del data[key]
              else:
-                 remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
+                 remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
+                                         raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                         raise_exception_on_nonempty_array_element_after_empty)
      elif isinstance(data, list):
          for item in data:
-             remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
+             remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
+                                     raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                     raise_exception_on_nonempty_array_element_after_empty)
          if callable(isempty_array_element):
+             if raise_exception_on_nonempty_array_element_after_empty is True:
+                 empty_element_seen = False
+                 for item in data:
+                     if not empty_element_seen and isempty_array_element(item):
+                         empty_element_seen = True
+                     elif empty_element_seen and not isempty_array_element(item):
+                         raise Exception("Non-empty element found after empty element.")
              data[:] = [item for item in data if not isempty_array_element(item)]
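
To illustrate the new raise_exception_on_nonempty_array_element_after_empty flag (made-up data): by default empty array elements are silently removed; with the flag set, a non-empty element occurring after an empty one raises instead.

    from dcicutils.misc_utils import remove_empty_properties

    data = {"items": ["a", "", "b"], "note": ""}
    remove_empty_properties(data, isempty_array_element=lambda item: item == "")
    print(data)  # {'items': ['a', 'b']}

    data = {"items": ["a", "", "b"]}
    try:
        remove_empty_properties(data, isempty_array_element=lambda item: item == "",
                                raise_exception_on_nonempty_array_element_after_empty=True)
    except Exception as e:
        print(e)  # Non-empty element found after empty element.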

@@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
  def create_dict(**kwargs) -> dict:
      result = {}
      for name in kwargs:
-         if kwargs[name]:
+         if not (kwargs[name] is None):
              result[name] = kwargs[name]
      return result
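
The create_dict change is behavioral: previously any falsy value (0, False, "", empty list) was dropped from the result; now only None is dropped. A small sketch:

    from dcicutils.misc_utils import create_dict

    # 0 and False are now kept; only comment (None) is omitted.
    print(create_dict(count=0, verbose=False, name="abc", comment=None))
    # {'count': 0, 'verbose': False, 'name': 'abc'}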

@@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str:
      return re.sub(r"\s+", " ", value).strip()


+ def normalize_string(value: Optional[str]) -> Optional[str]:
+     """
+     Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
+     in the given string value and returns the result. If the given value is None returns an
+     empty string. If the given value is not actually even a string then returns None.
+     """
+     if value is None:
+         return ""
+     elif isinstance(value, str):
+         return re.sub(r"\s+", " ", value).strip()
+     return None
+
+
  def find_nth_from_end(string: str, substring: str, nth: int) -> int:
      """
      Returns the index of the nth occurrence of the given substring within
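
normalize_string differs from the existing normalize_spaces only in how it treats non-string input: None becomes an empty string and any other non-string becomes None. For example:

    from dcicutils.misc_utils import normalize_string

    print(normalize_string("  hello   world  "))  # hello world
    print(normalize_string(None))                 # (empty string)
    print(normalize_string(123))                  # None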

@@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F
          nbytes = int(nbytes)
          return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
      unit = (UNITS_TERSE if terse else UNITS)[index]
-     return f"{nbytes:.{precision}f}{'' if nospace else ' '}{unit}"
+     size = f"{nbytes:.{precision}f}"
+     if size.endswith(f".{'0' * precision}"):
+         # Tidy up extraneous zeros.
+         size = size[:-(precision - 1)]
+     return f"{size}{'' if nospace else ' '}{unit}"


  def format_duration(seconds: Union[int, float]) -> str:
@@ -2670,3 +2702,48 @@ class JsonLinesReader:
              yield line
          else:
              raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
+
+
+ def get_app_specific_directory() -> str:
+     """
+     Returns the standard system application specific directory:
+     - On MacOS this directory is: ~/Library/Application Support
+     - On Linux this directory is: ~/.local/share
+     - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
+     N.B. This has been tested on MacOS and Linux but not on Windows.
+     """
+     return appdirs.user_data_dir()
+
+
+ def get_os_name() -> str:
+     if os_name := platform.system():
+         if os_name == "Darwin": return "osx"  # noqa
+         elif os_name == "Linux": return "linux"  # noqa
+         elif os_name == "Windows": return "windows"  # noqa
+     return ""
+
+
+ def get_cpu_architecture_name() -> str:
+     if os_architecture_name := platform.machine():
+         if os_architecture_name == "x86_64": return "amd64"  # noqa
+         return os_architecture_name
+     return ""
+
+
+ def create_uuid(nodash: bool = False, upper: bool = False) -> str:
+     value = str(uuid.uuid4())
+     if nodash is True:
+         value = value.replace("-", "")
+     if upper is True:
+         value = value.upper()
+     return value
+
+
+ def create_short_uuid(length: Optional[int] = None, upper: bool = False):
+     # Not really technically a uuid of course.
+     if (length is None) or (not isinstance(length, int)) or (length < 1):
+         length = 16
+     value = shortuuid.ShortUUID().random(length=length)
+     if upper is True:
+         value = value.upper()
+     return value
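
Finally, a brief sketch of the new identifier and platform helpers (the printed values are of course illustrative and machine-dependent):

    from dcicutils.misc_utils import (
        create_short_uuid, create_uuid, get_cpu_architecture_name, get_os_name
    )

    print(create_uuid())                          # standard dashed UUID string
    print(create_uuid(nodash=True, upper=True))   # 32 hex characters, upper-cased, no dashes
    print(create_short_uuid(length=8))            # 8 characters from shortuuid's alphabet
    print(get_os_name(), get_cpu_architecture_name())  # e.g. "linux amd64" or "osx arm64"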