PyPI - dcicutils - Versions diffs - 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl - Mend

dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

dcicutils/command_utils.py +69 -1
dcicutils/creds_utils.py +1 -1
dcicutils/ff_utils.py +4 -1
dcicutils/file_utils.py +250 -41
dcicutils/http_utils.py +39 -0
dcicutils/misc_utils.py +82 -5
dcicutils/portal_object_utils.py +24 -89
dcicutils/portal_utils.py +249 -37
dcicutils/schema_utils.py +1 -1
dcicutils/scripts/view_portal_object.py +87 -5
dcicutils/structured_data.py +59 -17
dcicutils/submitr/ref_lookup_strategy.py +31 -25
dcicutils/tmpfile_utils.py +50 -10
dcicutils/zip_utils.py +27 -0
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA +6 -4
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD +19 -18
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt +0 -0
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL +0 -0
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt +0 -0

dcicutils/command_utils.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from __future__ import annotations
 import contextlib
 import functools
 import glob
@@ -7,7 +8,7 @@ import re
 import requests
 import subprocess
-from typing import Optional
+from typing import Callable, Optional
 from .exceptions import InvalidParameterError
 from .lang_utils import there_are
 from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
@@ -384,3 +385,70 @@ def script_catch_errors():
                 message = str(e)  # Note: We ignore the type, which isn't intended to be shown.
                 PRINT(message)
             exit(1)
+class Question:
+    """
+    Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after
+    some maximum number of times of the same answer in a row (consecutively), then asks them
+    if they want to automatically give that same answer to any/all subsequent questions.
+    Supports static/global list of such Question instances, hashed (only) by the question text.
+    """
+    # Class-level cache of Question instances, keyed by the question text.
+    _static_instances = {}
+    @staticmethod
+    def instance(question: Optional[str] = None,
+                 max: Optional[int] = None, printf: Optional[Callable] = None) -> Question:
+        # Returns the cached instance for the given question text, creating it on first use.
+        # A non-string question is treated as the empty string. The max and printf arguments
+        # only take effect on the call which actually creates the instance.
+        question = question if isinstance(question, str) else ""
+        if not (instance := Question._static_instances.get(question)):
+            Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
+        return instance
+    @staticmethod
+    def yes(question: Optional[str] = None,
+            max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
+        # Convenience wrapper: asks via the shared (cached) instance for this question text.
+        return Question.instance(question, max=max, printf=printf).ask()
+    def __init__(self, question: Optional[str] = None,
+                 max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
+        self._question = question if isinstance(question, str) else ""
+        # Only a positive int enables the "answer the same to all" prompt; otherwise disabled.
+        self._max = max if isinstance(max, int) and max > 0 else None
+        # NOTE(review): _print is stored here but not referenced elsewhere in this class as shown.
+        self._print = printf if callable(printf) else print
+        self._yes_consecutive_count = 0
+        self._no_consecutive_count = 0
+        self._yes_automatic = False
+        self._no_automatic = False
+    def ask(self, question: Optional[str] = None) -> bool:
+        # Asks the question (the given one, else the instance's, else "Undefined question") and
+        # returns True for yes, False for no. Once an automatic answer has been accepted by the
+        # user, returns that answer without prompting.
+        # yes_or_no is not defined in this hunk; presumably the existing prompt helper
+        # elsewhere in this module - verify.
+        def question_automatic(value: str) -> bool:
+            # Returns True if the user wants the given answer applied automatically from now on.
+            # Otherwise resets both consecutive counters and falls off the end, implicitly
+            # returning None (falsy) despite the bool annotation.
+            # The nonlocal declaration is not needed to mutate attributes of self; self is not rebound.
+            nonlocal self
+            RARROW = "▶"
+            LARROW = "◀"
+            if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
+                         f" Do you want to answer {value} to all such questions?"
+                         f" {LARROW}{LARROW}{LARROW}"):
+                return True
+            self._yes_consecutive_count = 0
+            self._no_consecutive_count = 0
+        if self._yes_automatic:
+            return True
+        elif self._no_automatic:
+            return False
+        elif yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
+            self._yes_consecutive_count += 1
+            self._no_consecutive_count = 0
+            # The (== 0) test below is always true here since the counter was just reset above.
+            if (self._no_consecutive_count == 0) and self._max and (self._yes_consecutive_count >= self._max):
+                # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
+                if question_automatic("YES"):
+                    self._yes_automatic = True
+            return True
+        else:
+            self._no_consecutive_count += 1
+            self._yes_consecutive_count = 0
+            # The (== 0) test below is always true here since the counter was just reset above.
+            if (self._yes_consecutive_count == 0) and self._max and (self._no_consecutive_count >= self._max):
+                # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
+                if question_automatic("NO"):
+                    self._no_automatic = True
+            return False

dcicutils/creds_utils.py CHANGED Viewed

@@ -170,7 +170,7 @@ class KeyManager:
                 raise ValueError(f"A KeyManager named {name!r} has already been defined.")
             key_manager_class._init_class_variables()
             key_manager_class._REGISTERED = True
-            _KEY_MANAGERS[name] = cls
+            _KEY_MANAGERS[name] = key_manager_class
             return key_manager_class
         return _register_class

dcicutils/ff_utils.py CHANGED Viewed

@@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
     used to create the generator.
     Should NOT be used directly
     """
+    def get_es_host_local() -> Optional[str]:
+        return os.environ.get("ES_HOST_LOCAL", None)
     health = get_health_page(key=auth)
     if es_client is None:
-        es_url = health['elasticsearch']
+        if not (es_url := get_es_host_local()):
+            es_url = health['elasticsearch']
         es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
     namespace_star = health.get('namespace', '') + '*'
     # match all given uuids to _id fields

dcicutils/file_utils.py CHANGED Viewed

@@ -1,13 +1,23 @@
 import glob
+import hashlib
+import io
 import os
 import pathlib
+from datetime import datetime
+import random
+import string
+from tempfile import gettempdir as get_temporary_directory
 from typing import List, Optional, Union
+from uuid import uuid4 as uuid
+HOME_DIRECTORY = str(pathlib.Path().home())
 def search_for_file(file: str,
-                    location: Union[str, Optional[List[str]]] = None,
+                    location: Union[str, pathlib.PosixPath, Optional[List[Union[str, pathlib.PosixPath]]]] = None,
                     recursive: bool = False,
-                    single: bool = False) -> Union[List[str], Optional[str]]:
+                    single: bool = False,
+                    order: bool = True) -> Union[List[str], Optional[str]]:
     """
     Searches for the existence of the given file name, first directly in the given directory or list
     of directories, if specified, and if not then just in the current (working) directory; if the
@@ -16,43 +26,242 @@ def search_for_file(file: str,
     first file which is found is returns (as a string), or None if none; if the single flag
     is False, then all matched files are returned in a list, or and empty list if none.
     """
-    if file and isinstance(file, (str, pathlib.PosixPath)):
-        if os.path.isabs(file):
-            if os.path.exists(file):
-                return file if single else [file]
-            return None if single else []
-        files_found = []
-        if not location:
-            location = ["."]
-        elif isinstance(location, (str, pathlib.PosixPath)):
-            location = [location]
-        elif not isinstance(location, list):
-            location = []
-        for directory in location:
-            if not directory:
+    def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
+        def order_by(path: str):
+            return len(path.split(os.path.sep)), path
+        return sorted(paths, key=order_by)
+    if not (file and isinstance(file, (str, pathlib.PosixPath))):
+        return None if single is True else []
+    if os.path.isabs(file):
+        if os.path.exists(file):
+            return str(file) if single is True else [str(file)]
+        return None if single is True else []
+    files_found = []
+    if not location:
+        location = ["."]
+    elif isinstance(location, (str, pathlib.PosixPath)):
+        location = [location]
+    elif not isinstance(location, list):
+        location = []
+    location_pruned = []
+    for directory in location:
+        if not isinstance(directory, str):
+            if not isinstance(directory, pathlib.PosixPath):
+                continue
+            directory = str(directory)
+        if not (directory := directory.strip()):
+            continue
+        if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
+            # Actually, allow a file rather then a directory; assume its parent directory was intended.
+            if not (directory := os.path.dirname(directory)):
                 continue
-            if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
-                file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
-                if single:
-                    return file_found
-                if file_found not in files_found:
-                    files_found.append(file_found)
-        if recursive:
-            for directory in location:
-                if not directory:
-                    continue
-                if not directory.endswith("/**") and not file.startswith("**/"):
-                    path = f"{directory}/**/{file}"
-                else:
-                    path = f"{directory}/{file}"
-                files = glob.glob(path, recursive=recursive)
-                if files:
-                    for file_found in files:
-                        file_found = os.path.abspath(file_found)
-                        if single:
-                            return file_found
-                        if file_found not in files_found:
-                            files_found.append(file_found)
-        if files_found:
-            return files_found[0] if single else files_found
-        return None if single else []
+        if directory not in location_pruned:
+            location_pruned.append(directory)
+    location = location_pruned
+    for directory in location:
+        if os.path.exists(os.path.join(directory, file)):
+            file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
+            if single is True:
+                return file_found
+            if file_found not in files_found:
+                files_found.append(file_found)
+    if recursive is True:
+        for directory in location:
+            if not directory.endswith("/**") and not file.startswith("**/"):
+                path = f"{directory}/**/{file}"
+            else:
+                path = f"{directory}/{file}"
+            files = glob.glob(path, recursive=True if recursive is True else False)
+            if files:
+                for file_found in files:
+                    file_found = os.path.abspath(file_found)
+                    if single is True:
+                        return file_found
+                    if file_found not in files_found:
+                        files_found.append(file_found)
+    if single is True:
+        return files_found[0] if files_found else None
+    elif order is True:
+        return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
+    else:
+        return files_found
+def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
+    """
+    Normalizes the given path value and returns the result; does things like remove redundant
+    consecutive directory separators and redundant parent paths. If the given absolute argument
+    is True then converts the path to an absolute path. If the given expand_home argument is False
+    and if the path can reasonably be represented with a home directory indicator (i.e. "~"), then
+    converts it to such. If the expand_home argument is True and path starts with the home directory
+    indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
+    given path value is not actually even a string (or pathlib.Path) then returns an empty string.
+    """
+    if isinstance(value, pathlib.Path):
+        value = str(value)
+    elif not isinstance(value, str):
+        return ""
+    # An empty or whitespace-only value yields an empty string.
+    if not (value := value.strip()) or not (value := os.path.normpath(value)):
+        return ""
+    if expand_home is True:
+        value = os.path.expanduser(value)
+    elif (expand_home is False) and (os.name == "posix"):
+        # Collapse a leading home directory to "~"; only done on POSIX systems.
+        if value.startswith(home := HOME_DIRECTORY + os.sep):
+            value = "~/" + value[len(home):]
+        elif value == HOME_DIRECTORY:
+            value = "~"
+    if absolute is True:
+        # NOTE(review): os.path.abspath does not expand "~", so combining absolute=True with
+        # expand_home=False on a path under the home directory yields a path relative to the
+        # current directory (e.g. <cwd>/~/x) - confirm whether that combination is intended.
+        value = os.path.abspath(value)
+    return value
+def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
+    """
+    Returns the size in bytes of the given file, or None if the given file is not a string.
+    Any error from the size lookup is re-raised, unless raise_exception is not True, in
+    which case None is returned instead.
+    """
+    try:
+        return os.path.getsize(file) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
+    """
+    Returns the last-modified time of the given file as a datetime, or None if the given
+    file is not a string. Any error is re-raised, unless raise_exception is not True,
+    in which case None is returned instead.
+    """
+    try:
+        # fromtimestamp without a tz argument produces a naive datetime in local time.
+        return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
+    """
+    Returns True iff the contents of the two given files are exactly the same.
+    Any error (e.g. a file which cannot be opened) is re-raised, unless raise_exception
+    is not True, in which case False is returned instead.
+    """
+    try:
+        with open(filea, "rb") as fa:
+            with open(fileb, "rb") as fb:
+                chunk_size = 4096
+                # Compare chunk by chunk so that neither file is read entirely into memory.
+                while True:
+                    chunka = fa.read(chunk_size)
+                    chunkb = fb.read(chunk_size)
+                    if chunka != chunkb:
+                        return False
+                    # Chunks are equal here, so an empty chunka means both files are exhausted.
+                    if not chunka:
+                        break
+        return True
+    except Exception:
+        if raise_exception is True:
+            raise
+        return False
+def compute_file_md5(file: str, raise_exception: bool = True) -> str:
+    """
+    Returns the md5 checksum (hex digest) for the given file. Returns an empty string if
+    the given file is not a string. Any error is re-raised, unless raise_exception is
+    not True, in which case an empty string is returned instead.
+    """
+    if not isinstance(file, str):
+        return ""
+    try:
+        md5 = hashlib.md5()
+        # Note that the file parameter is rebound to the open file handle within this block.
+        with open(file, "rb") as file:
+            # Read in 4096 byte chunks until read returns the empty-bytes sentinel.
+            for chunk in iter(lambda: file.read(4096), b""):
+                md5.update(chunk)
+        return md5.hexdigest()
+    except Exception:
+        if raise_exception is True:
+            raise
+        return ""
+def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
+    """
+    Returns the AWS S3 "etag" for the given file; this value is md5-like but
+    not the same as a normal md5. We use this to compare that a file in S3
+    appears to be exactly the same file as a local file. Any error is re-raised,
+    unless raise_exception is not True, in which case None is returned instead.
+    """
+    try:
+        with io.open(file, "rb") as f:
+            return _compute_file_etag(f)
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+def _compute_file_etag(f: io.BufferedReader) -> str:
+    # Computes an S3-style etag from the given open binary file: a plain md5 hex digest if
+    # fewer than MULTIPART_THRESHOLD bytes are read; otherwise the md5 of the concatenated
+    # per-chunk md5 digests, followed by a dash and the number of chunks.
+    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
+    # NOTE(review): the threshold/chunk size values are hard-coded here; presumably they must
+    # match the settings the file was actually uploaded with for the etags to agree - confirm.
+    MULTIPART_THRESHOLD = 8388608
+    MULTIPART_CHUNKSIZE = 8388608
+    # BUFFER_SIZE = 1048576
+    # Verify some assumptions are correct
+    # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
+    # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
+    # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
+    hash = hashlib.md5()
+    read = 0
+    chunks = None
+    while True:
+        # Read some from the file; if we're at the end, stop reading
+        # NOTE(review): the chunk boundary test below only fires when the running total lands
+        # exactly on a multiple of MULTIPART_CHUNKSIZE, which relies on every read (other than
+        # the last) returning the full 1048576 bytes - confirm for the readers passed in.
+        bits = f.read(1048576)
+        if len(bits) == 0:
+            break
+        read += len(bits)
+        hash.update(bits)
+        if chunks is None:
+            # We're handling a multi-part upload, so switch to calculating
+            # hashes of each chunk
+            if read >= MULTIPART_THRESHOLD:
+                chunks = b''
+        if chunks is not None:
+            if (read % MULTIPART_CHUNKSIZE) == 0:
+                # Done with a chunk, add it to the list of hashes to hash later
+                chunks += hash.digest()
+                hash = hashlib.md5()
+    if chunks is None:
+        # Normal upload, just output the MD5 hash
+        etag = hash.hexdigest()
+    else:
+        # Multipart upload, need to output the hash of the hashes
+        if (read % MULTIPART_CHUNKSIZE) != 0:
+            # Add the last part if we have a partial chunk
+            chunks += hash.digest()
+        # Each md5 digest is 16 bytes, so len(chunks) // 16 is the number of chunks.
+        etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
+    return etag
+def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
+                       nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+    """
+    Write to the given file (name/path) some random content. If the given file is None then writes
+    to a temporary file. In either case, returns the file written to. The number of bytes written is
+    1024 by default but can be specified with the nbytes argument; defaults to writing ASCII text but
+    if the binary argument is True then writes binary data as well; if not binary the content is in
+    lines of 80 characters each; use the line_length argument in this case to change the line length.
+    """
+    # A non-int or negative nbytes is treated as zero.
+    if not isinstance(nbytes, int) or nbytes < 0:
+        nbytes = 0
+    if not isinstance(file, str) or not file:
+        if not isinstance(prefix, str):
+            prefix = ""
+        if not isinstance(suffix, str):
+            suffix = ""
+        # NOTE(review): prefix and suffix are normalized above but are not used in the generated
+        # file name below - looks like an oversight; confirm the intended naming.
+        # The generated name is a UTC timestamp followed by a dash-less uuid, in the system temporary directory.
+        file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
+        file = os.path.join(get_temporary_directory(), file)
+    with open(file, "wb" if binary is True else "w") as f:
+        if binary is True:
+            f.write(os.urandom(nbytes))
+        else:
+            if (not isinstance(line_length, int)) or (line_length < 1):
+                line_length = 80
+            # Count the trailing newline as part of each line so the total written adds up to nbytes.
+            line_length += 1
+            nlines = nbytes // line_length
+            nremainder = nbytes % line_length
+            for n in range(nlines):
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
+                f.write("\n")
+            # Final partial line: nremainder - 1 random characters (if any) plus a newline.
+            if nremainder > 1:
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
+            if nremainder > 0:
+                f.write("\n")
+    return file

dcicutils/http_utils.py ADDED Viewed

@@ -0,0 +1,39 @@
+from contextlib import contextmanager
+import requests
+from typing import Callable, Optional
+from dcicutils.tmpfile_utils import temporary_file
+@contextmanager
+def download(url: str, suffix: Optional[str] = None, binary: bool = True,
+             progress: Optional[Callable] = None) -> Optional[str]:
+    """
+    Context manager to download the given URL into a temporary file and yields the file
+    path to it. An optional file suffix may be specified for this temporary file name.
+    Defaults to binary file mode; if not desired then pass False as the binary argument.
+    The optional progress callable is passed through to download_to.
+    """
+    # The temporary file comes from dcicutils.tmpfile_utils.temporary_file; presumably it is
+    # removed when the with block exits - verify against that helper.
+    with temporary_file(suffix=suffix) as file:
+        download_to(url, file, binary=binary, progress=progress)
+        yield file
+def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
+    """
+    Download the given URL into the given file. Defaults to binary
+    file mode; if not desired then pass False as the binary argument.
+    If the given progress argument is callable then it is called after each chunk
+    with the number of bytes received so far and the total number of bytes expected
+    (from the Content-Length response header, or None if absent or not numeric).
+    """
+    if not callable(progress):
+        progress = None
+    # NOTE(review): no timeout is passed, the HTTP status is not checked (an error response
+    # body would be written to the file), and the streamed response is never explicitly closed.
+    response = requests.get(url, stream=True)
+    if progress:
+        nbytes = 0
+        nbytes_total = None
+        if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
+            nbytes_total = int(content_length)
+    # NOTE(review): iter_content yields bytes by default, so with binary=False the write
+    # to a text-mode file below would presumably raise a TypeError - confirm.
+    with open(file, "wb" if binary is True else "w") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                f.write(chunk)
+            if progress:
+                nbytes += len(chunk)
+                progress(nbytes, nbytes_total)

dcicutils/misc_utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ This file contains functions that might be generally useful.
 """
 from collections import namedtuple
+import appdirs
 import contextlib
 import datetime
 import functools
@@ -13,10 +14,12 @@ import json
 import logging
 import math
 import os
+import platform
 import pytz
 import re
 import rfc3986.validators
 import rfc3986.exceptions
+import shortuuid
 import time
 import uuid
 import warnings
@@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False):
 def remove_empty_properties(data: Optional[Union[list, dict]],
                             isempty: Optional[Callable] = None,
-                            isempty_array_element: Optional[Callable] = None) -> None:
+                            isempty_array_element: Optional[Callable] = None,
+                            raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
     def _isempty(value: Any) -> bool:  # noqa
         return isempty(value) if callable(isempty) else value in [None, "", {}, []]
     if isinstance(data, dict):
@@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]],
             if _isempty(value := data[key]):
                 del data[key]
             else:
-                remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
+                remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
+                                        raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                        raise_exception_on_nonempty_array_element_after_empty)
     elif isinstance(data, list):
         for item in data:
-            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
+            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
+                                    raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                    raise_exception_on_nonempty_array_element_after_empty)
         if callable(isempty_array_element):
+            if raise_exception_on_nonempty_array_element_after_empty is True:
+                empty_element_seen = False
+                for item in data:
+                    if not empty_element_seen and isempty_array_element(item):
+                        empty_element_seen = True
+                    elif empty_element_seen and not isempty_array_element(item):
+                        raise Exception("Non-empty element found after empty element.")
             data[:] = [item for item in data if not isempty_array_element(item)]
@@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
 def create_dict(**kwargs) -> dict:
     result = {}
     for name in kwargs:
-        if kwargs[name]:
+        if not (kwargs[name] is None):
             result[name] = kwargs[name]
     return result
@@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str:
     return re.sub(r"\s+", " ", value).strip()
+def normalize_string(value: Optional[str]) -> Optional[str]:
+    """
+    Strips leading/trailing whitespace, and converts each run of consecutive whitespace to a
+    single space in the given string value and returns the result. If the given value is None
+    returns an empty string. If the given value is not actually even a string then returns None.
+    """
+    if value is None:
+        return ""
+    elif isinstance(value, str):
+        # The \s+ pattern matches any whitespace (tabs and newlines too), not only spaces.
+        return re.sub(r"\s+", " ", value).strip()
+    return None
 def find_nth_from_end(string: str, substring: str, nth: int) -> int:
     """
     Returns the index of the nth occurrence of the given substring within
@@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F
         nbytes = int(nbytes)
         return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
     unit = (UNITS_TERSE if terse else UNITS)[index]
-    return f"{nbytes:.{precision}f}{'' if nospace else ' '}{unit}"
+    size = f"{nbytes:.{precision}f}"
+    if size.endswith(f".{'0' * precision}"):
+        # Tidy up extraneous zeros.
+        size = size[:-(precision - 1)]
+    return f"{size}{'' if nospace else ' '}{unit}"
 def format_duration(seconds: Union[int, float]) -> str:
@@ -2670,3 +2702,48 @@ class JsonLinesReader:
                 yield line
             else:
                 raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
+def get_app_specific_directory() -> str:
+    """
+    Returns the standard system application specific directory:
+    - On MacOS this directory is: ~/Library/Application Support
+    - On Linux this directory is: ~/.local/share
+    - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
+    N.B. This has been tested on MacOS and Linux but not on Windows.
+    """
+    # Delegates entirely to the appdirs package, called with no application name.
+    return appdirs.user_data_dir()
+def get_os_name() -> str:
+    # Maps platform.system() to a short lowercase name: "osx", "linux", or "windows";
+    # returns an empty string for any other (or an undeterminable) system.
+    if os_name := platform.system():
+        if os_name == "Darwin": return "osx"  # noqa
+        elif os_name == "Linux": return "linux"  # noqa
+        elif os_name == "Windows": return "windows"  # noqa
+    return ""
+def get_cpu_architecture_name() -> str:
+    # Returns platform.machine(), except that "x86_64" is reported as "amd64";
+    # returns an empty string if the machine type cannot be determined.
+    if os_architecture_name := platform.machine():
+        if os_architecture_name == "x86_64": return "amd64"  # noqa
+        return os_architecture_name
+    return ""
+def create_uuid(nodash: bool = False, upper: bool = False) -> str:
+    # Returns a new random (version 4) uuid as a string; if nodash is True the dashes
+    # are removed; if upper is True the result is upper-cased.
+    value = str(uuid.uuid4())
+    if nodash is True:
+        value = value.replace("-", "")
+    if upper is True:
+        value = value.upper()
+    return value
+def create_short_uuid(length: Optional[int] = None, upper: bool = False):
+    # Returns a random string of the given length (default 16) generated by the shortuuid
+    # package; if upper is True the result is upper-cased.
+    # Not really technically a uuid of course.
+    # A missing, non-int, or non-positive length falls back to the default of 16.
+    if (length is None) or (not isinstance(length, int)) or (length < 1):
+        length = 16
+    value = shortuuid.ShortUUID().random(length=length)
+    if upper is True:
+        value = value.upper()
+    return value

dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl

dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl