dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/command_utils.py +69 -1
- dcicutils/creds_utils.py +1 -1
- dcicutils/ff_utils.py +4 -1
- dcicutils/file_utils.py +250 -41
- dcicutils/http_utils.py +39 -0
- dcicutils/misc_utils.py +82 -5
- dcicutils/portal_object_utils.py +24 -89
- dcicutils/portal_utils.py +249 -37
- dcicutils/schema_utils.py +1 -1
- dcicutils/scripts/view_portal_object.py +87 -5
- dcicutils/structured_data.py +59 -17
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- dcicutils/tmpfile_utils.py +50 -10
- dcicutils/zip_utils.py +27 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA +6 -4
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD +19 -18
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt +0 -0
    
        dcicutils/command_utils.py
    CHANGED
    
    | @@ -1,3 +1,4 @@ | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 1 2 | 
             
            import contextlib
         | 
| 2 3 | 
             
            import functools
         | 
| 3 4 | 
             
            import glob
         | 
| @@ -7,7 +8,7 @@ import re | |
| 7 8 | 
             
            import requests
         | 
| 8 9 | 
             
            import subprocess
         | 
| 9 10 |  | 
| 10 | 
            -
            from typing import Optional
         | 
| 11 | 
            +
            from typing import Callable, Optional
         | 
| 11 12 | 
             
            from .exceptions import InvalidParameterError
         | 
| 12 13 | 
             
            from .lang_utils import there_are
         | 
| 13 14 | 
             
            from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
         | 
| @@ -384,3 +385,70 @@ def script_catch_errors(): | |
| 384 385 | 
             
                            message = str(e)  # Note: We ignore the type, which isn't intended to be shown.
         | 
| 385 386 | 
             
                            PRINT(message)
         | 
| 386 387 | 
             
                        exit(1)
         | 
| 388 | 
            +
             | 
| 389 | 
            +
             | 
| 390 | 
            +
            class Question:
                """
                Supports asking the user a yes/no question, possibly repeatedly; after some maximum
                number of identical answers in a row (consecutively), asks whether that same answer
                should automatically be given to any/all subsequent questions.

                Also keeps a static/global registry of Question instances keyed (only) by question text.
                The actual prompting is delegated to yes_or_no, which is defined elsewhere in this module.
                """
                # Registry of shared instances, keyed by question text (see instance()).
                _static_instances = {}

                @staticmethod
                def instance(question: Optional[str] = None,
                             max: Optional[int] = None, printf: Optional[Callable] = None) -> "Question":
                    """
                    Returns the shared Question for the given question text, creating it on first use.
                    A non-string question is treated as the empty string. The max and printf arguments
                    are only used when the instance is first created; later calls with the same text
                    return the existing instance unchanged.
                    """
                    question = question if isinstance(question, str) else ""
                    existing = Question._static_instances.get(question)
                    if existing is None:
                        existing = Question(question, max=max, printf=printf)
                        Question._static_instances[question] = existing
                    return existing

                @staticmethod
                def yes(question: Optional[str] = None,
                        max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
                    """
                    Convenience wrapper: asks the shared Question for the given text and returns the answer.
                    """
                    return Question.instance(question, max=max, printf=printf).ask()

                def __init__(self, question: Optional[str] = None,
                             max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
                    """
                    :param question: Default question text; anything other than a string becomes "".
                    :param max: Number of consecutive identical answers after which the user is offered
                        an automatic answer; anything other than a positive int disables this.
                    :param printf: Print function; falls back to the builtin print if not callable.
                    """
                    self._question = question if isinstance(question, str) else ""
                    self._max = max if isinstance(max, int) and max > 0 else None
                    # NOTE(review): stored but not used by any method of this class as written.
                    self._print = printf if callable(printf) else print
                    self._yes_consecutive_count = 0
                    self._no_consecutive_count = 0
                    self._yes_automatic = False
                    self._no_automatic = False

                def _offer_automatic(self, value: str) -> bool:
                    """
                    Asks whether the given answer (e.g. "YES" or "NO") should be given automatically from
                    now on. Returns True if so; otherwise resets both consecutive counters, so that the
                    offer is only made again after another full run of identical answers, and returns False.
                    """
                    rarrows = "▶" * 3
                    larrows = "◀" * 3
                    if yes_or_no(f"{rarrows} Do you want to answer {value} to all such questions? {larrows}"):
                        return True
                    self._yes_consecutive_count = 0
                    self._no_consecutive_count = 0
                    return False

                def ask(self, question: Optional[str] = None) -> bool:
                    """
                    Asks the question and returns True for yes, False for no. The text used is the given
                    question if it is a non-empty string, else the text given at construction, else
                    "Undefined question". Once an automatic answer has been accepted, returns that
                    answer immediately without prompting.
                    """
                    if self._yes_automatic:
                        return True
                    if self._no_automatic:
                        return False
                    prompt = (question if isinstance(question, str) else "") or self._question or "Undefined question"
                    if yes_or_no(prompt):
                        self._yes_consecutive_count += 1
                        self._no_consecutive_count = 0
                        if self._max and (self._yes_consecutive_count >= self._max):
                            # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
                            self._yes_automatic = self._offer_automatic("YES")
                        return True
                    self._no_consecutive_count += 1
                    self._yes_consecutive_count = 0
                    if self._max and (self._no_consecutive_count >= self._max):
                        # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
                        self._no_automatic = self._offer_automatic("NO")
                    return False
         | 
    
        dcicutils/creds_utils.py
    CHANGED
    
    | @@ -170,7 +170,7 @@ class KeyManager: | |
| 170 170 | 
             
                            raise ValueError(f"A KeyManager named {name!r} has already been defined.")
         | 
| 171 171 | 
             
                        key_manager_class._init_class_variables()
         | 
| 172 172 | 
             
                        key_manager_class._REGISTERED = True
         | 
| 173 | 
            -
                        _KEY_MANAGERS[name] =  | 
| 173 | 
            +
                        _KEY_MANAGERS[name] = key_manager_class
         | 
| 174 174 | 
             
                        return key_manager_class
         | 
| 175 175 | 
             
                    return _register_class
         | 
| 176 176 |  | 
    
        dcicutils/ff_utils.py
    CHANGED
    
    | @@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth): | |
| 895 895 | 
             
                used to create the generator.
         | 
| 896 896 | 
             
                Should NOT be used directly
         | 
| 897 897 | 
             
                """
         | 
| 898 | 
            +
                def get_es_host_local() -> Optional[str]:
         | 
| 899 | 
            +
                    return os.environ.get("ES_HOST_LOCAL", None)
         | 
| 898 900 | 
             
                health = get_health_page(key=auth)
         | 
| 899 901 | 
             
                if es_client is None:
         | 
| 900 | 
            -
                    es_url  | 
| 902 | 
            +
                    if not (es_url := get_es_host_local()):
         | 
| 903 | 
            +
                        es_url = health['elasticsearch']
         | 
| 901 904 | 
             
                    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
         | 
| 902 905 | 
             
                namespace_star = health.get('namespace', '') + '*'
         | 
| 903 906 | 
             
                # match all given uuids to _id fields
         | 
    
        dcicutils/file_utils.py
    CHANGED
    
    | @@ -1,13 +1,23 @@ | |
| 1 1 | 
             
            import glob
         | 
| 2 | 
            +
            import hashlib
         | 
| 3 | 
            +
            import io
         | 
| 2 4 | 
             
            import os
         | 
| 3 5 | 
             
            import pathlib
         | 
| 6 | 
            +
            from datetime import datetime
         | 
| 7 | 
            +
            import random
         | 
| 8 | 
            +
            import string
         | 
| 9 | 
            +
            from tempfile import gettempdir as get_temporary_directory
         | 
| 4 10 | 
             
            from typing import List, Optional, Union
         | 
| 11 | 
            +
            from uuid import uuid4 as uuid
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            HOME_DIRECTORY = str(pathlib.Path().home())
         | 
| 5 14 |  | 
| 6 15 |  | 
| 7 16 | 
             
            def search_for_file(file: Union[str, pathlib.Path],
                                location: Union[str, pathlib.Path, Optional[List[Union[str, pathlib.Path]]]] = None,
                                recursive: bool = False,
                                single: bool = False,
                                order: bool = True) -> Union[List[str], Optional[str]]:
                """
                Searches for the given file name, first directly in the given directory or list of
                directories (location), or in the current (working) directory if no location is given;
                if the recursive flag is True then additionally searches all sub-directories of those
                directories. An absolute file path is only checked for existence; location is ignored.

                If the single flag is True then the first file found is returned (as a string), or None
                if none; otherwise all matched files are returned in a list (empty if none), as absolute
                paths without duplicates. Unless the order flag is False that list is sorted by fewest
                path components first and then alphabetically.

                A location entry which is actually a file is taken to mean its parent directory; entries
                which are neither strings nor pathlib paths, or are blank, are ignored.
                """
                def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
                    return sorted(paths, key=lambda path: (len(path.split(os.path.sep)), path))

                if not (file and isinstance(file, (str, pathlib.Path))):
                    return None if single is True else []
                # Work with a plain string from here on; the recursive search below uses string
                # methods on the file name, which would otherwise fail for a pathlib path.
                file = str(file)
                if os.path.isabs(file):
                    if os.path.exists(file):
                        return file if single is True else [file]
                    return None if single is True else []

                if not location:
                    location = ["."]
                elif isinstance(location, (str, pathlib.Path)):
                    location = [location]
                elif not isinstance(location, list):
                    location = []

                # Normalize the directories to unique absolute paths, preserving the given order.
                directories = []
                for directory in location:
                    if isinstance(directory, pathlib.Path):
                        directory = str(directory)
                    elif not isinstance(directory, str):
                        continue
                    if not (directory := directory.strip()):
                        continue
                    if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
                        # Actually, allow a file rather than a directory; assume its parent directory was intended.
                        if not (directory := os.path.dirname(directory)):
                            continue
                    if directory not in directories:
                        directories.append(directory)

                files_found = []

                # First pass: look directly in each directory.
                for directory in directories:
                    candidate = os.path.join(directory, file)
                    if os.path.exists(candidate):
                        file_found = os.path.abspath(os.path.normpath(candidate))
                        if single is True:
                            return file_found
                        if file_found not in files_found:
                            files_found.append(file_found)

                # Second pass (optional): look recursively beneath each directory.
                if recursive is True:
                    for directory in directories:
                        if not directory.endswith("/**") and not file.startswith("**/"):
                            pattern = f"{directory}/**/{file}"
                        else:
                            pattern = f"{directory}/{file}"
                        for file_found in glob.glob(pattern, recursive=True):
                            file_found = os.path.abspath(file_found)
                            if single is True:
                                return file_found
                            if file_found not in files_found:
                                files_found.append(file_found)

                if single is True:
                    # Any match would already have been returned above.
                    return None
                if order is True:
                    return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
                return files_found
         | 
| 89 | 
            +
             | 
| 90 | 
            +
             | 
| 91 | 
            +
            def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
                """
                Returns a normalized version of the given path: surrounding whitespace is stripped and
                redundant directory separators and parent references are collapsed (os.path.normpath).

                If expand_home is True, a leading home directory indicator ("~") is expanded to the
                actual home path. If expand_home is False (and the OS is posix), a path equal to or
                beneath the home directory is rewritten to start with "~" instead. If expand_home is
                None (the default) neither is done. If absolute is True the result is then converted to
                an absolute path. Returns an empty string if the given value is not a string (or
                pathlib.Path) or is blank.
                """
                if isinstance(value, pathlib.Path):
                    value = str(value)
                if not isinstance(value, str):
                    return ""
                value = value.strip()
                if not value:
                    return ""
                value = os.path.normpath(value)
                if not value:
                    return ""
                if expand_home is True:
                    value = os.path.expanduser(value)
                elif (expand_home is False) and (os.name == "posix"):
                    home_prefix = HOME_DIRECTORY + os.sep
                    if value.startswith(home_prefix):
                        value = "~/" + value[len(home_prefix):]
                    elif value == HOME_DIRECTORY:
                        value = "~"
                return os.path.abspath(value) if absolute is True else value
         | 
| 117 | 
            +
             | 
| 118 | 
            +
             | 
| 119 | 
            +
            def get_file_size(file: Union[str, os.PathLike], raise_exception: bool = True) -> Optional[int]:
                """
                Returns the size in bytes of the given file (a string or path-like object).
                Returns None if the given file is not a string or path-like object at all. If the size
                cannot be obtained (e.g. the file does not exist) the exception is re-raised, unless
                raise_exception is not True in which case None is returned.
                """
                if not isinstance(file, (str, os.PathLike)):
                    return None
                try:
                    return os.path.getsize(file)
                except Exception:
                    if raise_exception is True:
                        raise
                    return None
         | 
| 126 | 
            +
             | 
| 127 | 
            +
             | 
| 128 | 
            +
            def get_file_modified_datetime(file: Union[str, os.PathLike], raise_exception: bool = True) -> Optional[datetime]:
                """
                Returns the last-modified time of the given file (a string or path-like object) as a
                naive datetime in local time (datetime.fromtimestamp of os.path.getmtime).
                Returns None if the given file is not a string or path-like object at all. If the time
                cannot be obtained (e.g. the file does not exist) the exception is re-raised, unless
                raise_exception is not True in which case None is returned.
                """
                if not isinstance(file, (str, os.PathLike)):
                    return None
                try:
                    return datetime.fromtimestamp(os.path.getmtime(file))
                except Exception:
                    if raise_exception is True:
                        raise
                    return None
         | 
| 135 | 
            +
             | 
| 136 | 
            +
             | 
| 137 | 
            +
            def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
                """
                Returns True iff the contents of the two given files are exactly the same; the files
                are compared block by block in binary mode. If either file cannot be opened or read
                the exception is re-raised, unless raise_exception is not True in which case False
                is returned.
                """
                block_size = 4096
                try:
                    with open(filea, "rb") as stream_a, open(fileb, "rb") as stream_b:
                        while True:
                            block_a = stream_a.read(block_size)
                            if block_a != stream_b.read(block_size):
                                return False
                            if not block_a:
                                # Both streams hit end-of-file together without any difference.
                                return True
                except Exception:
                    if raise_exception is True:
                        raise
                    return False
         | 
| 157 | 
            +
             | 
| 158 | 
            +
             | 
| 159 | 
            +
            def compute_file_md5(file: Union[str, os.PathLike], raise_exception: bool = True) -> str:
                """
                Returns the md5 checksum (hex digest) of the contents of the given file (a string or
                path-like object). Returns an empty string if the given file is not a string or
                path-like object at all. If the file cannot be opened or read the exception is
                re-raised, unless raise_exception is not True in which case an empty string is returned.
                """
                if not isinstance(file, (str, os.PathLike)):
                    return ""
                try:
                    md5 = hashlib.md5()
                    # Read in small blocks so that arbitrarily large files are not loaded into memory.
                    with open(file, "rb") as stream:
                        while chunk := stream.read(4096):
                            md5.update(chunk)
                    return md5.hexdigest()
                except Exception:
                    if raise_exception is True:
                        raise
                    return ""
         | 
| 175 | 
            +
             | 
| 176 | 
            +
             | 
| 177 | 
            +
            def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
                """
                Returns the AWS S3 "etag" for the given file; this value is md5-like but is
                not the same as a normal md5. It is used to check that a file in S3 appears
                to be exactly the same file as a local file. If the file cannot be opened or
                read the exception is re-raised, unless raise_exception is not True in which
                case None is returned.
                """
                etag = None
                try:
                    with io.open(file, "rb") as stream:
                        etag = _compute_file_etag(stream)
                except Exception:
                    if raise_exception is True:
                        raise
                return etag
         | 
| 190 | 
            +
             | 
| 191 | 
            +
             | 
| 192 | 
            +
            def _compute_file_etag(f: io.BufferedReader) -> str:
         | 
| 193 | 
            +
                # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
         | 
| 194 | 
            +
                MULTIPART_THRESHOLD = 8388608
         | 
| 195 | 
            +
                MULTIPART_CHUNKSIZE = 8388608
         | 
| 196 | 
            +
                # BUFFER_SIZE = 1048576
         | 
| 197 | 
            +
                # Verify some assumptions are correct
         | 
| 198 | 
            +
                # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
         | 
| 199 | 
            +
                # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
         | 
| 200 | 
            +
                # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
         | 
| 201 | 
            +
                hash = hashlib.md5()
         | 
| 202 | 
            +
                read = 0
         | 
| 203 | 
            +
                chunks = None
         | 
| 204 | 
            +
                while True:
         | 
| 205 | 
            +
                    # Read some from stdin, if we're at the end, stop reading
         | 
| 206 | 
            +
                    bits = f.read(1048576)
         | 
| 207 | 
            +
                    if len(bits) == 0:
         | 
| 208 | 
            +
                        break
         | 
| 209 | 
            +
                    read += len(bits)
         | 
| 210 | 
            +
                    hash.update(bits)
         | 
| 211 | 
            +
                    if chunks is None:
         | 
| 212 | 
            +
                        # We're handling a multi-part upload, so switch to calculating
         | 
| 213 | 
            +
                        # hashes of each chunk
         | 
| 214 | 
            +
                        if read >= MULTIPART_THRESHOLD:
         | 
| 215 | 
            +
                            chunks = b''
         | 
| 216 | 
            +
                    if chunks is not None:
         | 
| 217 | 
            +
                        if (read % MULTIPART_CHUNKSIZE) == 0:
         | 
| 218 | 
            +
                            # Dont with a chunk, add it to the list of hashes to hash later
         | 
| 219 | 
            +
                            chunks += hash.digest()
         | 
| 220 | 
            +
                            hash = hashlib.md5()
         | 
| 221 | 
            +
                if chunks is None:
         | 
| 222 | 
            +
                    # Normal upload, just output the MD5 hash
         | 
| 223 | 
            +
                    etag = hash.hexdigest()
         | 
| 224 | 
            +
                else:
         | 
| 225 | 
            +
                    # Multipart upload, need to output the hash of the hashes
         | 
| 226 | 
            +
                    if (read % MULTIPART_CHUNKSIZE) != 0:
         | 
| 227 | 
            +
                        # Add the last part if we have a partial chunk
         | 
| 228 | 
            +
                        chunks += hash.digest()
         | 
| 229 | 
            +
                    etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
         | 
| 230 | 
            +
                return etag
         | 
| 231 | 
            +
             | 
| 232 | 
            +
             | 
| 233 | 
            +
            def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
                                   nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
                """
                Writes random content to the given file (name/path) and returns the path of the file written.

                If the given file is None (or not a non-empty string) then a uniquely named file is created in
                the directory returned by get_temporary_directory; the optional prefix and suffix are applied
                to that generated file name, and are ignored when an explicit file is given. The number of bytes
                written is 1024 by default but can be specified with the nbytes argument; a non-integer or
                negative nbytes results in an empty file. Defaults to writing ASCII letters/digits in newline
                terminated lines of 80 characters each; use the line_length argument to change the line length.
                If the binary argument is True then random binary data is written instead.
                """
                if not isinstance(nbytes, int) or nbytes < 0:
                    nbytes = 0
                if not isinstance(file, str) or not file:
                    if not isinstance(prefix, str):
                        prefix = ""
                    if not isinstance(suffix, str):
                        suffix = ""
                    # Apply the prefix/suffix to the generated name (they were previously normalized but never used).
                    unique_name = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
                    file = os.path.join(get_temporary_directory(), f"{prefix}{unique_name}{suffix}")
                with open(file, "wb" if binary is True else "w") as f:
                    if binary is True:
                        f.write(os.urandom(nbytes))
                    else:
                        if (not isinstance(line_length, int)) or (line_length < 1):
                            line_length = 80
                        # Account for the newline which terminates each line.
                        line_length += 1
                        alphabet = string.ascii_letters + string.digits
                        nlines, nremainder = divmod(nbytes, line_length)
                        for _ in range(nlines):
                            f.write("".join(random.choices(alphabet, k=line_length - 1)))
                            f.write("\n")
                        # A trailing partial line gets whatever characters fit plus its terminating newline.
                        if nremainder > 1:
                            f.write("".join(random.choices(alphabet, k=nremainder - 1)))
                        if nremainder > 0:
                            f.write("\n")
                return file
         | 
    
        dcicutils/http_utils.py
    ADDED
    
    | @@ -0,0 +1,39 @@ | |
| 1 | 
            +
            from contextlib import contextmanager
         | 
| 2 | 
            +
            import requests
         | 
| 3 | 
            +
            from typing import Callable, Optional
         | 
| 4 | 
            +
            from dcicutils.tmpfile_utils import temporary_file
         | 
| 5 | 
            +
             | 
| 6 | 
            +
             | 
| 7 | 
            +
            @contextmanager
            def download(url: str, suffix: Optional[str] = None, binary: bool = True,
                         progress: Optional[Callable] = None) -> Optional[str]:
                """
                Context manager which downloads the given URL into a file obtained from temporary_file
                and yields the path of that file. An optional suffix for the temporary file name may be
                given. The binary and progress arguments are passed straight through to download_to;
                binary file mode is the default, pass False as the binary argument if that is not desired.
                """
                with temporary_file(suffix=suffix) as temporary_path:
                    download_to(url, temporary_path, binary=binary, progress=progress)
                    yield temporary_path
         | 
| 18 | 
            +
             | 
| 19 | 
            +
             | 
| 20 | 
            +
            def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
                """
                Downloads the given URL into the given file.

                Defaults to binary file mode; if not desired then pass False as the binary argument, in
                which case the file is written in text mode and the downloaded bytes are decoded using the
                encoding reported by the response (falling back to UTF-8; undecodable bytes are replaced).

                If a progress callable is given it is called after each chunk as progress(nbytes, nbytes_total)
                where nbytes is the number of (raw) bytes downloaded so far and nbytes_total is the value of
                the Content-Length response header, or None if that header is absent or not a plain number.
                A progress argument which is not callable is ignored.

                NOTE(review): the HTTP status is not checked, so the body of an error response is
                written to the file like any other content; confirm whether callers rely on this.
                """
                import codecs  # Local import; only needed to decode chunks when writing in text mode.

                if not callable(progress):
                    progress = None
                nbytes = 0
                nbytes_total = None
                # Use the response as a context manager so that the streamed connection is always released.
                with requests.get(url, stream=True) as response:
                    if progress:
                        content_length = response.headers.get("Content-Length")
                        if isinstance(content_length, str) and content_length.isdigit():
                            nbytes_total = int(content_length)
                    decoder = None
                    if binary is not True:
                        # The chunks are always bytes, which cannot be written to a text mode file as-is;
                        # decode incrementally so multi-byte characters split across chunks are handled.
                        try:
                            decoder_class = codecs.getincrementaldecoder(response.encoding or "utf-8")
                        except LookupError:
                            decoder_class = codecs.getincrementaldecoder("utf-8")
                        decoder = decoder_class(errors="replace")
                    with open(file, "wb" if binary is True else "w") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(decoder.decode(chunk) if decoder else chunk)
                            if progress:
                                nbytes += len(chunk)
                                progress(nbytes, nbytes_total)
                        # Flush anything the decoder is still holding on to (e.g. a truncated character).
                        if decoder and (remaining := decoder.decode(b"", final=True)):
                            f.write(remaining)
         | 
    
        dcicutils/misc_utils.py
    CHANGED
    
    | @@ -3,6 +3,7 @@ This file contains functions that might be generally useful. | |
| 3 3 | 
             
            """
         | 
| 4 4 |  | 
| 5 5 | 
             
            from collections import namedtuple
         | 
| 6 | 
            +
            import appdirs
         | 
| 6 7 | 
             
            import contextlib
         | 
| 7 8 | 
             
            import datetime
         | 
| 8 9 | 
             
            import functools
         | 
| @@ -13,10 +14,12 @@ import json | |
| 13 14 | 
             
            import logging
         | 
| 14 15 | 
             
            import math
         | 
| 15 16 | 
             
            import os
         | 
| 17 | 
            +
            import platform
         | 
| 16 18 | 
             
            import pytz
         | 
| 17 19 | 
             
            import re
         | 
| 18 20 | 
             
            import rfc3986.validators
         | 
| 19 21 | 
             
            import rfc3986.exceptions
         | 
| 22 | 
            +
            import shortuuid
         | 
| 20 23 | 
             
            import time
         | 
| 21 24 | 
             
            import uuid
         | 
| 22 25 | 
             
            import warnings
         | 
| @@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False): | |
| 1152 1155 |  | 
| 1153 1156 | 
             
            def remove_empty_properties(data: Optional[Union[list, dict]],
         | 
| 1154 1157 | 
             
                                        isempty: Optional[Callable] = None,
         | 
| 1155 | 
            -
                                        isempty_array_element: Optional[Callable] = None | 
| 1158 | 
            +
                                        isempty_array_element: Optional[Callable] = None,
         | 
| 1159 | 
            +
                                        raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
         | 
| 1156 1160 | 
             
                def _isempty(value: Any) -> bool:  # noqa
         | 
| 1157 1161 | 
             
                    return isempty(value) if callable(isempty) else value in [None, "", {}, []]
         | 
| 1158 1162 | 
             
                if isinstance(data, dict):
         | 
| @@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]], | |
| 1160 1164 | 
             
                        if _isempty(value := data[key]):
         | 
| 1161 1165 | 
             
                            del data[key]
         | 
| 1162 1166 | 
             
                        else:
         | 
| 1163 | 
            -
                            remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element | 
| 1167 | 
            +
                            remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
         | 
| 1168 | 
            +
                                                    raise_exception_on_nonempty_array_element_after_empty=  # noqa
         | 
| 1169 | 
            +
                                                    raise_exception_on_nonempty_array_element_after_empty)
         | 
| 1164 1170 | 
             
                elif isinstance(data, list):
         | 
| 1165 1171 | 
             
                    for item in data:
         | 
| 1166 | 
            -
                        remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element | 
| 1172 | 
            +
                        remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
         | 
| 1173 | 
            +
                                                raise_exception_on_nonempty_array_element_after_empty=  # noqa
         | 
| 1174 | 
            +
                                                raise_exception_on_nonempty_array_element_after_empty)
         | 
| 1167 1175 | 
             
                    if callable(isempty_array_element):
         | 
| 1176 | 
            +
                        if raise_exception_on_nonempty_array_element_after_empty is True:
         | 
| 1177 | 
            +
                            empty_element_seen = False
         | 
| 1178 | 
            +
                            for item in data:
         | 
| 1179 | 
            +
                                if not empty_element_seen and isempty_array_element(item):
         | 
| 1180 | 
            +
                                    empty_element_seen = True
         | 
| 1181 | 
            +
                                elif empty_element_seen and not isempty_array_element(item):
         | 
| 1182 | 
            +
                                    raise Exception("Non-empty element found after empty element.")
         | 
| 1168 1183 | 
             
                        data[:] = [item for item in data if not isempty_array_element(item)]
         | 
| 1169 1184 |  | 
| 1170 1185 |  | 
| @@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]], | |
| 1522 1537 | 
             
            def create_dict(**kwargs) -> dict:
         | 
| 1523 1538 | 
             
                result = {}
         | 
| 1524 1539 | 
             
                for name in kwargs:
         | 
| 1525 | 
            -
                    if kwargs[name]:
         | 
| 1540 | 
            +
                    if not (kwargs[name] is None):
         | 
| 1526 1541 | 
             
                        result[name] = kwargs[name]
         | 
| 1527 1542 | 
             
                return result
         | 
| 1528 1543 |  | 
| @@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str: | |
| 2548 2563 | 
             
                return re.sub(r"\s+", " ", value).strip()
         | 
| 2549 2564 |  | 
| 2550 2565 |  | 
| 2566 | 
            +
            def normalize_string(value: Optional[str]) -> Optional[str]:
                """
                Returns the given string with leading/trailing whitespace removed and with each
                run of consecutive whitespace characters collapsed into a single space. Returns
                an empty string if the given value is None, and returns None if the given value
                is neither None nor a string.
                """
                if isinstance(value, str):
                    return re.sub(r"\s+", " ", value).strip()
                return "" if value is None else None
         | 
| 2577 | 
            +
             | 
| 2578 | 
            +
             | 
| 2551 2579 | 
             
            def find_nth_from_end(string: str, substring: str, nth: int) -> int:
         | 
| 2552 2580 | 
             
                """
         | 
| 2553 2581 | 
             
                Returns the index of the nth occurrence of the given substring within
         | 
| @@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F | |
| 2590 2618 | 
             
                    nbytes = int(nbytes)
         | 
| 2591 2619 | 
             
                    return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
         | 
| 2592 2620 | 
             
                unit = (UNITS_TERSE if terse else UNITS)[index]
         | 
| 2593 | 
            -
                 | 
| 2621 | 
            +
                size = f"{nbytes:.{precision}f}"
         | 
| 2622 | 
            +
                if size.endswith(f".{'0' * precision}"):
         | 
| 2623 | 
            +
                    # Tidy up extraneous zeros.
         | 
| 2624 | 
            +
                    size = size[:-(precision - 1)]
         | 
| 2625 | 
            +
                return f"{size}{'' if nospace else ' '}{unit}"
         | 
| 2594 2626 |  | 
| 2595 2627 |  | 
| 2596 2628 | 
             
            def format_duration(seconds: Union[int, float]) -> str:
         | 
| @@ -2670,3 +2702,48 @@ class JsonLinesReader: | |
| 2670 2702 | 
             
                            yield line
         | 
| 2671 2703 | 
             
                        else:
         | 
| 2672 2704 | 
             
                            raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
         | 
| 2705 | 
            +
             | 
| 2706 | 
            +
             | 
| 2707 | 
            +
            def get_app_specific_directory() -> str:
                """
                Returns the standard system application specific directory,
                i.e. whatever appdirs.user_data_dir() reports:
                - On MacOS this directory is: ~/Library/Application Support
                - On Linux this directory is: ~/.local/share
                - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
                N.B. This has been tested on MacOS and Linux but not on Windows.
                """
                directory = appdirs.user_data_dir()
                return directory
         | 
| 2716 | 
            +
             | 
| 2717 | 
            +
             | 
| 2718 | 
            +
            def get_os_name() -> str:
                """
                Returns a short lowercase name for the operating system reported by platform.system():
                "osx" for Darwin, "linux" for Linux, "windows" for Windows; for anything else
                (including an empty/undetermined system name) returns an empty string.
                """
                short_names = {"Darwin": "osx", "Linux": "linux", "Windows": "windows"}
                return short_names.get(platform.system(), "")
         | 
| 2724 | 
            +
             | 
| 2725 | 
            +
             | 
| 2726 | 
            +
            def get_cpu_architecture_name() -> str:
                """
                Returns the machine/CPU architecture name reported by platform.machine(), except
                that "x86_64" is reported as "amd64"; returns an empty string if the platform
                module cannot determine (i.e. returns an empty value for) the machine type.
                """
                machine = platform.machine()
                if not machine:
                    return ""
                return "amd64" if machine == "x86_64" else machine
         | 
| 2731 | 
            +
             | 
| 2732 | 
            +
             | 
| 2733 | 
            +
            def create_uuid(nodash: bool = False, upper: bool = False) -> str:
                """
                Returns a newly generated random (version 4) UUID as a string. If the nodash argument
                is True then the dashes are omitted from the result; if the upper argument is True
                then the result is upper-cased. Only the actual value True enables either option.
                """
                generated = uuid.uuid4()
                # The hex attribute is the same 32 hexadecimal digits as the string form minus the dashes.
                text = generated.hex if nodash is True else str(generated)
                return text.upper() if upper is True else text
         | 
| 2740 | 
            +
             | 
| 2741 | 
            +
             | 
| 2742 | 
            +
            def create_short_uuid(length: Optional[int] = None, upper: bool = False):
                """
                Returns a random string of the given length generated by shortuuid.ShortUUID().random;
                the length defaults to 16 if it is not given, is not an integer, or is less than one.
                If the upper argument is True then the result is upper-cased.
                """
                # Not really technically a uuid of course.
                if not isinstance(length, int) or length < 1:
                    length = 16
                token = shortuuid.ShortUUID().random(length=length)
                return token.upper() if upper is True else token
         |