PyPI - dcicutils - Versions diffs - 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl - Mend

dcicutils 8.9.0.0b0-py3-none-any.whl → 8.9.0.1b2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

dcicutils/command_utils.py +69 -1
dcicutils/creds_utils.py +1 -1
dcicutils/ff_utils.py +4 -1
dcicutils/file_utils.py +250 -41
dcicutils/http_utils.py +39 -0
dcicutils/misc_utils.py +82 -5
dcicutils/portal_object_utils.py +24 -89
dcicutils/portal_utils.py +249 -37
dcicutils/schema_utils.py +1 -1
dcicutils/scripts/view_portal_object.py +87 -5
dcicutils/structured_data.py +59 -17
dcicutils/submitr/ref_lookup_strategy.py +31 -25
dcicutils/tmpfile_utils.py +50 -10
dcicutils/zip_utils.py +27 -0
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA +6 -4
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD +19 -18
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt +0 -0
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL +0 -0
{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt +0 -0

dcicutils/scripts/view_portal_object.py CHANGED Viewed

@@ -57,6 +57,7 @@
 import argparse
 from functools import lru_cache
+import io
 import json
 import pyperclip
 import os
@@ -97,11 +98,18 @@ def main():
                         help="Include all properties for schema usage.")
     parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
     parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
+    parser.add_argument("--post", type=str, required=False, default=None,
+                        help="POST data of the main arg type with data from file specified with this option.")
+    parser.add_argument("--patch", type=str, required=False, default=None,
+                        help="PATCH data of the main arg type with data from file specified with this option.")
     parser.add_argument("--database", action="store_true", required=False, default=False,
                         help="Read from database output.")
+    parser.add_argument("--bool", action="store_true", required=False,
+                        default=False, help="Only return whether found or not.")
     parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
     parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
                         help="Copy object data to clipboard.")
+    parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int)
     parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
     parser.add_argument("--more-details", action="store_true", required=False, default=False,
                         help="More detailed output.")
@@ -151,6 +159,18 @@ def main():
         args.schema = True
     if args.schema:
+        if args.post:
+            if post_data := _read_json_from_file(args.post):
+                if args.verbose:
+                    _print(f"POSTing data from file ({args.post}) as type: {args.uuid}")
+                if isinstance(post_data, dict):
+                    post_data = [post_data]
+                elif not isinstance(post_data, list):
+                    _print(f"POST data neither list nor dictionary: {args.post}")
+                for item in post_data:
+                    portal.post_metadata(args.uuid, item)
+                if args.verbose:
+                    _print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}")
         schema, schema_name = _get_schema(portal, args.uuid)
         if schema:
             if args.copy:
@@ -166,14 +186,50 @@ def main():
             _print_schema(schema, details=args.details, more_details=args.details,
                           all=args.all, raw=args.raw, raw_yaml=args.yaml)
             return
-    data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, verbose=args.verbose)
+    elif args.patch:
+        if patch_data := _read_json_from_file(args.patch):
+            if args.verbose:
+                _print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}")
+            if isinstance(patch_data, dict):
+                patch_data = [patch_data]
+            elif not isinstance(patch_data, list):
+                _print(f"PATCH data neither list nor dictionary: {args.patch}")
+            for item in patch_data:
+                portal.patch_metadata(args.uuid, item)
+            if args.verbose:
+                _print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}")
+            return
+        else:
+            _print(f"No PATCH data found in file: {args.patch}")
+            exit(1)
+    data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
+                              database=args.database, check=args.bool, verbose=args.verbose)
+    if args.bool:
+        if data:
+            _print(f"{args.uuid}: found")
+            exit(0)
+        else:
+            _print(f"{args.uuid}: not found")
+            exit(1)
     if args.copy:
         pyperclip.copy(json.dumps(data, indent=4))
     if args.yaml:
         _print(yaml.dump(data))
     else:
-        _print(json.dumps(data, default=str, indent=4))
+        if args.indent > 0:
+            _print(_format_json_with_indent(data, indent=args.indent))
+        else:
+            _print(json.dumps(data, default=str, indent=4))
+def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
+    if isinstance(value, dict):
+        result = json.dumps(value, indent=4)
+        if indent > 0:
+            result = f"{indent * ' '}{result}"
+            result = result.replace("\n", f"\n{indent * ' '}")
+        return result
 def _create_portal(ini: str, env: Optional[str] = None,
@@ -198,7 +254,8 @@ def _create_portal(ini: str, env: Optional[str] = None,
 def _get_portal_object(portal: Portal, uuid: str,
-                       raw: bool = False, database: bool = False, verbose: bool = False) -> dict:
+                       raw: bool = False, database: bool = False,
+                       check: bool = False, verbose: bool = False) -> dict:
     response = None
     try:
         if not uuid.startswith("/"):
@@ -212,13 +269,18 @@ def _get_portal_object(portal: Portal, uuid: str,
             _exit()
         _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
     if not response:
+        if check:
+            return None
         _exit(f"Null response getting Portal object from {portal.server}: {uuid}")
     if response.status_code not in [200, 307]:
         # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
         _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
     if not response.json:
         _exit(f"Invalid JSON getting Portal object: {uuid}")
-    return response.json()
+    response = response.json()
+    if raw:
+        response.pop("schema_version", None)
+    return response
 @lru_cache(maxsize=1)
@@ -257,6 +319,7 @@ def _print_schema_info(schema: dict, level: int = 0,
                        required: Optional[List[str]] = None) -> None:
     if not schema or not isinstance(schema, dict):
         return
+    identifying_properties = schema.get("identifyingProperties")
     if level == 0:
         if required_properties := schema.get("required"):
             _print("- required properties:")
@@ -383,6 +446,8 @@ def _print_schema_info(schema: dict, level: int = 0,
                         suffix += f" | enum"
                     if property_required:
                         suffix += f" | required"
+                    if property_name in (identifying_properties or []):
+                        suffix += f" | identifying"
                     if property.get("uniqueKey"):
                         suffix += f" | unique"
                     if pattern := property.get("pattern"):
@@ -529,6 +594,23 @@ def _print_tree(root_name: Optional[str],
         print(line)
+def _read_json_from_file(file: str) -> Optional[dict]:
+    if not os.path.exists(file):
+        _print(f"Cannot find file: {file}")
+        exit(1)
+    try:
+        with io.open(file, "r") as f:
+            try:
+                return json.load(f)
+            except Exception:
+                _print(f"Cannot parse JSON in file: {file}")
+                exit(1)
+    except Exception as e:
+        print(e)
+        _print(f"Cannot open file: {file}")
+        exit(1)
 def _print(*args, **kwargs):
     with uncaptured_output():
         PRINT(*args, **kwargs)

dcicutils/structured_data.py CHANGED Viewed

@@ -11,7 +11,6 @@ from webtest.app import TestApp
 from dcicutils.common import OrchestratedApp
 from dcicutils.data_readers import CsvReader, Excel, RowReader
 from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
-from dcicutils.file_utils import search_for_file
 from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
                                   merge_objects, remove_empty_properties, right_trim, split_string,
                                   to_boolean, to_enum, to_float, to_integer, VirtualApp)
@@ -53,9 +52,10 @@ class StructuredDataSet:
     def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
                  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
                  order: Optional[List[str]] = None, prune: bool = True,
+                 remove_empty_objects_from_lists: bool = True,
                  ref_lookup_strategy: Optional[Callable] = None,
                  ref_lookup_nocache: bool = False,
-                 norefs: bool = False,
+                 norefs: bool = False, merge: bool = False,
                  progress: Optional[Callable] = None,
                  debug_sleep: Optional[str] = None) -> None:
         self._progress = progress if callable(progress) else None
@@ -65,7 +65,8 @@ class StructuredDataSet:
                               ref_lookup_nocache=ref_lookup_nocache) if portal else None
         self._ref_lookup_strategy = ref_lookup_strategy
         self._order = order
-        self._prune = prune
+        self._prune = prune is True
+        self._remove_empty_objects_from_lists = remove_empty_objects_from_lists is True
         self._warnings = {}
         self._errors = {}
         self._resolved_refs = set()
@@ -73,6 +74,7 @@ class StructuredDataSet:
         self._nrows = 0
         self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
         self._norefs = True if norefs is True else False
+        self._merge = True if merge is True else False
         self._debug_sleep = None
         if debug_sleep:
             try:
@@ -93,14 +95,16 @@ class StructuredDataSet:
     def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
              schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
              order: Optional[List[str]] = None, prune: bool = True,
+             remove_empty_objects_from_lists: bool = True,
              ref_lookup_strategy: Optional[Callable] = None,
              ref_lookup_nocache: bool = False,
-             norefs: bool = False,
+             norefs: bool = False, merge: bool = False,
              progress: Optional[Callable] = None,
              debug_sleep: Optional[str] = None) -> StructuredDataSet:
         return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
+                                 remove_empty_objects_from_lists=remove_empty_objects_from_lists,
                                  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
-                                 norefs=norefs, progress=progress, debug_sleep=debug_sleep)
+                                 norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
     def validate(self, force: bool = False) -> None:
         def data_without_deleted_properties(data: dict) -> dict:
@@ -204,14 +208,6 @@ class StructuredDataSet:
                             result.append({"type": type_name, "file": file_name})
         return result
-    def upload_files_located(self,
-                             location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
-        upload_files = copy.deepcopy(self.upload_files)
-        for upload_file in upload_files:
-            if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
-                upload_file["path"] = file_path
-        return upload_files
     @property
     def nrows(self) -> int:
         return self._nrows
@@ -346,7 +342,23 @@ class StructuredDataSet:
     def _load_json_file(self, file: str) -> None:
         with open(file) as f:
-            self._add(Schema.type_name(file), json.load(f))
+            data = json.load(f)
+            if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
+                (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)):  # noqa
+                # If the JSON file name looks like a schema name then assume it
+                # contains an object or an array of objects of that schema type.
+                if self._merge:
+                    data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
+                self._add(Schema.type_name(file), data)
+            elif isinstance(data, dict):
+                # Otherwise if the JSON file name does not look like a schema name then
+                # assume it is a dictionary where each property is the name of a schema, and
+                # where each property contains a list of objects of that schema type.
+                for schema_name in data:
+                    item = data[schema_name]
+                    if self._merge:
+                        item = self._merge_with_existing_portal_object(item, schema_name)
+                    self._add(schema_name, item)
     def _load_reader(self, reader: RowReader, type_name: str) -> None:
         schema = None
@@ -368,7 +380,14 @@ class StructuredDataSet:
                 structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
                 if self._autoadd_properties:
                     self._add_properties(structured_row, self._autoadd_properties, schema)
-            self._add(type_name, structured_row)
+            # New merge functionality (2024-05-25).
+            if self._merge:
+                structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
+            if (prune_error := self._prune_structured_row(structured_row)) is not None:
+                self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
+                                  "error": prune_error}, "validation")
+            else:
+                self._add(type_name, structured_row)  # TODO: why type_name and not schema_name?
             if self._progress:
                 self._progress({
                     PROGRESS.LOAD_ITEM: self._nrows,
@@ -385,9 +404,20 @@ class StructuredDataSet:
             self._note_error(schema._unresolved_refs, "ref")
             self._resolved_refs.update(schema._resolved_refs)
-    def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
-        if self._prune:
+    def _prune_structured_row(self, data: dict) -> Optional[str]:
+        if not self._prune:
+            return None
+        if not self._remove_empty_objects_from_lists:
             remove_empty_properties(data)
+            return None
+        try:
+            remove_empty_properties(data, isempty_array_element=lambda element: element == {},
+                                    raise_exception_on_nonempty_array_element_after_empty=True)
+        except Exception as e:
+            return str(e)
+        return None
+    def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
         if type_name in self._data:
             self._data[type_name].extend([data] if isinstance(data, dict) else data)
         else:
@@ -398,6 +428,18 @@ class StructuredDataSet:
             if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
                 structured_row[name] = properties[name]
+    def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
+        """
+        Given a Portal object (presumably/in-practice from the given metadata), if there is
+        an existing Portal item, identified by the identifying properties for the given object,
+        then merges the given object into the existing one and returns the result; otherwise
+        just returns the given object. Note that the given object may be CHANGED in place.
+        """
+        for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
+            if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
+                return merge_objects(existing_portal_object, portal_object)
+        return portal_object
     def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
         return (ref_lookup_flags &
                 Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE

dcicutils/submitr/ref_lookup_strategy.py CHANGED Viewed

@@ -2,39 +2,45 @@ import re
 from typing import Optional
 from dcicutils.structured_data import Portal
+# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
+# before it was fully developed, we had differing behaviors; but this has been unified; so this
+# could now be internalized to structured_data, and portal_object_utils (TODO).
 def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
     #
-    # FYI: Note this situation WRT object lookups ...
-    #
-    # /{submitted_id}                # NOT FOUND
-    # /UnalignedReads/{submitted_id} # OK
-    # /SubmittedFile/{submitted_id}  # OK
-    # /File/{submitted_id}           # NOT FOUND
-    #
-    # /{accession}                   # OK
-    # /UnalignedReads/{accession}    # NOT FOUND
-    # /SubmittedFile/{accession}     # NOT FOUND
-    # /File/{accession}              # OK
+    # Note this slightly odd situation WRT object lookups by submitted_id and accession:
+    # -----------------------------+-----------------------------------------------+---------------+
+    # PATH                         | EXAMPLE                                       | LOOKUP RESULT |
+    # -----------------------------+-----------------------------------------------+---------------+
+    # /submitted_id                | /UW_FILE-SET_COLO-829BL_HI-C_1                | NOT FOUND     |
+    # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND         |
+    # /SubmittedFile/submitted_id  | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1  | FOUND         |
+    # /File/submitted_id           | /File/UW_FILE-SET_COLO-829BL_HI-C_1           | NOT FOUND     |
+    # -----------------------------+-----------------------------------------------+---------------+
+    # /accession                   | /SMAFSFXF1RO4                                 | FOUND         |
+    # /UnalignedReads/accession    | /UnalignedReads/SMAFSFXF1RO4                  | NOT FOUND     |
+    # /SubmittedFile/accession     | /SubmittedFile/SMAFSFXF1RO4                   | NOT FOUND     |
+    # /File/accession              | /File/SMAFSFXF1RO4                            | FOUND         |
+    # -----------------------------+-----------------------------------------------+---------------+
     #
     def ref_validator(schema: Optional[dict],
                       property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
         """
-        Returns False iff the type represented by the given schema, can NOT be referenced by
-        the given property name with the given property value, otherwise returns None.
+        Returns False iff objects of type represented by the given schema, CANNOT be referenced with
+        a Portal path using the given property name and its given property value, otherwise returns None.
-        For example, if the schema is for the UnalignedReads type and the property name
-        is accession, then we will return False iff the given property value is NOT a properly
-        formatted accession ID. Otherwise, we will return None, which indicates that the
-        caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
-        its default behavior, which is to check other ways in which the given type can NOT
-        be referenced by the given value, i.e. it checks other identifying properties for
-        the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
+        For example, if the schema is for UnalignedReads and the property name is accession, then we will
+        return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
+        will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
+        will continue executing its default behavior, which is to check other ways in which the given type
+        CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
+        and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
-        The goal (in structured_data) being to detect if a type is being referenced in such
-        a way that cannot possibly be allowed, i.e. because none of its identifying types
-        are in the required form (if indeed there any requirements). Note that it is guaranteed
-        that the given property name is indeed an identifying property for the given type.
+        The goal (in structured_data) being to detect if a type is being referenced in such a way that
+        CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
+        if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
+        identifying property for the given type.
         """
         if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
             if (property_format == "accession") and (property_name == "accession"):
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
 # This is here for now because of problems with circular dependencies.
-# See: smaht-portal/.../schema_formats.py
+# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
 def _is_accession_id(value: str) -> bool:
     return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None

dcicutils/tmpfile_utils.py CHANGED Viewed

@@ -1,8 +1,11 @@
 from contextlib import contextmanager
+from datetime import datetime
 import os
 import shutil
 import tempfile
+from uuid import uuid4 as uuid
 from typing import List, Optional, Union
+from dcicutils.file_utils import create_random_file
 @contextmanager
@@ -15,22 +18,59 @@ def temporary_directory() -> str:
 @contextmanager
-def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
+def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
                    content: Optional[Union[str, bytes, List[str]]] = None) -> str:
     with temporary_directory() as tmp_directory_name:
-        tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "")
-        with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file:
+        tmp_file_name = f"{prefix or ''}{name or tempfile.mktemp(dir='')}{suffix or ''}"
+        tmp_file_path = os.path.join(tmp_directory_name, tmp_file_name)
+        with open(tmp_file_path, "wb" if isinstance(content, bytes) else "w") as tmp_file:
             if content is not None:
                 tmp_file.write("\n".join(content) if isinstance(content, list) else content)
-        yield tmp_file_name
+        yield tmp_file_path
+def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
+    """
+    Generates and returns the full path to a file within the system temporary directory.
+    """
+    random_string = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
+    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
+    return os.path.join(tempfile.gettempdir(), tmp_file_name)
+@contextmanager
+def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
+                          nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+    with temporary_file(prefix=prefix, suffix=suffix) as tmp_file_path:
+        create_random_file(tmp_file_path, nbytes=nbytes, binary=binary, line_length=line_length)
+        yield tmp_file_path
 def remove_temporary_directory(tmp_directory_name: str) -> None:
-    def is_temporary_directory(path: str) -> bool:
-        try:
-            tmpdir = tempfile.gettempdir()
-            return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
-        except Exception:
-            return False
+    """
+    Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
+    """
     if is_temporary_directory(tmp_directory_name):  # Guard against errant deletion.
         shutil.rmtree(tmp_directory_name)
+def remove_temporary_file(tmp_file_name: str) -> bool:
+    """
+    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
+    """
+    try:
+        tmpdir = tempfile.gettempdir()
+        if (os.path.commonpath([tmpdir, tmp_file_name]) == tmpdir) and os.path.isfile(tmp_file_name):
+            os.remove(tmp_file_name)
+            return True
+        return False
+    except Exception:
+        return False
+def is_temporary_directory(path: str) -> bool:
+    try:
+        tmpdir = tempfile.gettempdir()
+        return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
+    except Exception:
+        return False

dcicutils/zip_utils.py CHANGED Viewed

@@ -2,7 +2,9 @@ from contextlib import contextmanager
 from dcicutils.tmpfile_utils import temporary_directory, temporary_file
 import gzip
 import os
+import shutil
 import tarfile
+import tempfile
 from typing import List, Optional
 import zipfile
@@ -45,3 +47,28 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
                     outputf.write(inputf.read())
                     outputf.close()
                     yield tmp_file_name
+def extract_file_from_zip(zip_file: str, file_to_extract: str,
+                          destination_file: str, raise_exception: bool = True) -> bool:
+    """
+    Extracts from the given zip file, the given file to extract, writing it to the
+    given destination file. Returns True if all is well, otherwise False, or if the
+    raise_exception argument is True (the default), then raises an exception on error.
+    """
+    try:
+        if not (destination_directory := os.path.dirname(destination_file)):
+            destination_directory = os.getcwd()
+            destination_file = os.path.join(destination_directory, destination_file)
+        with tempfile.TemporaryDirectory() as tmp_directory_name:
+            with zipfile.ZipFile(zip_file, "r") as zipf:
+                if file_to_extract not in zipf.namelist():
+                    return False
+                zipf.extract(file_to_extract, path=tmp_directory_name)
+                os.makedirs(destination_directory, exist_ok=True)
+                shutil.move(os.path.join(tmp_directory_name, file_to_extract), destination_file)
+            return True
+    except Exception as e:
+        if raise_exception:
+            raise e
+    return False

{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA RENAMED Viewed

@@ -1,12 +1,12 @@
 Metadata-Version: 2.1
 Name: dcicutils
-Version: 8.9.0.0b0
+Version: 8.9.0.1b2
 Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
 Home-page: https://github.com/4dn-dcic/utils
 License: MIT
 Author: 4DN-DCIC Team
 Author-email: support@4dnucleome.org
-Requires-Python: >=3.8,<3.12
+Requires-Python: >=3.8,<3.13
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Database :: Database Engines/Servers
 Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+Requires-Dist: appdirs (>=1.4.4,<2.0.0)
 Requires-Dist: aws-requests-auth (>=0.4.2,<1)
-Requires-Dist: boto3 (>=1.28.57,<2.0.0)
-Requires-Dist: botocore (>=1.31.57,<2.0.0)
+Requires-Dist: boto3 (>=1.34.93,<2.0.0)
+Requires-Dist: botocore (>=1.34.93,<2.0.0)
 Requires-Dist: chardet (>=5.2.0,<6.0.0)
 Requires-Dist: docker (>=4.4.4,<5.0.0)
 Requires-Dist: elasticsearch (==7.13.4)
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
 Requires-Dist: redis (>=4.5.1,<5.0.0)
 Requires-Dist: requests (>=2.21.0,<3.0.0)
 Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
+Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
 Requires-Dist: structlog (>=19.2.0,<20.0.0)
 Requires-Dist: toml (>=0.10.1,<1)
 Requires-Dist: tqdm (>=4.66.2,<5.0.0)

{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD RENAMED Viewed

@@ -5,11 +5,11 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
 dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
 dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
 dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
-dcicutils/command_utils.py,sha256=JExll5TMqIcmuiGvuS8q4XDUvoEfi2oSH0E2FVF6suU,15285
+dcicutils/command_utils.py,sha256=1_h18LGX86sLAkRkH33HNmBkwMb7v2wAh3jL01hzceU,18487
 dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
 dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
 dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
-dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
+dcicutils/creds_utils.py,sha256=64BbIfS90T1eJmmQJrDyfrRa3V2F1x7T8lOrEeFfqJY,11127
 dcicutils/data_readers.py,sha256=6EMrY7TjDE8H7bA_TCWtpLQP7slJ0YTL77_dNh6e7sg,7626
 dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
 dcicutils/datetime_utils.py,sha256=sM653aw_1zy1qBmfAH-WetCi2Fw9cnFK7FZN_Tg4onI,13499
@@ -27,10 +27,11 @@ dcicutils/env_utils_legacy.py,sha256=J81OAtJHN69o1beHO6q1j7_J6TeblSjnAHlS8VA5KSM
 dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
 dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
 dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
-dcicutils/ff_utils.py,sha256=Yf-fET5gdpjrH0gikpOCIJdY2Dv3obzUpR31ur816mU,72972
-dcicutils/file_utils.py,sha256=098rXvLeIh8n69EGW7DpOS227ef3BPgwhRAktoU6mhE,2663
+dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
+dcicutils/file_utils.py,sha256=zyNdRl1Fu3SrQwjJWaIMvQpi4DRaodNZCX7oTkiPJ-A,10916
 dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
 dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
+dcicutils/http_utils.py,sha256=tNfH5JA-OwbQKEvD5HPJ3lcp2TSIZ4rnl__4d4JO8Gw,1583
 dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
 dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
 dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
@@ -43,11 +44,11 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
 dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
 dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
 dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
-dcicutils/misc_utils.py,sha256=YH_TTmv6ABWeMERwVvA2-rIfdS-CoPYLXJru9TvWxgM,104610
+dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
 dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
 dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
-dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
-dcicutils/portal_utils.py,sha256=DYyE5o15GekDgzpJWas9iS7klAYbjJZUPW0G42McArk,30779
+dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
+dcicutils/portal_utils.py,sha256=TDGmJqxqWfuqdJZ-ARqbOxPXNuzlqNxPD49jMEY4VQA,45217
 dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
 dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
 dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -55,25 +56,25 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
 dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
 dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
 dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
-dcicutils/schema_utils.py,sha256=2hOzuGK7F8xZ7JyS7_Lan2wXOlNZezzT2lqgEs3QOe4,10605
+dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
 dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
 dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
-dcicutils/scripts/view_portal_object.py,sha256=Cy-8GwGJS9EX-5RxE8mjsqNlDT0N6OCpkNffPVkTFQc,26262
+dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
 dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
 dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
 dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
 dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
-dcicutils/structured_data.py,sha256=BQuIMv6OPySsn6YxtXE2Er-zLE2QJuCYhEQ3V0u_UXY,61238
+dcicutils/structured_data.py,sha256=GfG96VyFwQIkmUax-ZdEzbWnfEiJxmeZEaUWz78IQZY,64030
 dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
-dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
+dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
 dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
-dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
+dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
 dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
 dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
 dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
-dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
-dcicutils-8.9.0.0b0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
-dcicutils-8.9.0.0b0.dist-info/METADATA,sha256=sHJ_jTCTbZwTy6AoI9BSixIfwZDxdntJvQmTy5keWnI,3356
-dcicutils-8.9.0.0b0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-dcicutils-8.9.0.0b0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
-dcicutils-8.9.0.0b0.dist-info/RECORD,,
+dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
+dcicutils-8.9.0.1b2.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
+dcicutils-8.9.0.1b2.dist-info/METADATA,sha256=Mh-FpaQpv_ipkozMQ_Ul_vezgpxdzX4lvp38jaDD8rc,3439
+dcicutils-8.9.0.1b2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+dcicutils-8.9.0.1b2.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
+dcicutils-8.9.0.1b2.dist-info/RECORD,,

{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL RENAMED Viewed

File without changes

{dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl

dcicutils 8.9.0.0b0py3-none-any.whl → 8.9.0.1b2py3-none-any.whl