PyPI - dcicutils - Versions diffs - 8.8.1.1b4__py3-none-any.whl → 8.8.1.1b6__py3-none-any.whl - Mend

dcicutils 8.8.1.1b4-py3-none-any.whl → 8.8.1.1b6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

dcicutils/data_readers.py CHANGED Viewed

@@ -77,7 +77,11 @@ class RowReader(abc.ABC):
     def warnings(self) -> List[str]:
         warnings = []
         if self._warning_empty_headers:
-            warnings.append({"src": create_dict(file=self.file),
+            if hasattr(self, "sheet_name") and self.sheet_name:
+                src = {"sheet": self.sheet_name}
+            else:
+                src = {"file": self.file}
+            warnings.append({"src": src,
                              "warning": "Empty header column encountered; ignoring it and all subsequent columns."})
         if self._warning_extra_values:
             for row_number in self._warning_extra_values:

dcicutils/structured_data.py CHANGED Viewed

@@ -44,7 +44,7 @@ FILE_TYPE_PROPERTY_NAME = "filename"
 EXTRA_FILE_TYPE_NAME = "ExtraFile"
 EXTRA_FILE_TYPE_PROPERTY_NAME = "extra_files"
-ENABLE_ARRAY_SHEET_REFS = True
+ENABLE_ARRAY_SHEET_REFS = False
 # The ExtraFile pseudo-type schema.
 EXTRA_FILE_SCHEMA = {
@@ -91,6 +91,7 @@ class StructuredDataSet:
         self._errors = {}
         self._resolved_refs = set()
         self._validated = False
+        self._nrows = 0
         self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
         self._norefs = True if norefs is True else False
         self._debug_sleep = None
@@ -194,6 +195,10 @@ class StructuredDataSet:
                 upload_file["path"] = file_path
         return upload_files
+    @property
+    def nrows(self) -> int:
+        return self._nrows
     def compare(self, progress: Optional[Callable] = None) -> dict:
         def get_counts() -> int:
             ntypes = 0
@@ -288,8 +293,6 @@ class StructuredDataSet:
         order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
         for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
             self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
-        if self._progress:
-            self._progress({"finish": True})
         # TODO: Do we really need progress reporting for the below?
         # Check for unresolved reference errors which really are not because of ordering.
         # Yes such internal references will be handled correctly on actual database update via snovault.loadxl.
@@ -301,11 +304,25 @@ class StructuredDataSet:
                     # if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
                     ref_errors_actual.append(ref_error)
                 else:
+                    # Now found so subtract off from ref_total_notfound_count.
+                    self.portal._ref_total_notfound_count -= 1
                     self._resolved_refs.add((ref, resolved.get("uuid")))
             if ref_errors_actual:
                 self._errors["ref"] = ref_errors_actual
             else:
                 del self._errors["ref"]
+        if self._progress:
+            # TODO: Refactor with same thing below in _load_reader.
+            self._progress({
+                "finish": True,
+                "refs": self.ref_total_count,
+                "refs_found": self.ref_total_found_count,
+                "refs_not_found": self.ref_total_notfound_count,
+                "refs_lookup": self.ref_lookup_count,
+                "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
+                "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
+                "refs_invalid": self.ref_invalid_identifying_property_count
+            })
     def _load_json_file(self, file: str) -> None:
         with open(file) as f:
@@ -316,6 +333,7 @@ class StructuredDataSet:
         noschema = False
         structured_row_template = None
         for row in reader:
+            self._nrows += 1
             if self._debug_sleep:
                 time.sleep(float(self._debug_sleep))
             if not structured_row_template:  # Delay creation just so we don't reference schema if there are no rows.
@@ -338,7 +356,8 @@ class StructuredDataSet:
                     "refs_found": self.ref_total_found_count,
                     "refs_not_found": self.ref_total_notfound_count,
                     "refs_lookup": self.ref_lookup_count,
-                    "refs_cache_hit": self.ref_exists_cache_hit_count,
+                    "refs_lookup_cache_hit": self.ref_lookup_cache_hit_count,
+                    "refs_exists_cache_hit": self.ref_exists_cache_hit_count,
                     "refs_invalid": self.ref_invalid_identifying_property_count
                 })
         self._note_warning(reader.warnings, "reader")
@@ -510,15 +529,16 @@ class _StructuredRowTemplate:
                     set_value_backtrack_object(i, p)
                 data = data[p]
             if (p := path[-1]) == -1 and isinstance(value, str):
-                if ENABLE_ARRAY_SHEET_REFS and value.lower().startswith("[ref:") and value.endswith("]"):
-                    if self._obtain_array_values:
-                        values = self._obtain_array_values(value)
-                    if sheet_name_containing_array := value[5:].strip():
-                        if dot := sheet_name_containing_array.find(".") > 0:
-                            if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
-                                pass
-                                # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
-                    pass
+                if ENABLE_ARRAY_SHEET_REFS and False:
+                    # TODO: IN PROGRESS. DISABLED FOR NOW.
+                    if isinstance(value, str) and value.lower().startswith("[ref:") and value.endswith("]"):
+                        if self._obtain_array_values:
+                            values = self._obtain_array_values(value)
+                        if sheet_name_containing_array := value[5:].strip():
+                            if dot := sheet_name_containing_array.find(".") > 0:
+                                if sheet_name_containing_array := sheet_name_containing_array[0:dot].strip():
+                                    pass
+                                    # sheet_column_containing_array = sheet_name_containing_array[dot + 1:].strip()
                 values = _split_array_string(value, unique=typeinfo.get("unique") if typeinfo else False)
                 if mapv:
                     values = [mapv(value, src) for value in values]
@@ -891,7 +911,6 @@ class Portal(PortalBase):
     def ref_exists(self, type_name: str, value: Optional[str] = None,
                    called_from_map_ref: bool = False) -> Optional[dict]:
-        # print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
         if not value:
             type_name, value = Portal._get_type_name_and_value_from_path(type_name)
             if not type_name or not value:
@@ -919,7 +938,9 @@ class Portal(PortalBase):
             # self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
             return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
         # Reference is NOT cached here; lookup INTERNALLY first.
-        if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref):
+        # Skip updating _ref_total_notfound_count here as if not found we look in portal below.
+        if resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref,
+                                                  skip_total_notfound_count=True):
             # Reference was resolved internally (note: here only if resolved is not an empty dictionary).
             if called_from_map_ref:
                 self._ref_total_found_count += 1
@@ -965,13 +986,13 @@ class Portal(PortalBase):
         return None
     def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
-                              update_counts: bool = False) -> Optional[dict]:
+                              update_counts: bool = False,
+                              skip_total_notfound_count: bool = False) -> Optional[dict]:
         """
         Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
         If found then returns a dictionary containing the (given) type name and the uuid (if any)
         of the resolved item.
         """
-        # print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
         if not value:
             type_name, value = Portal._get_type_name_and_value_from_path(type_name)
             if not type_name or not value:
@@ -990,6 +1011,9 @@ class Portal(PortalBase):
                 resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
                 self._cache_ref(type_name, value, resolved)
                 return resolved
+        if update_counts:
+            if not skip_total_notfound_count:
+                self._ref_total_notfound_count += 1
         return {}  # Empty return means not resolved internally.
     def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:

{dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dcicutils
-Version: 8.8.1.1b4
+Version: 8.8.1.1b6
 Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
 Home-page: https://github.com/4dn-dcic/utils
 License: MIT

{dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/RECORD RENAMED Viewed

@@ -10,7 +10,7 @@ dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
 dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
 dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
 dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
-dcicutils/data_readers.py,sha256=wNRNlCUpsrvFHUKXYhE9fMZ392NWH4-KDqwW5f6avGc,7265
+dcicutils/data_readers.py,sha256=WWH_VDz2KnNv_FoTjfFwrg6zh9asl8Q-uEV2V3XuyUg,7414
 dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
 dcicutils/datetime_utils.py,sha256=EODDGAngp1yh2ZlDIuI7tB74JBJucw2DljqfPknzK0Y,4666
 dcicutils/deployment_utils.py,sha256=rcNUFMe_tsrG4CHEtgBe41cZx4Pk4JqISPsjrJRMoEs,68891
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
 dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
 dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
 dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
-dcicutils/structured_data.py,sha256=bc0sTFQQTldLM0HIV9-o_DHIY4kjEdHBPybcrfiuowM,58004
+dcicutils/structured_data.py,sha256=kf5aiMXk-DGRtCXWo3D9e2HHcmMffouAvCS-r1epvsM,59254
 dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
 dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
 dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
 dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
 dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
 dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
-dcicutils-8.8.1.1b4.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
-dcicutils-8.8.1.1b4.dist-info/METADATA,sha256=e5E4t8Ati-ECkMU0nNomHXag-7iXXz7G7DBJqfz0MSE,3356
-dcicutils-8.8.1.1b4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-dcicutils-8.8.1.1b4.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
-dcicutils-8.8.1.1b4.dist-info/RECORD,,
+dcicutils-8.8.1.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
+dcicutils-8.8.1.1b6.dist-info/METADATA,sha256=i5C6Embybe7tMr1JUT4jB2tRppS8Omxs6afsU_LQkCE,3356
+dcicutils-8.8.1.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+dcicutils-8.8.1.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
+dcicutils-8.8.1.1b6.dist-info/RECORD,,

{dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/WHEEL RENAMED Viewed

File without changes

{dcicutils-8.8.1.1b4.dist-info → dcicutils-8.8.1.1b6.dist-info}/entry_points.txt RENAMED Viewed

File without changes

dcicutils 8.8.1.1b4__py3-none-any.whl → 8.8.1.1b6__py3-none-any.whl

dcicutils 8.8.1.1b4-py3-none-any.whl → 8.8.1.1b6-py3-none-any.whl