dcicutils 8.8.0.1b5__py3-none-any.whl → 8.8.0.1b6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ from dcicutils.common import OrchestratedApp
12
12
  from dcicutils.data_readers import CsvReader, Excel, RowReader
13
13
  from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
14
14
  from dcicutils.file_utils import search_for_file
15
- from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
15
+ from dcicutils.misc_utils import (create_dict, create_readonly_object, load_json_if,
16
16
  merge_objects, remove_empty_properties, right_trim,
17
17
  split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp)
18
18
  from dcicutils.portal_object_utils import PortalObject
@@ -53,6 +53,10 @@ class StructuredDataSet:
53
53
  # can choose to lookup root path first, or not lookup root path at all, or not lookup
54
54
  # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
55
55
  # and value (string) arguments and return an integer of any of the below ORed together.
56
+ # The main purpose of this is optimization; to minimize portal lookups; since for example,
57
+ # currently at least, /{type}/{accession} does not work but /{accession} does; so we
58
+ # currently (smaht-portal/.../ingestion_processors) use REF_LOOKUP_ROOT_FIRST for this.
59
+ # And current usage NEVER has REF_LOOKUP_SUBTYPES turned OFF; but support just in case.
56
60
  REF_LOOKUP_ROOT = 0x0001
57
61
  REF_LOOKUP_ROOT_FIRST = 0x0002 | REF_LOOKUP_ROOT
58
62
  REF_LOOKUP_SUBTYPES = 0x0004
@@ -228,8 +232,10 @@ class StructuredDataSet:
228
232
  if ref_errors := self.ref_errors:
229
233
  ref_errors_actual = []
230
234
  for ref_error in ref_errors:
231
- if not self.portal.ref_exists(ref_error["error"]):
235
+ if not (resolved := self.portal.ref_exists(ref := ref_error["error"])):
232
236
  ref_errors_actual.append(ref_error)
237
+ else:
238
+ self._resolved_refs.add((ref, resolved[0].get("uuid")))
233
239
  if ref_errors_actual:
234
240
  self._errors["ref"] = ref_errors_actual
235
241
  else:
@@ -565,7 +571,7 @@ class Schema(SchemaBase):
565
571
  the names of any nested properties (i.e objects within objects) flattened into a single
566
572
  property name in dot notation; and set the value of each of these flat property names
567
573
  to the type of the terminal/leaf value of the (either) top-level or nested type. N.B. We
568
- do NOT currently support array-of-arry or array-of-multiple-types. E.g. for this schema:
574
+ do NOT currently support array-of-array or array-of-multiple-types. E.g. for this schema:
569
575
 
570
576
  { "properties": {
571
577
  "abc": {
@@ -783,66 +789,90 @@ class Portal(PortalBase):
783
789
  return self._ref_cache.get(f"/{type_name}/{value}", None)
784
790
  return None
785
791
 
786
- def _cache_ref(self, type_name: str, value: str, resolved: List[str],
787
- subtype_names: Optional[List[str]]) -> None:
792
+ def _cache_ref(self, type_name: str, value: str, resolved: List[str], subtype_names: Optional[List[str]]) -> None:
788
793
  if self._ref_cache is not None:
789
- for type_name in [type_name] + (subtype_names or []):
790
- object_path = f"/{type_name}/{value}"
791
- if self._ref_cache.get(object_path, None) is None:
792
- self._ref_cache[object_path] = resolved
794
+ for type_name in [type_name] + (subtype_names if subtype_names else []):
795
+ self._ref_cache[f"/{type_name}/{value}"] = resolved
793
796
 
794
797
  def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[dict]:
795
798
  if not value:
796
799
  if type_name.startswith("/") and len(parts := type_name[1:].split("/")) == 2:
797
- type_name = parts[0]
798
- value = parts[1]
800
+ if not (type_name := parts[0]) or not (value := parts[1]):
801
+ return []
799
802
  else:
800
- return [] # Should not happen.
803
+ return []
801
804
  if (resolved := self._ref_exists_from_cache(type_name, value)) is not None:
805
+ # Found cached resolved reference.
806
+ if not resolved:
807
+ # Cached resolved reference is empty ([]).
808
+ # It might NOW be found internally, since the portal self._data can change.
809
+ # TODO
810
+ ref_lookup_strategy = self._ref_lookup_strategy(type_name, value)
811
+ is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
812
+ subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
813
+ is_resolved, resolved_uuid = self._ref_exists_internally(type_name, value, subtype_names)
814
+ if is_resolved:
815
+ resolved = [{"type": type_name, "uuid": resolved_uuid}]
816
+ self._cache_ref(type_name, value, resolved, subtype_names)
817
+ return resolved
802
818
  self._ref_exists_cache_hit_count += 1
803
819
  return resolved
804
820
  # Not cached here.
805
821
  self._ref_exists_cache_miss_count += 1
806
- resolved = []
822
+ # Get the lookup strategy.
807
823
  ref_lookup_strategy = self._ref_lookup_strategy(type_name, value)
808
824
  is_ref_lookup_root = StructuredDataSet._is_ref_lookup_root(ref_lookup_strategy)
809
825
  is_ref_lookup_root_first = StructuredDataSet._is_ref_lookup_root_first(ref_lookup_strategy)
810
826
  is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
811
- is_resolved = False
812
- subtype_names = self._get_schema_subtypes(type_name)
827
+ subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
828
+ # Lookup internally first (including at subtypes if desired).
829
+ is_resolved, resolved_uuid = self._ref_exists_internally(type_name, value, subtype_names)
830
+ if is_resolved:
831
+ resolved = [{"type": type_name, "uuid": resolved_uuid}]
832
+ self._cache_ref(type_name, value, resolved, subtype_names)
833
+ return resolved
834
+ # Not found internally; perform actual portal lookup (including at root and subtypes if desired).
835
+ # First construct the list of lookup paths at which to look for the referenced item.
836
+ lookup_paths = []
813
837
  if is_ref_lookup_root_first:
814
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
815
- if not is_resolved:
816
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
817
- if not is_resolved and is_ref_lookup_root and not is_ref_lookup_root_first:
818
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
838
+ lookup_paths.append(f"/{value}")
839
+ lookup_paths.append(f"/{type_name}/{value}")
840
+ if is_ref_lookup_root and not is_ref_lookup_root_first:
841
+ lookup_paths.append(f"/{value}")
842
+ if subtype_names:
843
+ for subtype_name in subtype_names:
844
+ lookup_paths.append(f"/{subtype_name}/{value}")
845
+ # Do the actual lookup in the portal for each of the desired lookup paths.
846
+ for lookup_path in lookup_paths:
847
+ if isinstance(item := self.get_metadata(lookup_path), dict):
848
+ resolved = [{"type": type_name, "uuid": item.get("uuid", None)}]
849
+ self._cache_ref(type_name, value, resolved, subtype_names)
850
+ return resolved
851
+ return []
852
+
853
+ def _ref_exists_internally(self, type_name: str, value: str,
854
+ subtype_names: Optional[List[str]] = None) -> Tuple[bool, Optional[str]]:
855
+ is_resolved, resolved_uuid = self._ref_exists_single_internally(type_name, value)
819
856
  if is_resolved:
820
- resolved.append({"type": type_name, "uuid": resolved_uuid})
821
- # Check for the given ref in all subtypes of the given type.
822
- elif subtype_names and is_ref_lookup_subtypes:
857
+ return True, resolved_uuid
858
+ if subtype_names:
823
859
  for subtype_name in subtype_names:
824
- is_resolved, resolved_uuid = self._ref_exists_single(subtype_name, value)
860
+ is_resolved, resolved_uuid = self._ref_exists_single_internally(subtype_name, value)
825
861
  if is_resolved:
826
- resolved.append({"type": type_name, "uuid": resolved_uuid})
827
- break
828
- # Cache this ref (and all subtype versions of it); whether or not found;
829
- # if not found it will be an empty array (array because caching all matches;
830
- # but TODO - do not think we should do this anymore - maybe test changes needed).
831
- self._cache_ref(type_name, value, resolved, subtype_names)
832
- return resolved
833
-
834
- def _ref_exists_single(self, type_name: str, value: str, root: bool = False) -> Tuple[bool, Optional[str]]:
835
- # Check first in our own data (i.e. e.g. within the given spreadsheet).
862
+ return True, resolved_uuid
863
+ return False, None
864
+
865
+ def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[str]]:
836
866
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
837
- iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
867
+ identifying_properties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
838
868
  for item in items:
839
- if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)):
840
- if isinstance(ivalue, list) and value in ivalue or ivalue == value:
841
- self._ref_exists_internal_count += 1
842
- return True, (ivalue if isinstance(ivalue, str) and is_uuid(ivalue) else None)
843
- if (value := self.get_metadata(f"/{type_name}/{value}" if not root else f"/{value}")) is None:
844
- return False, None
845
- return True, value.get("uuid")
869
+ for identifying_property in identifying_properties:
870
+ if (identifying_value := item.get(identifying_property, None)) is not None:
871
+ if ((identifying_value == value) or
872
+ (isinstance(identifying_value, list) and (value in identifying_value))): # noqa
873
+ self._ref_exists_internal_count += 1
874
+ return True, item.get("uuid", None)
875
+ return False, None
846
876
 
847
877
  @property
848
878
  def ref_lookup_cache_hit_count(self) -> int:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.0.1b5
3
+ Version: 8.8.0.1b6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
62
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
63
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
64
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=FB28ek0HO0fZ7ixegjFkMWuwYtcbMsBE4K2DCOtjJmQ,46133
65
+ dcicutils/structured_data.py,sha256=7JDesiA0geGkP343yV3z9Bkc8qN22RKoT20cHrecEYA,47985
66
66
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
67
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
68
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
69
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
70
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
71
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.0.1b5.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.0.1b5.dist-info/METADATA,sha256=lZ31Wrd_wtKZOzAA8W7MlJL7SCWx-75VqKFz6gDgPiY,3356
74
- dcicutils-8.8.0.1b5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.0.1b5.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.0.1b5.dist-info/RECORD,,
72
+ dcicutils-8.8.0.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
+ dcicutils-8.8.0.1b6.dist-info/METADATA,sha256=-MVcTLgcFRea1f0P8L91J8zmo1wbjbUbPr-V82goavo,3356
74
+ dcicutils-8.8.0.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
+ dcicutils-8.8.0.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
+ dcicutils-8.8.0.1b6.dist-info/RECORD,,