dcicutils 8.8.0.1b5__py3-none-any.whl → 8.8.0.1b6__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,7 +12,7 @@ from dcicutils.common import OrchestratedApp
12
12
  from dcicutils.data_readers import CsvReader, Excel, RowReader
13
13
  from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
14
14
  from dcicutils.file_utils import search_for_file
15
- from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
15
+ from dcicutils.misc_utils import (create_dict, create_readonly_object, load_json_if,
16
16
  merge_objects, remove_empty_properties, right_trim,
17
17
  split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp)
18
18
  from dcicutils.portal_object_utils import PortalObject
@@ -53,6 +53,10 @@ class StructuredDataSet:
53
53
  # can choose to lookup root path first, or not lookup root path at all, or not lookup
54
54
  # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
55
55
  # and value (string) arguments and return an integer of any of the below ORed together.
56
+ # The main purpose of this is optimization; to minimize portal lookups; since for example,
57
+ # currently at least, /{type}/{accession} does not work but /{accession} does; so we
58
+ # currently (smaht-portal/.../ingestion_processors) use REF_LOOKUP_ROOT_FIRST for this.
59
+ # And current usage NEVER has REF_LOOKUP_SUBTYPES turned OFF; but support just in case.
56
60
  REF_LOOKUP_ROOT = 0x0001
57
61
  REF_LOOKUP_ROOT_FIRST = 0x0002 | REF_LOOKUP_ROOT
58
62
  REF_LOOKUP_SUBTYPES = 0x0004
@@ -228,8 +232,10 @@ class StructuredDataSet:
228
232
  if ref_errors := self.ref_errors:
229
233
  ref_errors_actual = []
230
234
  for ref_error in ref_errors:
231
- if not self.portal.ref_exists(ref_error["error"]):
235
+ if not (resolved := self.portal.ref_exists(ref := ref_error["error"])):
232
236
  ref_errors_actual.append(ref_error)
237
+ else:
238
+ self._resolved_refs.add((ref, resolved[0].get("uuid")))
233
239
  if ref_errors_actual:
234
240
  self._errors["ref"] = ref_errors_actual
235
241
  else:
@@ -565,7 +571,7 @@ class Schema(SchemaBase):
565
571
  the names of any nested properties (i.e objects within objects) flattened into a single
566
572
  property name in dot notation; and set the value of each of these flat property names
567
573
  to the type of the terminal/leaf value of the (either) top-level or nested type. N.B. We
568
- do NOT currently support array-of-arry or array-of-multiple-types. E.g. for this schema:
574
+ do NOT currently support array-of-array or array-of-multiple-types. E.g. for this schema:
569
575
 
570
576
  { "properties": {
571
577
  "abc": {
@@ -783,66 +789,90 @@ class Portal(PortalBase):
783
789
  return self._ref_cache.get(f"/{type_name}/{value}", None)
784
790
  return None
785
791
 
786
- def _cache_ref(self, type_name: str, value: str, resolved: List[str],
787
- subtype_names: Optional[List[str]]) -> None:
792
+ def _cache_ref(self, type_name: str, value: str, resolved: List[str], subtype_names: Optional[List[str]]) -> None:
788
793
  if self._ref_cache is not None:
789
- for type_name in [type_name] + (subtype_names or []):
790
- object_path = f"/{type_name}/{value}"
791
- if self._ref_cache.get(object_path, None) is None:
792
- self._ref_cache[object_path] = resolved
794
+ for type_name in [type_name] + (subtype_names if subtype_names else []):
795
+ self._ref_cache[f"/{type_name}/{value}"] = resolved
793
796
 
794
797
  def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[dict]:
795
798
  if not value:
796
799
  if type_name.startswith("/") and len(parts := type_name[1:].split("/")) == 2:
797
- type_name = parts[0]
798
- value = parts[1]
800
+ if not (type_name := parts[0]) or not (value := parts[1]):
801
+ return []
799
802
  else:
800
- return [] # Should not happen.
803
+ return []
801
804
  if (resolved := self._ref_exists_from_cache(type_name, value)) is not None:
805
+ # Found cached resolved reference.
806
+ if not resolved:
807
+ # Cached resolved reference is empty ([]).
808
+ # It might NOW be found internally, since the portal self._data can change.
809
+ # TODO
810
+ ref_lookup_strategy = self._ref_lookup_strategy(type_name, value)
811
+ is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
812
+ subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
813
+ is_resolved, resolved_uuid = self._ref_exists_internally(type_name, value, subtype_names)
814
+ if is_resolved:
815
+ resolved = [{"type": type_name, "uuid": resolved_uuid}]
816
+ self._cache_ref(type_name, value, resolved, subtype_names)
817
+ return resolved
802
818
  self._ref_exists_cache_hit_count += 1
803
819
  return resolved
804
820
  # Not cached here.
805
821
  self._ref_exists_cache_miss_count += 1
806
- resolved = []
822
+ # Get the lookup strategy.
807
823
  ref_lookup_strategy = self._ref_lookup_strategy(type_name, value)
808
824
  is_ref_lookup_root = StructuredDataSet._is_ref_lookup_root(ref_lookup_strategy)
809
825
  is_ref_lookup_root_first = StructuredDataSet._is_ref_lookup_root_first(ref_lookup_strategy)
810
826
  is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
811
- is_resolved = False
812
- subtype_names = self._get_schema_subtypes(type_name)
827
+ subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
828
+ # Lookup internally first (including at subtypes if desired).
829
+ is_resolved, resolved_uuid = self._ref_exists_internally(type_name, value, subtype_names)
830
+ if is_resolved:
831
+ resolved = [{"type": type_name, "uuid": resolved_uuid}]
832
+ self._cache_ref(type_name, value, resolved, subtype_names)
833
+ return resolved
834
+ # Not found internally; perform actual portal lookup (including at root and subtypes if desired).
835
+ # First construct the list of lookup paths at which to look for the referenced item.
836
+ lookup_paths = []
813
837
  if is_ref_lookup_root_first:
814
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
815
- if not is_resolved:
816
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
817
- if not is_resolved and is_ref_lookup_root and not is_ref_lookup_root_first:
818
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
838
+ lookup_paths.append(f"/{value}")
839
+ lookup_paths.append(f"/{type_name}/{value}")
840
+ if is_ref_lookup_root and not is_ref_lookup_root_first:
841
+ lookup_paths.append(f"/{value}")
842
+ if subtype_names:
843
+ for subtype_name in subtype_names:
844
+ lookup_paths.append(f"/{subtype_name}/{value}")
845
+ # Do the actual lookup in the portal for each of the desired lookup paths.
846
+ for lookup_path in lookup_paths:
847
+ if isinstance(item := self.get_metadata(lookup_path), dict):
848
+ resolved = [{"type": type_name, "uuid": item.get("uuid", None)}]
849
+ self._cache_ref(type_name, value, resolved, subtype_names)
850
+ return resolved
851
+ return []
852
+
853
+ def _ref_exists_internally(self, type_name: str, value: str,
854
+ subtype_names: Optional[List[str]] = None) -> Tuple[bool, Optional[str]]:
855
+ is_resolved, resolved_uuid = self._ref_exists_single_internally(type_name, value)
819
856
  if is_resolved:
820
- resolved.append({"type": type_name, "uuid": resolved_uuid})
821
- # Check for the given ref in all subtypes of the given type.
822
- elif subtype_names and is_ref_lookup_subtypes:
857
+ return True, resolved_uuid
858
+ if subtype_names:
823
859
  for subtype_name in subtype_names:
824
- is_resolved, resolved_uuid = self._ref_exists_single(subtype_name, value)
860
+ is_resolved, resolved_uuid = self._ref_exists_single_internally(subtype_name, value)
825
861
  if is_resolved:
826
- resolved.append({"type": type_name, "uuid": resolved_uuid})
827
- break
828
- # Cache this ref (and all subtype versions of it); whether or not found;
829
- # if not found it will be an empty array (array because caching all matches;
830
- # but TODO - do not think we should do this anymore - maybe test changes needed).
831
- self._cache_ref(type_name, value, resolved, subtype_names)
832
- return resolved
833
-
834
- def _ref_exists_single(self, type_name: str, value: str, root: bool = False) -> Tuple[bool, Optional[str]]:
835
- # Check first in our own data (i.e. e.g. within the given spreadsheet).
862
+ return True, resolved_uuid
863
+ return False, None
864
+
865
+ def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[str]]:
836
866
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
837
- iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
867
+ identifying_properties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
838
868
  for item in items:
839
- if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)):
840
- if isinstance(ivalue, list) and value in ivalue or ivalue == value:
841
- self._ref_exists_internal_count += 1
842
- return True, (ivalue if isinstance(ivalue, str) and is_uuid(ivalue) else None)
843
- if (value := self.get_metadata(f"/{type_name}/{value}" if not root else f"/{value}")) is None:
844
- return False, None
845
- return True, value.get("uuid")
869
+ for identifying_property in identifying_properties:
870
+ if (identifying_value := item.get(identifying_property, None)) is not None:
871
+ if ((identifying_value == value) or
872
+ (isinstance(identifying_value, list) and (value in identifying_value))): # noqa
873
+ self._ref_exists_internal_count += 1
874
+ return True, item.get("uuid", None)
875
+ return False, None
846
876
 
847
877
  @property
848
878
  def ref_lookup_cache_hit_count(self) -> int:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.0.1b5
3
+ Version: 8.8.0.1b6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
62
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
63
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
64
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=FB28ek0HO0fZ7ixegjFkMWuwYtcbMsBE4K2DCOtjJmQ,46133
65
+ dcicutils/structured_data.py,sha256=7JDesiA0geGkP343yV3z9Bkc8qN22RKoT20cHrecEYA,47985
66
66
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
67
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
68
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
69
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
70
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
71
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.0.1b5.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.0.1b5.dist-info/METADATA,sha256=lZ31Wrd_wtKZOzAA8W7MlJL7SCWx-75VqKFz6gDgPiY,3356
74
- dcicutils-8.8.0.1b5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.0.1b5.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.0.1b5.dist-info/RECORD,,
72
+ dcicutils-8.8.0.1b6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
+ dcicutils-8.8.0.1b6.dist-info/METADATA,sha256=-MVcTLgcFRea1f0P8L91J8zmo1wbjbUbPr-V82goavo,3356
74
+ dcicutils-8.8.0.1b6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
+ dcicutils-8.8.0.1b6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
+ dcicutils-8.8.0.1b6.dist-info/RECORD,,