dcicutils 8.8.0.1b15__tar.gz → 8.8.0.1b16__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/PKG-INFO +1 -1
  2. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/structured_data.py +187 -125
  3. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/pyproject.toml +1 -1
  4. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/LICENSE.txt +0 -0
  5. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/README.rst +0 -0
  6. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/__init__.py +0 -0
  7. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/base.py +0 -0
  8. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/beanstalk_utils.py +0 -0
  9. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/bundle_utils.py +0 -0
  10. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/captured_output.py +0 -0
  11. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/cloudformation_utils.py +0 -0
  12. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/codebuild_utils.py +0 -0
  13. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/command_utils.py +0 -0
  14. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/common.py +0 -0
  15. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/contribution_scripts.py +0 -0
  16. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/contribution_utils.py +0 -0
  17. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/creds_utils.py +0 -0
  18. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/data_readers.py +0 -0
  19. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/data_utils.py +0 -0
  20. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/datetime_utils.py +0 -0
  21. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/deployment_utils.py +0 -0
  22. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/diff_utils.py +0 -0
  23. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/docker_utils.py +0 -0
  24. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ecr_scripts.py +0 -0
  25. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ecr_utils.py +0 -0
  26. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ecs_utils.py +0 -0
  27. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_base.py +0 -0
  28. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_manager.py +0 -0
  29. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_scripts.py +0 -0
  30. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_utils.py +0 -0
  31. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_utils_legacy.py +0 -0
  32. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/es_utils.py +0 -0
  33. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/exceptions.py +0 -0
  34. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ff_mocks.py +0 -0
  35. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ff_utils.py +0 -0
  36. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/file_utils.py +0 -0
  37. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/function_cache_decorator.py +0 -0
  38. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/glacier_utils.py +0 -0
  39. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/jh_utils.py +0 -0
  40. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/kibana/dashboards.json +0 -0
  41. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/kibana/readme.md +0 -0
  42. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/lang_utils.py +0 -0
  43. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  44. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  45. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  46. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  47. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  48. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  49. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_utils.py +0 -0
  50. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/log_utils.py +0 -0
  51. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/misc_utils.py +0 -0
  52. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/obfuscation_utils.py +0 -0
  53. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/opensearch_utils.py +0 -0
  54. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/portal_object_utils.py +0 -0
  55. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/portal_utils.py +0 -0
  56. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/project_utils.py +0 -0
  57. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/qa_checkers.py +0 -0
  58. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/qa_utils.py +0 -0
  59. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/redis_tools.py +0 -0
  60. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/redis_utils.py +0 -0
  61. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/s3_utils.py +0 -0
  62. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/schema_utils.py +0 -0
  63. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/scripts/publish_to_pypi.py +0 -0
  64. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/scripts/run_license_checker.py +0 -0
  65. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/scripts/view_portal_object.py +0 -0
  66. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/secrets_utils.py +0 -0
  67. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/sheet_utils.py +0 -0
  68. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/snapshot_utils.py +0 -0
  69. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ssl_certificate_utils.py +0 -0
  70. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/task_utils.py +0 -0
  71. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/tmpfile_utils.py +0 -0
  72. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/trace_utils.py +0 -0
  73. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/validation_utils.py +0 -0
  74. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/variant_utils.py +0 -0
  75. {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.0.1b15
3
+ Version: 8.8.0.1b16
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -264,9 +264,10 @@ class StructuredDataSet:
264
264
  ref_errors_actual = []
265
265
  for ref_error in ref_errors:
266
266
  if not (resolved := self.portal.ref_exists(ref := ref_error["error"])):
267
+ # if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
267
268
  ref_errors_actual.append(ref_error)
268
269
  else:
269
- self._resolved_refs.add((ref, resolved[0].get("uuid")))
270
+ self._resolved_refs.add((ref, resolved.get("uuid")))
270
271
  if ref_errors_actual:
271
272
  self._errors["ref"] = ref_errors_actual
272
273
  else:
@@ -296,7 +297,7 @@ class StructuredDataSet:
296
297
  self._add_properties(structured_row, self._autoadd_properties, schema)
297
298
  self._add(type_name, structured_row)
298
299
  if self._progress:
299
- self._progress_update(-1, len(self._resolved_refs), len(self.ref_errors), self.ref_lookup_count)
300
+ self._progress_update(-1, self.ref_total_count, self.ref_total_notfound_count, self.ref_lookup_count)
300
301
  self._note_warning(reader.warnings, "reader")
301
302
  if schema:
302
303
  self._note_error(schema._unresolved_refs, "ref")
@@ -328,6 +329,18 @@ class StructuredDataSet:
328
329
  def _is_ref_lookup_subtypes(ref_lookup_flags: int) -> bool:
329
330
  return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_SUBTYPES) == StructuredDataSet.REF_LOOKUP_SUBTYPES
330
331
 
332
+ @property
333
+ def ref_total_count(self) -> int:
334
+ return self.portal.ref_total_count if self.portal else -1
335
+
336
+ @property
337
+ def ref_total_found_count(self) -> int:
338
+ return self.portal.ref_total_found_count if self.portal else -1
339
+
340
+ @property
341
+ def ref_total_notfound_count(self) -> int:
342
+ return self.portal.ref_total_notfound_count if self.portal else -1
343
+
331
344
  @property
332
345
  def ref_lookup_cache_hit_count(self) -> int:
333
346
  return self.portal.ref_lookup_cache_hit_count if self.portal else -1
@@ -356,6 +369,10 @@ class StructuredDataSet:
356
369
  def ref_exists_internal_count(self) -> int:
357
370
  return self.portal.ref_exists_internal_count if self.portal else -1
358
371
 
372
+ @property
373
+ def ref_exists_external_count(self) -> int:
374
+ return self.portal.ref_exists_external_count if self.portal else -1
375
+
359
376
  @property
360
377
  def ref_exists_cache_hit_count(self) -> int:
361
378
  return self.portal.ref_exists_cache_hit_count if self.portal else -1
@@ -595,18 +612,11 @@ class Schema(SchemaBase):
595
612
  # TODO: I think we do have the column (and type?) name(s) originating the ref, yes?
596
613
  self._unresolved_refs.append({"src": src, "error": f"/{link_to}/<null>"})
597
614
  elif portal:
598
- if not (resolved := portal.ref_exists(link_to, value)):
615
+ if not (resolved := portal.ref_exists(link_to, value, True)):
599
616
  self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
600
- elif len(resolved) > 1:
601
- # TODO: Don't think we need this anymore; see TODO on Portal.ref_exists.
602
- self._unresolved_refs.append({
603
- "src": src,
604
- "error": f"/{link_to}/{value}",
605
- "types": [resolved_ref["type"] for resolved_ref in resolved]})
606
617
  else:
607
618
  # A resolved-ref set value is a tuple of the reference path and its uuid.
608
- self._resolved_refs.add((f"/{link_to}/{value}", resolved[0].get("uuid")))
609
- # self._resolved_refs.add((f"/{link_to}/{value}", resolved[0].get("uuid"), resolved[0].get("data")))
619
+ self._resolved_refs.add((f"/{link_to}/{value}", resolved.get("uuid")))
610
620
  return value
611
621
  return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
612
622
 
@@ -767,24 +777,28 @@ class Portal(PortalBase):
767
777
  else:
768
778
  self._ref_lookup_strategy = lambda type_name, schema, value: (StructuredDataSet.REF_LOOKUP_DEFAULT, None)
769
779
  if ref_lookup_nocache is True:
770
- self.ref_lookup = self.ref_lookup_nocache
780
+ self.ref_lookup = self.ref_lookup_uncached
771
781
  self._ref_cache = None
772
782
  else:
773
- self.ref_lookup = self.ref_lookup_cache
783
+ self.ref_lookup = self.ref_lookup_cached
774
784
  self._ref_cache = {}
775
785
  self._ref_lookup_found_count = 0
776
786
  self._ref_lookup_notfound_count = 0
777
787
  self._ref_lookup_error_count = 0
778
788
  self._ref_exists_internal_count = 0
789
+ self._ref_exists_external_count = 0
779
790
  self._ref_exists_cache_hit_count = 0
780
791
  self._ref_exists_cache_miss_count = 0
781
792
  self._ref_incorrect_identifying_property_count = 0
793
+ self._ref_total_count = 0
794
+ self._ref_total_found_count = 0
795
+ self._ref_total_notfound_count = 0
782
796
 
783
797
  @lru_cache(maxsize=8092)
784
- def ref_lookup_cache(self, object_name: str) -> Optional[dict]:
785
- return self.ref_lookup_nocache(object_name)
798
+ def ref_lookup_cached(self, object_name: str) -> Optional[dict]:
799
+ return self.ref_lookup_uncached(object_name)
786
800
 
787
- def ref_lookup_nocache(self, object_name: str) -> Optional[dict]:
801
+ def ref_lookup_uncached(self, object_name: str) -> Optional[dict]:
788
802
  try:
789
803
  result = super().get_metadata(object_name, raw=True)
790
804
  self._ref_lookup_found_count += 1
@@ -819,10 +833,10 @@ class Portal(PortalBase):
819
833
  return schemas
820
834
 
821
835
  @lru_cache(maxsize=64)
822
- def _get_schema_subtypes(self, type_name: str) -> Optional[List[str]]:
836
+ def _get_schema_subtypes_names(self, type_name: str) -> List[str]:
823
837
  if not (schemas_super_type_map := self.get_schemas_super_type_map()):
824
838
  return []
825
- return schemas_super_type_map.get(type_name)
839
+ return schemas_super_type_map.get(type_name, [])
826
840
 
827
841
  def is_file_schema(self, schema_name: str) -> bool:
828
842
  """
@@ -830,74 +844,47 @@ class Portal(PortalBase):
830
844
  """
831
845
  return self.is_schema_type(schema_name, FILE_SCHEMA_NAME)
832
846
 
833
- def _ref_exists_from_cache(self, type_name: str, value: str) -> Optional[List[dict]]:
834
- if self._ref_cache is not None:
835
- return self._ref_cache.get(f"/{type_name}/{value}", None)
836
- return None
837
-
838
- def _cache_ref(self, type_name: str, value: str, resolved: List[str], subtype_names: Optional[List[str]]) -> None:
839
- if self._ref_cache is not None:
840
- for type_name in [type_name] + (subtype_names if subtype_names else []):
841
- self._ref_cache[f"/{type_name}/{value}"] = resolved
842
-
843
- def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[dict]:
847
+ def ref_exists(self, type_name: str, value: Optional[str] = None,
848
+ called_from_map_ref: bool = False) -> Optional[dict]:
849
+ # print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
844
850
  if not value:
845
- if type_name.startswith("/") and len(parts := type_name[1:].split("/")) == 2:
846
- if not (type_name := parts[0]) or not (value := parts[1]):
847
- return []
848
- else:
849
- return []
851
+ type_name, value = Portal._get_type_name_and_value_from_path(type_name)
852
+ if not type_name or not value:
853
+ return None
854
+ if called_from_map_ref:
855
+ self._ref_total_count += 1
856
+ # First check our reference cache.
850
857
  if (resolved := self._ref_exists_from_cache(type_name, value)) is not None:
851
- # Found cached resolved reference.
852
- if not resolved:
853
- # Cached resolved reference is empty ([]).
854
- # It might NOW be found internally, since the portal self._data
855
- # can change, as the data (e.g. spreadsheet sheets) are parsed.
856
- # TODO: Consolidate this with the below similar usage.
857
- ref_lookup_strategy, incorrect_identifying_property = (
858
- self._ref_lookup_strategy(type_name, self.get_schema(type_name), value))
859
- is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
860
- subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
861
- is_resolved, identifying_property, resolved_uuid = (
862
- self._ref_exists_internally(type_name, value, subtype_names,
863
- incorrect_identifying_property=incorrect_identifying_property))
864
- if is_resolved:
865
- if identifying_property == incorrect_identifying_property:
866
- # Not REALLY resolved as it resolved to a property which is NOT an identifying
867
- # property, but may be commonly mistaken for one (e.g. UnalignedReads.filename).
868
- self._ref_incorrect_identifying_property_count += 1
869
- return []
870
- resolved = [{"type": type_name, "uuid": resolved_uuid}]
871
- self._cache_ref(type_name, value, resolved, subtype_names)
872
- return resolved
873
- self._ref_exists_cache_hit_count += 1
858
+ # Found CACHED reference.
859
+ if resolved:
860
+ # Found cached RESOLVED reference (non-empty object).
861
+ if called_from_map_ref:
862
+ self._ref_total_found_count += 1
863
+ return resolved
864
+ # Found cached UNRESOLVED reference (empty object); meaning it was looked
865
+ # up but not found. It might NOW be found INTERNALLY, since the portal
866
+ # self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
867
+ return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
868
+ # Reference is NOT cached here; lookup INTERNALLY first.
869
+ if (resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref)) is None:
870
+ # Reference was resolved (internally) INCORRECTLY.
871
+ if called_from_map_ref:
872
+ self._ref_total_notfound_count += 1
873
+ return None
874
+ if resolved:
875
+ # Reference was resolved internally.
876
+ if called_from_map_ref:
877
+ self._ref_total_found_count += 1
874
878
  return resolved
875
- # Not cached here.
876
- self._ref_exists_cache_miss_count += 1
879
+ # Reference is NOT cached and was NOT resolved internally; lookup in PORTAL.
877
880
  # Get the lookup strategy; i.e. should we lookup by root path, and if so, should
878
881
  # we do this first, and do we lookup by subtypes; by default we lookup by root path
879
- # but not first, and we do lookup by subtypes.
880
- ref_lookup_strategy, incorrect_identifying_property = (
881
- self._ref_lookup_strategy(type_name, self.get_schema(type_name), value))
882
+ # but not first, and we also lookup by subtypes by default.
883
+ ref_lookup_strategy, _ = self._ref_lookup_strategy(type_name, self.get_schema(type_name), value)
882
884
  is_ref_lookup_specified_type = StructuredDataSet._is_ref_lookup_specified_type(ref_lookup_strategy)
883
885
  is_ref_lookup_root = StructuredDataSet._is_ref_lookup_root(ref_lookup_strategy)
884
886
  is_ref_lookup_root_first = StructuredDataSet._is_ref_lookup_root_first(ref_lookup_strategy)
885
887
  is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
886
- subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
887
- # Lookup internally first (including at subtypes if desired; root lookup not applicable here).
888
- is_resolved, identifying_property, resolved_uuid = (
889
- self._ref_exists_internally(type_name, value, subtype_names,
890
- incorrect_identifying_property=incorrect_identifying_property))
891
- if is_resolved:
892
- if identifying_property == incorrect_identifying_property:
893
- # Not REALLY resolved as it resolved to a property which is NOT an identifying
894
- # property, but may be commonly mistaken for one (e.g. UnalignedReads.filename).
895
- self._ref_incorrect_identifying_property_count += 1
896
- return []
897
- resolved = [{"type": type_name, "uuid": resolved_uuid}]
898
- self._cache_ref(type_name, value, resolved, subtype_names)
899
- return resolved
900
- # Not found internally; perform actual portal lookup (including at root and subtypes if desired).
901
888
  # First construct the list of lookup paths at which to look for the referenced item.
902
889
  lookup_paths = []
903
890
  if is_ref_lookup_root_first:
@@ -906,37 +893,81 @@ class Portal(PortalBase):
906
893
  lookup_paths.append(f"/{type_name}/{value}")
907
894
  if is_ref_lookup_root and not is_ref_lookup_root_first:
908
895
  lookup_paths.append(f"/{value}")
909
- if subtype_names:
910
- for subtype_name in subtype_names:
911
- lookup_paths.append(f"/{subtype_name}/{value}")
896
+ subtype_names = self._get_schema_subtypes_names(type_name) if is_ref_lookup_subtypes else []
897
+ for subtype_name in subtype_names:
898
+ lookup_paths.append(f"/{subtype_name}/{value}")
912
899
  if not lookup_paths:
913
900
  # No (i.e. zero) lookup strategy means no ref lookup at all.
914
- return []
915
- # Do the actual lookup in the portal for each of the desired lookup paths.
901
+ if called_from_map_ref:
902
+ self._ref_total_notfound_count += 1
903
+ return None
904
+ # Do the actual lookup in portal for each of the desired lookup paths.
916
905
  for lookup_path in lookup_paths:
917
- if isinstance(item := self.ref_lookup(lookup_path), dict):
918
- resolved = [{"type": type_name, "uuid": item.get("uuid", None)}]
919
- self._cache_ref(type_name, value, resolved, subtype_names)
906
+ if isinstance(resolved_item := self.ref_lookup(lookup_path), dict):
907
+ resolved = {"type": type_name, "uuid": resolved_item.get("uuid", None)}
908
+ self._cache_ref(type_name, value, resolved)
909
+ self._ref_exists_external_count += 1
910
+ if called_from_map_ref:
911
+ self._ref_total_found_count += 1
920
912
  return resolved
921
- # Not found at all; note that we cache this ([]) too; indicates lookup has been done.
922
- self._cache_ref(type_name, value, [], subtype_names)
923
- return []
924
-
925
- def _ref_exists_internally(
926
- self, type_name: str, value: str,
927
- subtype_names: Optional[List[str]] = None,
928
- incorrect_identifying_property: Optional[str] = None) -> Tuple[bool, Optional[str], Optional[str]]:
929
- for type_name in [type_name] + (subtype_names or []):
930
- is_resolved, identifying_property, resolved_uuid = self._ref_exists_single_internally(
931
- type_name, value, incorrect_identifying_property=incorrect_identifying_property)
932
- if is_resolved:
933
- return True, identifying_property, resolved_uuid
934
- return False, None, None
913
+ # Not found at all; note that we cache this ({}) too; indicates lookup has been done.
914
+ self._cache_ref(type_name, value, {})
915
+ if called_from_map_ref:
916
+ self._ref_total_notfound_count += 1
917
+ return None
935
918
 
936
- def _ref_exists_single_internally(
937
- self, type_name: str, value: str,
938
- incorrect_identifying_property:
939
- Optional[str] = None) -> Tuple[bool, Optional[str], Optional[str]]:
919
+ def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
920
+ update_counts: bool = False) -> Optional[dict]:
921
+ """
922
+ Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
923
+ If found then returns a dictionary containing the type name it was found under (the given
924
+ type or a subtype) and the uuid (if any) of the resolved item. If not found then returns an
925
+ empty dictionary; however, if found only via an "incorrect" identifying property, returns None.
926
+ """
927
+ # print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
928
+ if not value:
929
+ type_name, value = Portal._get_type_name_and_value_from_path(type_name)
930
+ if not type_name or not value:
931
+ return None
932
+ # Note that root lookup not applicable here.
933
+ ref_lookup_strategy, incorrect_identifying_property = (
934
+ self._ref_lookup_strategy(type_name, self.get_schema(type_name), value))
935
+ is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
936
+ subtype_names = self._get_schema_subtypes_names(type_name) if is_ref_lookup_subtypes else []
937
+ for type_name in [type_name] + subtype_names:
938
+ is_resolved, resolved_item = self._ref_exists_single_internally(type_name, value)
939
+ if is_resolved:
940
+ if update_counts:
941
+ self._ref_exists_internal_count += 1
942
+ self._ref_total_found_count += 1
943
+ resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
944
+ self._cache_ref(type_name, value, resolved)
945
+ return resolved
946
+ # Here this reference is not resolved internally; but let us check any specified incorrect
947
+ # property to see if it would have been resolved using that; for example, if we pretend that
948
+ # UnalignedReads.filename were an identifying property (which it is not), then we see if this
949
+ # reference, which would otherwise be unresolved, would be resolved; in which case we have an
950
+ # incorrect reference; doing this can cut down considerably on useless lookups (at least for
951
+ # a case from He Li, early March 2024).
952
+ for type_name in [type_name] + subtype_names:
953
+ if incorrect_identifying_property:
954
+ if self._data and (items := self._data.get(type_name)):
955
+ for item in items:
956
+ if (identifying_value := item.get(incorrect_identifying_property, None)) is not None:
957
+ if ((identifying_value == value) or
958
+ (isinstance(identifying_value, list) and (value in identifying_value))): # noqa
959
+ # Not REALLY resolved as it resolved to a property which is NOT an identifying
960
+ # property, but may be commonly mistaken for one (e.g. UnalignedReads.filename).
961
+ # Return value to prevent actual portal lookup from happening.
962
+ if update_counts:
963
+ self._ref_incorrect_identifying_property_count += 1
964
+ self._ref_total_notfound_count += 1
965
+ return None # None return means resolved internally incorrectly.
966
+ if update_counts:
967
+ self._ref_total_notfound_count += 1
968
+ return {} # Empty return means not resolved internally.
969
+
970
+ def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:
940
971
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
941
972
  identifying_properties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
942
973
  for item in items:
@@ -944,32 +975,41 @@ class Portal(PortalBase):
944
975
  if (identifying_value := item.get(identifying_property, None)) is not None:
945
976
  if ((identifying_value == value) or
946
977
  (isinstance(identifying_value, list) and (value in identifying_value))): # noqa
947
- self._ref_exists_internal_count += 1
948
- return True, identifying_property, item.get("uuid", None)
949
- if incorrect_identifying_property:
950
- if (identifying_value := item.get(incorrect_identifying_property, None)) is not None:
951
- if ((identifying_value == value) or
952
- (isinstance(identifying_value, list) and (value in identifying_value))): # noqa
953
- return True, incorrect_identifying_property, item.get("uuid", None)
954
- return False, None, None
978
+ return True, item
979
+ return False, None
980
+
981
+ @staticmethod
982
+ def _get_type_name_and_value_from_path(path: str) -> Tuple[Optional[str], Optional[str]]:
983
+ if path.startswith("/") and len(parts := path[1:].split("/")) == 2:
984
+ if not (type_name := parts[0]) or not (value := parts[1]):
985
+ return None
986
+ return type_name, value
987
+ return None, None
988
+
989
+ def _ref_exists_from_cache(self, type_name: str, value: str) -> Optional[List[dict]]:
990
+ if self._ref_cache is not None:
991
+ self._ref_exists_cache_hit_count += 1
992
+ return self._ref_cache.get(f"/{type_name}/{value}", None)
993
+ self._ref_exists_cache_miss_count += 1
994
+ return None
995
+
996
+ def _cache_ref(self, type_name: str, value: str, resolved: List[str]) -> None:
997
+ subtype_names = self._get_schema_subtypes_names(type_name)
998
+ if self._ref_cache is not None:
999
+ for type_name in [type_name] + subtype_names:
1000
+ self._ref_cache[f"/{type_name}/{value}"] = resolved
955
1001
 
956
1002
  @property
957
- def ref_lookup_cache_hit_count(self) -> int:
958
- if self._ref_cache is None:
959
- return 0
960
- try:
961
- return self.ref_lookup_cache.cache_info().hits
962
- except Exception:
963
- return -1
1003
+ def ref_total_count(self) -> int:
1004
+ return self._ref_total_count
964
1005
 
965
1006
  @property
966
- def ref_lookup_cache_miss_count(self) -> int:
967
- if self._ref_cache is None:
968
- return self.ref_lookup_count
969
- try:
970
- return self.ref_lookup_cache.cache_info().misses
971
- except Exception:
972
- return -1
1007
+ def ref_total_found_count(self) -> int:
1008
+ return self._ref_total_found_count
1009
+
1010
+ @property
1011
+ def ref_total_notfound_count(self) -> int:
1012
+ return self._ref_total_notfound_count
973
1013
 
974
1014
  @property
975
1015
  def ref_lookup_count(self) -> int:
@@ -987,10 +1027,32 @@ class Portal(PortalBase):
987
1027
  def ref_lookup_error_count(self) -> int:
988
1028
  return self._ref_lookup_error_count
989
1029
 
1030
+ @property
1031
+ def ref_lookup_cache_hit_count(self) -> int:
1032
+ if self._ref_cache is None:
1033
+ return -1
1034
+ try:
1035
+ return self.ref_lookup_cached.cache_info().hits
1036
+ except Exception:
1037
+ return -1
1038
+
1039
+ @property
1040
+ def ref_lookup_cache_miss_count(self) -> int:
1041
+ if self._ref_cache is None:
1042
+ return -1
1043
+ try:
1044
+ return self.ref_lookup_cached.cache_info().misses
1045
+ except Exception:
1046
+ return -1
1047
+
990
1048
  @property
991
1049
  def ref_exists_internal_count(self) -> int:
992
1050
  return self._ref_exists_internal_count
993
1051
 
1052
+ @property
1053
+ def ref_exists_external_count(self) -> int:
1054
+ return self._ref_exists_external_count
1055
+
994
1056
  @property
995
1057
  def ref_exists_cache_hit_count(self) -> int:
996
1058
  return self._ref_exists_cache_hit_count
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.0.1b15" # TODO: To become 8.8.1
3
+ version = "8.8.0.1b16" # TODO: To become 8.8.1
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"