dcicutils 8.8.0.1b15__tar.gz → 8.8.0.1b16__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/PKG-INFO +1 -1
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/structured_data.py +187 -125
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/pyproject.toml +1 -1
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/LICENSE.txt +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/README.rst +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/__init__.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/base.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/beanstalk_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/bundle_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/captured_output.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/cloudformation_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/codebuild_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/command_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/common.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/contribution_scripts.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/contribution_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/creds_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/data_readers.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/data_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/datetime_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/deployment_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/diff_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/docker_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ecr_scripts.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ecr_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ecs_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_base.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_manager.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_scripts.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/env_utils_legacy.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/es_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/exceptions.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ff_mocks.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ff_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/file_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/function_cache_decorator.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/glacier_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/jh_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/kibana/dashboards.json +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/kibana/readme.md +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/lang_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/log_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/misc_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/obfuscation_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/opensearch_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/portal_object_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/portal_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/project_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/qa_checkers.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/qa_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/redis_tools.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/redis_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/s3_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/schema_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/scripts/publish_to_pypi.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/scripts/run_license_checker.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/scripts/view_portal_object.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/secrets_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/sheet_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/snapshot_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/ssl_certificate_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/task_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/tmpfile_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/trace_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/validation_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/variant_utils.py +0 -0
- {dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/zip_utils.py +0 -0
@@ -264,9 +264,10 @@ class StructuredDataSet:
|
|
264
264
|
ref_errors_actual = []
|
265
265
|
for ref_error in ref_errors:
|
266
266
|
if not (resolved := self.portal.ref_exists(ref := ref_error["error"])):
|
267
|
+
# if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
|
267
268
|
ref_errors_actual.append(ref_error)
|
268
269
|
else:
|
269
|
-
self._resolved_refs.add((ref, resolved
|
270
|
+
self._resolved_refs.add((ref, resolved.get("uuid")))
|
270
271
|
if ref_errors_actual:
|
271
272
|
self._errors["ref"] = ref_errors_actual
|
272
273
|
else:
|
@@ -296,7 +297,7 @@ class StructuredDataSet:
|
|
296
297
|
self._add_properties(structured_row, self._autoadd_properties, schema)
|
297
298
|
self._add(type_name, structured_row)
|
298
299
|
if self._progress:
|
299
|
-
self._progress_update(-1,
|
300
|
+
self._progress_update(-1, self.ref_total_count, self.ref_total_notfound_count, self.ref_lookup_count)
|
300
301
|
self._note_warning(reader.warnings, "reader")
|
301
302
|
if schema:
|
302
303
|
self._note_error(schema._unresolved_refs, "ref")
|
@@ -328,6 +329,18 @@ class StructuredDataSet:
|
|
328
329
|
def _is_ref_lookup_subtypes(ref_lookup_flags: int) -> bool:
|
329
330
|
return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_SUBTYPES) == StructuredDataSet.REF_LOOKUP_SUBTYPES
|
330
331
|
|
332
|
+
@property
|
333
|
+
def ref_total_count(self) -> int:
|
334
|
+
return self.portal.ref_total_count if self.portal else -1
|
335
|
+
|
336
|
+
@property
|
337
|
+
def ref_total_found_count(self) -> int:
|
338
|
+
return self.portal.ref_total_found_count if self.portal else -1
|
339
|
+
|
340
|
+
@property
|
341
|
+
def ref_total_notfound_count(self) -> int:
|
342
|
+
return self.portal.ref_total_notfound_count if self.portal else -1
|
343
|
+
|
331
344
|
@property
|
332
345
|
def ref_lookup_cache_hit_count(self) -> int:
|
333
346
|
return self.portal.ref_lookup_cache_hit_count if self.portal else -1
|
@@ -356,6 +369,10 @@ class StructuredDataSet:
|
|
356
369
|
def ref_exists_internal_count(self) -> int:
|
357
370
|
return self.portal.ref_exists_internal_count if self.portal else -1
|
358
371
|
|
372
|
+
@property
|
373
|
+
def ref_exists_external_count(self) -> int:
|
374
|
+
return self.portal.ref_exists_external_count if self.portal else -1
|
375
|
+
|
359
376
|
@property
|
360
377
|
def ref_exists_cache_hit_count(self) -> int:
|
361
378
|
return self.portal.ref_exists_cache_hit_count if self.portal else -1
|
@@ -595,18 +612,11 @@ class Schema(SchemaBase):
|
|
595
612
|
# TODO: I think we do have the column (and type?) name(s) originating the ref, yes?
|
596
613
|
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/<null>"})
|
597
614
|
elif portal:
|
598
|
-
if not (resolved := portal.ref_exists(link_to, value)):
|
615
|
+
if not (resolved := portal.ref_exists(link_to, value, True)):
|
599
616
|
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
|
600
|
-
elif len(resolved) > 1:
|
601
|
-
# TODO: Don't think we need this anymore; see TODO on Portal.ref_exists.
|
602
|
-
self._unresolved_refs.append({
|
603
|
-
"src": src,
|
604
|
-
"error": f"/{link_to}/{value}",
|
605
|
-
"types": [resolved_ref["type"] for resolved_ref in resolved]})
|
606
617
|
else:
|
607
618
|
# A resolved-ref set value is a tuple of the reference path and its uuid.
|
608
|
-
self._resolved_refs.add((f"/{link_to}/{value}", resolved
|
609
|
-
# self._resolved_refs.add((f"/{link_to}/{value}", resolved[0].get("uuid"), resolved[0].get("data")))
|
619
|
+
self._resolved_refs.add((f"/{link_to}/{value}", resolved.get("uuid")))
|
610
620
|
return value
|
611
621
|
return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
|
612
622
|
|
@@ -767,24 +777,28 @@ class Portal(PortalBase):
|
|
767
777
|
else:
|
768
778
|
self._ref_lookup_strategy = lambda type_name, schema, value: (StructuredDataSet.REF_LOOKUP_DEFAULT, None)
|
769
779
|
if ref_lookup_nocache is True:
|
770
|
-
self.ref_lookup = self.
|
780
|
+
self.ref_lookup = self.ref_lookup_uncached
|
771
781
|
self._ref_cache = None
|
772
782
|
else:
|
773
|
-
self.ref_lookup = self.
|
783
|
+
self.ref_lookup = self.ref_lookup_cached
|
774
784
|
self._ref_cache = {}
|
775
785
|
self._ref_lookup_found_count = 0
|
776
786
|
self._ref_lookup_notfound_count = 0
|
777
787
|
self._ref_lookup_error_count = 0
|
778
788
|
self._ref_exists_internal_count = 0
|
789
|
+
self._ref_exists_external_count = 0
|
779
790
|
self._ref_exists_cache_hit_count = 0
|
780
791
|
self._ref_exists_cache_miss_count = 0
|
781
792
|
self._ref_incorrect_identifying_property_count = 0
|
793
|
+
self._ref_total_count = 0
|
794
|
+
self._ref_total_found_count = 0
|
795
|
+
self._ref_total_notfound_count = 0
|
782
796
|
|
783
797
|
@lru_cache(maxsize=8092)
|
784
|
-
def
|
785
|
-
return self.
|
798
|
+
def ref_lookup_cached(self, object_name: str) -> Optional[dict]:
|
799
|
+
return self.ref_lookup_uncached(object_name)
|
786
800
|
|
787
|
-
def
|
801
|
+
def ref_lookup_uncached(self, object_name: str) -> Optional[dict]:
|
788
802
|
try:
|
789
803
|
result = super().get_metadata(object_name, raw=True)
|
790
804
|
self._ref_lookup_found_count += 1
|
@@ -819,10 +833,10 @@ class Portal(PortalBase):
|
|
819
833
|
return schemas
|
820
834
|
|
821
835
|
@lru_cache(maxsize=64)
|
822
|
-
def
|
836
|
+
def _get_schema_subtypes_names(self, type_name: str) -> List[str]:
|
823
837
|
if not (schemas_super_type_map := self.get_schemas_super_type_map()):
|
824
838
|
return []
|
825
|
-
return schemas_super_type_map.get(type_name)
|
839
|
+
return schemas_super_type_map.get(type_name, [])
|
826
840
|
|
827
841
|
def is_file_schema(self, schema_name: str) -> bool:
|
828
842
|
"""
|
@@ -830,74 +844,47 @@ class Portal(PortalBase):
|
|
830
844
|
"""
|
831
845
|
return self.is_schema_type(schema_name, FILE_SCHEMA_NAME)
|
832
846
|
|
833
|
-
def
|
834
|
-
|
835
|
-
|
836
|
-
return None
|
837
|
-
|
838
|
-
def _cache_ref(self, type_name: str, value: str, resolved: List[str], subtype_names: Optional[List[str]]) -> None:
|
839
|
-
if self._ref_cache is not None:
|
840
|
-
for type_name in [type_name] + (subtype_names if subtype_names else []):
|
841
|
-
self._ref_cache[f"/{type_name}/{value}"] = resolved
|
842
|
-
|
843
|
-
def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[dict]:
|
847
|
+
def ref_exists(self, type_name: str, value: Optional[str] = None,
|
848
|
+
called_from_map_ref: bool = False) -> Optional[dict]:
|
849
|
+
# print(f"\033[Kxyzzy:ref_exists({type_name}/{value})")
|
844
850
|
if not value:
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
851
|
+
type_name, value = Portal._get_type_name_and_value_from_path(type_name)
|
852
|
+
if not type_name or not value:
|
853
|
+
return None
|
854
|
+
if called_from_map_ref:
|
855
|
+
self._ref_total_count += 1
|
856
|
+
# First check our reference cache.
|
850
857
|
if (resolved := self._ref_exists_from_cache(type_name, value)) is not None:
|
851
|
-
# Found
|
852
|
-
if
|
853
|
-
#
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
self._cache_ref(type_name, value, resolved, subtype_names)
|
872
|
-
return resolved
|
873
|
-
self._ref_exists_cache_hit_count += 1
|
858
|
+
# Found CACHED reference.
|
859
|
+
if resolved:
|
860
|
+
# Found cached RESOLVED reference (non-empty object).
|
861
|
+
if called_from_map_ref:
|
862
|
+
self._ref_total_found_count += 1
|
863
|
+
return resolved
|
864
|
+
# Found cached UNRESOLVED reference (empty object); meaning it was looked
|
865
|
+
# up but not found. It might NOW be found INTERNALLY, since the portal
|
866
|
+
# self._data can change, i.e. as data (e.g. spreadsheet sheets) are parsed.
|
867
|
+
return self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref) or {}
|
868
|
+
# Reference is NOT cached here; lookup INTERNALLY first.
|
869
|
+
if (resolved := self.ref_exists_internally(type_name, value, update_counts=called_from_map_ref)) is None:
|
870
|
+
# Reference was resolved (internally) INCORRECTLY.
|
871
|
+
if called_from_map_ref:
|
872
|
+
self._ref_total_notfound_count += 1
|
873
|
+
return None
|
874
|
+
if resolved:
|
875
|
+
# Reference was resolved internally.
|
876
|
+
if called_from_map_ref:
|
877
|
+
self._ref_total_found_count += 1
|
874
878
|
return resolved
|
875
|
-
#
|
876
|
-
self._ref_exists_cache_miss_count += 1
|
879
|
+
# Reference is NOT cached and was NOT resolved internally; lookup in PORTAL.
|
877
880
|
# Get the lookup strategy; i.e. should we look up by root path, and if so, should
|
878
881
|
# we do this first, and do we lookup by subtypes; by default we lookup by root path
|
879
|
-
# but not first, and we
|
880
|
-
ref_lookup_strategy,
|
881
|
-
self._ref_lookup_strategy(type_name, self.get_schema(type_name), value))
|
882
|
+
# but not first, and we also lookup by subtypes by default.
|
883
|
+
ref_lookup_strategy, _ = self._ref_lookup_strategy(type_name, self.get_schema(type_name), value)
|
882
884
|
is_ref_lookup_specified_type = StructuredDataSet._is_ref_lookup_specified_type(ref_lookup_strategy)
|
883
885
|
is_ref_lookup_root = StructuredDataSet._is_ref_lookup_root(ref_lookup_strategy)
|
884
886
|
is_ref_lookup_root_first = StructuredDataSet._is_ref_lookup_root_first(ref_lookup_strategy)
|
885
887
|
is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
|
886
|
-
subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
|
887
|
-
# Lookup internally first (including at subtypes if desired; root lookup not applicable here).
|
888
|
-
is_resolved, identifying_property, resolved_uuid = (
|
889
|
-
self._ref_exists_internally(type_name, value, subtype_names,
|
890
|
-
incorrect_identifying_property=incorrect_identifying_property))
|
891
|
-
if is_resolved:
|
892
|
-
if identifying_property == incorrect_identifying_property:
|
893
|
-
# Not REALLY resolved as it resolved to a property which is NOT an identifying
|
894
|
-
# property, but may be commonly mistaken for one (e.g. UnalignedReads.filename).
|
895
|
-
self._ref_incorrect_identifying_property_count += 1
|
896
|
-
return []
|
897
|
-
resolved = [{"type": type_name, "uuid": resolved_uuid}]
|
898
|
-
self._cache_ref(type_name, value, resolved, subtype_names)
|
899
|
-
return resolved
|
900
|
-
# Not found internally; perform actual portal lookup (including at root and subtypes if desired).
|
901
888
|
# First construct the list of lookup paths at which to look for the referenced item.
|
902
889
|
lookup_paths = []
|
903
890
|
if is_ref_lookup_root_first:
|
@@ -906,37 +893,81 @@ class Portal(PortalBase):
|
|
906
893
|
lookup_paths.append(f"/{type_name}/{value}")
|
907
894
|
if is_ref_lookup_root and not is_ref_lookup_root_first:
|
908
895
|
lookup_paths.append(f"/{value}")
|
909
|
-
if
|
910
|
-
|
911
|
-
|
896
|
+
subtype_names = self._get_schema_subtypes_names(type_name) if is_ref_lookup_subtypes else []
|
897
|
+
for subtype_name in subtype_names:
|
898
|
+
lookup_paths.append(f"/{subtype_name}/{value}")
|
912
899
|
if not lookup_paths:
|
913
900
|
# No (i.e. zero) lookup strategy means no ref lookup at all.
|
914
|
-
|
915
|
-
|
901
|
+
if called_from_map_ref:
|
902
|
+
self._ref_total_notfound_count += 1
|
903
|
+
return None
|
904
|
+
# Do the actual lookup in portal for each of the desired lookup paths.
|
916
905
|
for lookup_path in lookup_paths:
|
917
|
-
if isinstance(
|
918
|
-
resolved =
|
919
|
-
self._cache_ref(type_name, value, resolved
|
906
|
+
if isinstance(resolved_item := self.ref_lookup(lookup_path), dict):
|
907
|
+
resolved = {"type": type_name, "uuid": resolved_item.get("uuid", None)}
|
908
|
+
self._cache_ref(type_name, value, resolved)
|
909
|
+
self._ref_exists_external_count += 1
|
910
|
+
if called_from_map_ref:
|
911
|
+
self._ref_total_found_count += 1
|
920
912
|
return resolved
|
921
|
-
# Not found at all; note that we cache this (
|
922
|
-
self._cache_ref(type_name, value,
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
self, type_name: str, value: str,
|
927
|
-
subtype_names: Optional[List[str]] = None,
|
928
|
-
incorrect_identifying_property: Optional[str] = None) -> Tuple[bool, Optional[str], Optional[str]]:
|
929
|
-
for type_name in [type_name] + (subtype_names or []):
|
930
|
-
is_resolved, identifying_property, resolved_uuid = self._ref_exists_single_internally(
|
931
|
-
type_name, value, incorrect_identifying_property=incorrect_identifying_property)
|
932
|
-
if is_resolved:
|
933
|
-
return True, identifying_property, resolved_uuid
|
934
|
-
return False, None, None
|
913
|
+
# Not found at all; note that we cache this ({}) too; indicates lookup has been done.
|
914
|
+
self._cache_ref(type_name, value, {})
|
915
|
+
if called_from_map_ref:
|
916
|
+
self._ref_total_notfound_count += 1
|
917
|
+
return None
|
935
918
|
|
936
|
-
def
|
937
|
-
|
938
|
-
|
939
|
-
|
919
|
+
def ref_exists_internally(self, type_name: str, value: Optional[str] = None,
|
920
|
+
update_counts: bool = False) -> Optional[dict]:
|
921
|
+
"""
|
922
|
+
Looks up the given reference (type/value) internally (i.e. with this data parsed thus far).
|
923
|
+
If found then returns a dictionary containing the (given) type name and
|
924
|
+
the uuid (if any) of the resolved item. If not found then returns an empty dictionary; however,
|
925
|
+
if not found, but found using an "incorrect" identifying property, then returns None.
|
926
|
+
"""
|
927
|
+
# print(f"\033[Kxyzzy:ref_exists_internally({type_name}/{value})")
|
928
|
+
if not value:
|
929
|
+
type_name, value = Portal._get_type_name_and_value_from_path(type_name)
|
930
|
+
if not type_name or not value:
|
931
|
+
return None
|
932
|
+
# Note that root lookup not applicable here.
|
933
|
+
ref_lookup_strategy, incorrect_identifying_property = (
|
934
|
+
self._ref_lookup_strategy(type_name, self.get_schema(type_name), value))
|
935
|
+
is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
|
936
|
+
subtype_names = self._get_schema_subtypes_names(type_name) if is_ref_lookup_subtypes else []
|
937
|
+
for type_name in [type_name] + subtype_names:
|
938
|
+
is_resolved, resolved_item = self._ref_exists_single_internally(type_name, value)
|
939
|
+
if is_resolved:
|
940
|
+
if update_counts:
|
941
|
+
self._ref_exists_internal_count += 1
|
942
|
+
self._ref_total_found_count += 1
|
943
|
+
resolved = {"type": type_name, "uuid": resolved_item.get("uuid")}
|
944
|
+
self._cache_ref(type_name, value, resolved)
|
945
|
+
return resolved
|
946
|
+
# Here this reference is not resolved internally; but let us check any specified incorrect
|
947
|
+
# property to see if it would have been resolved using that; for example, if we pretend that
|
948
|
+
# UnalignedReads.filename were an identifying property (which it is not), then we see if this
|
949
|
+
# reference, which would otherwise be unresolved, would be resolved; in which case we have an
|
950
|
+
# incorrect reference; doing this can cut down considerably on useless lookups (at least for
|
951
|
+
# a case from He Li, early March 2024).
|
952
|
+
for type_name in [type_name] + subtype_names:
|
953
|
+
if incorrect_identifying_property:
|
954
|
+
if self._data and (items := self._data.get(type_name)):
|
955
|
+
for item in items:
|
956
|
+
if (identifying_value := item.get(incorrect_identifying_property, None)) is not None:
|
957
|
+
if ((identifying_value == value) or
|
958
|
+
(isinstance(identifying_value, list) and (value in identifying_value))): # noqa
|
959
|
+
# Not REALLY resolved as it resolved to a property which is NOT an identifying
|
960
|
+
# property, but may be commonly mistaken for one (e.g. UnalignedReads.filename).
|
961
|
+
# Return value to prevent actual portal lookup from happening.
|
962
|
+
if update_counts:
|
963
|
+
self._ref_incorrect_identifying_property_count += 1
|
964
|
+
self._ref_total_notfound_count += 1
|
965
|
+
return None # None return means resolved internally incorrectly.
|
966
|
+
if update_counts:
|
967
|
+
self._ref_total_notfound_count += 1
|
968
|
+
return {} # Empty return means not resolved internally.
|
969
|
+
|
970
|
+
def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[dict]]:
|
940
971
|
if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
|
941
972
|
identifying_properties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
|
942
973
|
for item in items:
|
@@ -944,32 +975,41 @@ class Portal(PortalBase):
|
|
944
975
|
if (identifying_value := item.get(identifying_property, None)) is not None:
|
945
976
|
if ((identifying_value == value) or
|
946
977
|
(isinstance(identifying_value, list) and (value in identifying_value))): # noqa
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
978
|
+
return True, item
|
979
|
+
return False, None
|
980
|
+
|
981
|
+
@staticmethod
|
982
|
+
def _get_type_name_and_value_from_path(path: str) -> Tuple[Optional[str], Optional[str]]:
|
983
|
+
if path.startswith("/") and len(parts := path[1:].split("/")) == 2:
|
984
|
+
if not (type_name := parts[0]) or not (value := parts[1]):
|
985
|
+
return None
|
986
|
+
return type_name, value
|
987
|
+
return None, None
|
988
|
+
|
989
|
+
def _ref_exists_from_cache(self, type_name: str, value: str) -> Optional[List[dict]]:
|
990
|
+
if self._ref_cache is not None:
|
991
|
+
self._ref_exists_cache_hit_count += 1
|
992
|
+
return self._ref_cache.get(f"/{type_name}/{value}", None)
|
993
|
+
self._ref_exists_cache_miss_count += 1
|
994
|
+
return None
|
995
|
+
|
996
|
+
def _cache_ref(self, type_name: str, value: str, resolved: List[str]) -> None:
|
997
|
+
subtype_names = self._get_schema_subtypes_names(type_name)
|
998
|
+
if self._ref_cache is not None:
|
999
|
+
for type_name in [type_name] + subtype_names:
|
1000
|
+
self._ref_cache[f"/{type_name}/{value}"] = resolved
|
955
1001
|
|
956
1002
|
@property
|
957
|
-
def
|
958
|
-
|
959
|
-
return 0
|
960
|
-
try:
|
961
|
-
return self.ref_lookup_cache.cache_info().hits
|
962
|
-
except Exception:
|
963
|
-
return -1
|
1003
|
+
def ref_total_count(self) -> int:
|
1004
|
+
return self._ref_total_count
|
964
1005
|
|
965
1006
|
@property
|
966
|
-
def
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
return -1
|
1007
|
+
def ref_total_found_count(self) -> int:
|
1008
|
+
return self._ref_total_found_count
|
1009
|
+
|
1010
|
+
@property
|
1011
|
+
def ref_total_notfound_count(self) -> int:
|
1012
|
+
return self._ref_total_notfound_count
|
973
1013
|
|
974
1014
|
@property
|
975
1015
|
def ref_lookup_count(self) -> int:
|
@@ -987,10 +1027,32 @@ class Portal(PortalBase):
|
|
987
1027
|
def ref_lookup_error_count(self) -> int:
|
988
1028
|
return self._ref_lookup_error_count
|
989
1029
|
|
1030
|
+
@property
|
1031
|
+
def ref_lookup_cache_hit_count(self) -> int:
|
1032
|
+
if self._ref_cache is None:
|
1033
|
+
return -1
|
1034
|
+
try:
|
1035
|
+
return self.ref_lookup_cached.cache_info().hits
|
1036
|
+
except Exception:
|
1037
|
+
return -1
|
1038
|
+
|
1039
|
+
@property
|
1040
|
+
def ref_lookup_cache_miss_count(self) -> int:
|
1041
|
+
if self._ref_cache is None:
|
1042
|
+
return -1
|
1043
|
+
try:
|
1044
|
+
return self.ref_lookup_cached.cache_info().misses
|
1045
|
+
except Exception:
|
1046
|
+
return -1
|
1047
|
+
|
990
1048
|
@property
|
991
1049
|
def ref_exists_internal_count(self) -> int:
|
992
1050
|
return self._ref_exists_internal_count
|
993
1051
|
|
1052
|
+
@property
|
1053
|
+
def ref_exists_external_count(self) -> int:
|
1054
|
+
return self._ref_exists_external_count
|
1055
|
+
|
994
1056
|
@property
|
995
1057
|
def ref_exists_cache_hit_count(self) -> int:
|
996
1058
|
return self._ref_exists_cache_hit_count
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "dcicutils"
|
3
|
-
version = "8.8.0.
|
3
|
+
version = "8.8.0.1b16" # TODO: To become 8.8.1
|
4
4
|
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
|
5
5
|
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
|
6
6
|
license = "MIT"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/c4-infrastructure.jsonc
RENAMED
File without changes
|
File without changes
|
File without changes
|
{dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-common.jsonc
RENAMED
File without changes
|
{dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc
RENAMED
File without changes
|
{dcicutils-8.8.0.1b15 → dcicutils-8.8.0.1b16}/dcicutils/license_policies/park-lab-pipeline.jsonc
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|