dcicutils 8.8.0.1b4__tar.gz → 8.8.0.1b6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/PKG-INFO +1 -1
  2. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/structured_data.py +84 -42
  3. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/pyproject.toml +1 -1
  4. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/LICENSE.txt +0 -0
  5. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/README.rst +0 -0
  6. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/__init__.py +0 -0
  7. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/base.py +0 -0
  8. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/beanstalk_utils.py +0 -0
  9. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/bundle_utils.py +0 -0
  10. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/captured_output.py +0 -0
  11. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/cloudformation_utils.py +0 -0
  12. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/codebuild_utils.py +0 -0
  13. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/command_utils.py +0 -0
  14. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/common.py +0 -0
  15. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/contribution_scripts.py +0 -0
  16. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/contribution_utils.py +0 -0
  17. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/creds_utils.py +0 -0
  18. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/data_readers.py +0 -0
  19. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/data_utils.py +0 -0
  20. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/datetime_utils.py +0 -0
  21. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/deployment_utils.py +0 -0
  22. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/diff_utils.py +0 -0
  23. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/docker_utils.py +0 -0
  24. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/ecr_scripts.py +0 -0
  25. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/ecr_utils.py +0 -0
  26. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/ecs_utils.py +0 -0
  27. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/env_base.py +0 -0
  28. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/env_manager.py +0 -0
  29. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/env_scripts.py +0 -0
  30. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/env_utils.py +0 -0
  31. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/env_utils_legacy.py +0 -0
  32. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/es_utils.py +0 -0
  33. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/exceptions.py +0 -0
  34. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/ff_mocks.py +0 -0
  35. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/ff_utils.py +0 -0
  36. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/file_utils.py +0 -0
  37. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/function_cache_decorator.py +0 -0
  38. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/glacier_utils.py +0 -0
  39. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/jh_utils.py +0 -0
  40. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/kibana/dashboards.json +0 -0
  41. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/kibana/readme.md +0 -0
  42. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/lang_utils.py +0 -0
  43. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  44. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  45. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  46. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  47. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  48. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  49. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/license_utils.py +0 -0
  50. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/log_utils.py +0 -0
  51. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/misc_utils.py +0 -0
  52. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/obfuscation_utils.py +0 -0
  53. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/opensearch_utils.py +0 -0
  54. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/portal_object_utils.py +0 -0
  55. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/portal_utils.py +0 -0
  56. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/project_utils.py +0 -0
  57. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/qa_checkers.py +0 -0
  58. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/qa_utils.py +0 -0
  59. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/redis_tools.py +0 -0
  60. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/redis_utils.py +0 -0
  61. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/s3_utils.py +0 -0
  62. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/schema_utils.py +0 -0
  63. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/scripts/publish_to_pypi.py +0 -0
  64. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/scripts/run_license_checker.py +0 -0
  65. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/scripts/view_portal_object.py +0 -0
  66. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/secrets_utils.py +0 -0
  67. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/sheet_utils.py +0 -0
  68. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/snapshot_utils.py +0 -0
  69. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/ssl_certificate_utils.py +0 -0
  70. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/task_utils.py +0 -0
  71. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/tmpfile_utils.py +0 -0
  72. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/trace_utils.py +0 -0
  73. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/validation_utils.py +0 -0
  74. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/variant_utils.py +0 -0
  75. {dcicutils-8.8.0.1b4 → dcicutils-8.8.0.1b6}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.0.1b4
3
+ Version: 8.8.0.1b6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -12,7 +12,7 @@ from dcicutils.common import OrchestratedApp
12
12
  from dcicutils.data_readers import CsvReader, Excel, RowReader
13
13
  from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
14
14
  from dcicutils.file_utils import search_for_file
15
- from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
15
+ from dcicutils.misc_utils import (create_dict, create_readonly_object, load_json_if,
16
16
  merge_objects, remove_empty_properties, right_trim,
17
17
  split_string, to_boolean, to_enum, to_float, to_integer, VirtualApp)
18
18
  from dcicutils.portal_object_utils import PortalObject
@@ -53,6 +53,10 @@ class StructuredDataSet:
53
53
  # can choose to lookup root path first, or not lookup root path at all, or not lookup
54
54
  # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
55
55
  # and value (string) arguments and return an integer of any of the below ORed together.
56
+ # The main purpose of this is optimization; to minimize portal lookups; since for example,
57
+ # currently at least, /{type}/{accession} does not work but /{accession} does; so we
58
+ # currently (smaht-portal/.../ingestion_processors) use REF_LOOKUP_ROOT_FIRST for this.
59
+ # And current usage NEVER has REF_LOOKUP_SUBTYPES turned OFF; but support just in case.
56
60
  REF_LOOKUP_ROOT = 0x0001
57
61
  REF_LOOKUP_ROOT_FIRST = 0x0002 | REF_LOOKUP_ROOT
58
62
  REF_LOOKUP_SUBTYPES = 0x0004
@@ -228,8 +232,10 @@ class StructuredDataSet:
228
232
  if ref_errors := self.ref_errors:
229
233
  ref_errors_actual = []
230
234
  for ref_error in ref_errors:
231
- if not self.portal.ref_exists(ref_error["error"]):
235
+ if not (resolved := self.portal.ref_exists(ref := ref_error["error"])):
232
236
  ref_errors_actual.append(ref_error)
237
+ else:
238
+ self._resolved_refs.add((ref, resolved[0].get("uuid")))
233
239
  if ref_errors_actual:
234
240
  self._errors["ref"] = ref_errors_actual
235
241
  else:
@@ -291,6 +297,10 @@ class StructuredDataSet:
291
297
  def ref_lookup_cache_miss_count(self) -> int:
292
298
  return self.portal.ref_lookup_cache_miss_count if self.portal else -1
293
299
 
300
+ @property
301
+ def ref_lookup_count(self) -> int:
302
+ return self.portal.ref_lookup_count if self.portal else -1
303
+
294
304
  @property
295
305
  def ref_lookup_found_count(self) -> int:
296
306
  return self.portal.ref_lookup_found_count if self.portal else -1
@@ -561,7 +571,7 @@ class Schema(SchemaBase):
561
571
  the names of any nested properties (i.e objects within objects) flattened into a single
562
572
  property name in dot notation; and set the value of each of these flat property names
563
573
  to the type of the terminal/leaf value of the (either) top-level or nested type. N.B. We
564
- do NOT currently support array-of-arry or array-of-multiple-types. E.g. for this schema:
574
+ do NOT currently support array-of-array or array-of-multiple-types. E.g. for this schema:
565
575
 
566
576
  { "properties": {
567
577
  "abc": {
@@ -779,69 +789,95 @@ class Portal(PortalBase):
779
789
  return self._ref_cache.get(f"/{type_name}/{value}", None)
780
790
  return None
781
791
 
782
- def _cache_ref(self, type_name: str, value: str, resolved: List[str],
783
- subtype_names: Optional[List[str]]) -> None:
792
+ def _cache_ref(self, type_name: str, value: str, resolved: List[str], subtype_names: Optional[List[str]]) -> None:
784
793
  if self._ref_cache is not None:
785
- for type_name in [type_name] + (subtype_names or []):
786
- object_path = f"/{type_name}/{value}"
787
- if self._ref_cache.get(object_path, None) is None:
788
- self._ref_cache[object_path] = resolved
794
+ for type_name in [type_name] + (subtype_names if subtype_names else []):
795
+ self._ref_cache[f"/{type_name}/{value}"] = resolved
789
796
 
790
797
  def ref_exists(self, type_name: str, value: Optional[str] = None) -> List[dict]:
791
798
  if not value:
792
799
  if type_name.startswith("/") and len(parts := type_name[1:].split("/")) == 2:
793
- type_name = parts[0]
794
- value = parts[1]
800
+ if not (type_name := parts[0]) or not (value := parts[1]):
801
+ return []
795
802
  else:
796
- return [] # Should not happen.
803
+ return []
797
804
  if (resolved := self._ref_exists_from_cache(type_name, value)) is not None:
805
+ # Found cached resolved reference.
806
+ if not resolved:
807
+ # Cached resolved reference is empty ([]).
808
+ # It might NOW be found internally, since the portal self._data can change.
809
+ # TODO
810
+ ref_lookup_strategy = self._ref_lookup_strategy(type_name, value)
811
+ is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
812
+ subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
813
+ is_resolved, resolved_uuid = self._ref_exists_internally(type_name, value, subtype_names)
814
+ if is_resolved:
815
+ resolved = [{"type": type_name, "uuid": resolved_uuid}]
816
+ self._cache_ref(type_name, value, resolved, subtype_names)
817
+ return resolved
798
818
  self._ref_exists_cache_hit_count += 1
799
819
  return resolved
800
820
  # Not cached here.
801
821
  self._ref_exists_cache_miss_count += 1
802
- resolved = []
822
+ # Get the lookup strategy.
803
823
  ref_lookup_strategy = self._ref_lookup_strategy(type_name, value)
804
824
  is_ref_lookup_root = StructuredDataSet._is_ref_lookup_root(ref_lookup_strategy)
805
825
  is_ref_lookup_root_first = StructuredDataSet._is_ref_lookup_root_first(ref_lookup_strategy)
806
826
  is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
807
- is_resolved = False
808
- subtype_names = self._get_schema_subtypes(type_name)
827
+ subtype_names = self._get_schema_subtypes(type_name) if is_ref_lookup_subtypes else None
828
+ # Lookup internally first (including at subtypes if desired).
829
+ is_resolved, resolved_uuid = self._ref_exists_internally(type_name, value, subtype_names)
830
+ if is_resolved:
831
+ resolved = [{"type": type_name, "uuid": resolved_uuid}]
832
+ self._cache_ref(type_name, value, resolved, subtype_names)
833
+ return resolved
834
+ # Not found internally; perform actual portal lookup (including at root and subtypes if desired).
835
+ # First construct the list of lookup paths at which to look for the referenced item.
836
+ lookup_paths = []
809
837
  if is_ref_lookup_root_first:
810
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
811
- if not is_resolved:
812
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
813
- if not is_resolved and is_ref_lookup_root and not is_ref_lookup_root_first:
814
- is_resolved, resolved_uuid = self._ref_exists_single(type_name, value, root=True)
838
+ lookup_paths.append(f"/{value}")
839
+ lookup_paths.append(f"/{type_name}/{value}")
840
+ if is_ref_lookup_root and not is_ref_lookup_root_first:
841
+ lookup_paths.append(f"/{value}")
842
+ if subtype_names:
843
+ for subtype_name in subtype_names:
844
+ lookup_paths.append(f"/{subtype_name}/{value}")
845
+ # Do the actual lookup in the portal for each of the desired lookup paths.
846
+ for lookup_path in lookup_paths:
847
+ if isinstance(item := self.get_metadata(lookup_path), dict):
848
+ resolved = [{"type": type_name, "uuid": item.get("uuid", None)}]
849
+ self._cache_ref(type_name, value, resolved, subtype_names)
850
+ return resolved
851
+ return []
852
+
853
+ def _ref_exists_internally(self, type_name: str, value: str,
854
+ subtype_names: Optional[List[str]] = None) -> Tuple[bool, Optional[str]]:
855
+ is_resolved, resolved_uuid = self._ref_exists_single_internally(type_name, value)
815
856
  if is_resolved:
816
- resolved.append({"type": type_name, "uuid": resolved_uuid})
817
- # Check for the given ref in all subtypes of the given type.
818
- elif subtype_names and is_ref_lookup_subtypes:
857
+ return True, resolved_uuid
858
+ if subtype_names:
819
859
  for subtype_name in subtype_names:
820
- is_resolved, resolved_uuid = self._ref_exists_single(subtype_name, value)
860
+ is_resolved, resolved_uuid = self._ref_exists_single_internally(subtype_name, value)
821
861
  if is_resolved:
822
- resolved.append({"type": type_name, "uuid": resolved_uuid})
823
- break
824
- # Cache this ref (and all subtype versions of it); whether or not found;
825
- # if not found it will be an empty array (array because caching all matches;
826
- # but TODO - do not think we should do this anymore - maybe test changes needed).
827
- self._cache_ref(type_name, value, resolved, subtype_names)
828
- return resolved
829
-
830
- def _ref_exists_single(self, type_name: str, value: str, root: bool = False) -> Tuple[bool, Optional[str]]:
831
- # Check first in our own data (i.e. e.g. within the given spreadsheet).
862
+ return True, resolved_uuid
863
+ return False, None
864
+
865
+ def _ref_exists_single_internally(self, type_name: str, value: str) -> Tuple[bool, Optional[str]]:
832
866
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
833
- iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
867
+ identifying_properties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
834
868
  for item in items:
835
- if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)):
836
- if isinstance(ivalue, list) and value in ivalue or ivalue == value:
837
- self._ref_exists_internal_count += 1
838
- return True, (ivalue if isinstance(ivalue, str) and is_uuid(ivalue) else None)
839
- if (value := self.get_metadata(f"/{type_name}/{value}" if not root else f"/{value}")) is None:
840
- return False, None
841
- return True, value.get("uuid")
869
+ for identifying_property in identifying_properties:
870
+ if (identifying_value := item.get(identifying_property, None)) is not None:
871
+ if ((identifying_value == value) or
872
+ (isinstance(identifying_value, list) and (value in identifying_value))): # noqa
873
+ self._ref_exists_internal_count += 1
874
+ return True, item.get("uuid", None)
875
+ return False, None
842
876
 
843
877
  @property
844
878
  def ref_lookup_cache_hit_count(self) -> int:
879
+ if self._ref_cache is None:
880
+ return 0
845
881
  try:
846
882
  return self.get_metadata_cache.cache_info().hits
847
883
  except Exception:
@@ -849,11 +885,17 @@ class Portal(PortalBase):
849
885
 
850
886
  @property
851
887
  def ref_lookup_cache_miss_count(self) -> int:
888
+ if self._ref_cache is None:
889
+ return self.ref_lookup_count
852
890
  try:
853
891
  return self.get_metadata_cache.cache_info().misses
854
892
  except Exception:
855
893
  return -1
856
894
 
895
+ @property
896
+ def ref_lookup_count(self) -> int:
897
+ return self._ref_lookup_found_count + self._ref_lookup_notfound_count + self._ref_lookup_error_count
898
+
857
899
  @property
858
900
  def ref_lookup_found_count(self) -> int:
859
901
  return self._ref_lookup_found_count
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.0.1b4" # TODO: To become 8.8.1
3
+ version = "8.8.0.1b6" # TODO: To become 8.8.1
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
File without changes
File without changes