dcicutils 8.8.6.1b2__tar.gz → 8.8.6.1b5__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/PKG-INFO +1 -1
  2. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/portal_object_utils.py +28 -90
  3. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/portal_utils.py +180 -42
  4. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/structured_data.py +1 -1
  5. dcicutils-8.8.6.1b5/dcicutils/submitr/ref_lookup_strategy.py +73 -0
  6. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/pyproject.toml +1 -1
  7. dcicutils-8.8.6.1b2/dcicutils/submitr/ref_lookup_strategy.py +0 -67
  8. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/LICENSE.txt +0 -0
  9. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/README.rst +0 -0
  10. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/__init__.py +0 -0
  11. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/base.py +0 -0
  12. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/beanstalk_utils.py +0 -0
  13. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/bundle_utils.py +0 -0
  14. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/captured_output.py +0 -0
  15. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/cloudformation_utils.py +0 -0
  16. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/codebuild_utils.py +0 -0
  17. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/command_utils.py +0 -0
  18. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/common.py +0 -0
  19. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/contribution_scripts.py +0 -0
  20. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/contribution_utils.py +0 -0
  21. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/creds_utils.py +0 -0
  22. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/data_readers.py +0 -0
  23. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/data_utils.py +0 -0
  24. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/datetime_utils.py +0 -0
  25. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/deployment_utils.py +0 -0
  26. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/diff_utils.py +0 -0
  27. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/docker_utils.py +0 -0
  28. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/ecr_scripts.py +0 -0
  29. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/ecr_utils.py +0 -0
  30. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/ecs_utils.py +0 -0
  31. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/env_base.py +0 -0
  32. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/env_manager.py +0 -0
  33. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/env_scripts.py +0 -0
  34. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/env_utils.py +0 -0
  35. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/env_utils_legacy.py +0 -0
  36. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/es_utils.py +0 -0
  37. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/exceptions.py +0 -0
  38. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/ff_mocks.py +0 -0
  39. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/ff_utils.py +0 -0
  40. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/file_utils.py +0 -0
  41. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/function_cache_decorator.py +0 -0
  42. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/glacier_utils.py +0 -0
  43. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/http_utils.py +0 -0
  44. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/jh_utils.py +0 -0
  45. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/kibana/dashboards.json +0 -0
  46. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/kibana/readme.md +0 -0
  47. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/lang_utils.py +0 -0
  48. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  49. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  50. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  51. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  52. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  53. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  54. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/license_utils.py +0 -0
  55. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/log_utils.py +0 -0
  56. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/misc_utils.py +0 -0
  57. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/obfuscation_utils.py +0 -0
  58. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/opensearch_utils.py +0 -0
  59. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/progress_bar.py +0 -0
  60. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/project_utils.py +0 -0
  61. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/qa_checkers.py +0 -0
  62. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/qa_utils.py +0 -0
  63. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/redis_tools.py +0 -0
  64. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/redis_utils.py +0 -0
  65. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/s3_utils.py +0 -0
  66. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/schema_utils.py +0 -0
  67. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/scripts/publish_to_pypi.py +0 -0
  68. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/scripts/run_license_checker.py +0 -0
  69. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/scripts/view_portal_object.py +0 -0
  70. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/secrets_utils.py +0 -0
  71. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/sheet_utils.py +0 -0
  72. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/snapshot_utils.py +0 -0
  73. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/ssl_certificate_utils.py +0 -0
  74. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/submitr/progress_constants.py +0 -0
  75. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/task_utils.py +0 -0
  76. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/tmpfile_utils.py +0 -0
  77. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/trace_utils.py +0 -0
  78. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/validation_utils.py +0 -0
  79. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/variant_utils.py +0 -0
  80. {dcicutils-8.8.6.1b2 → dcicutils-8.8.6.1b5}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.6.1b2
3
+ Version: 8.8.6.1b5
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -1,6 +1,5 @@
1
1
  from copy import deepcopy
2
2
  from functools import lru_cache
3
- import re
4
3
  from typing import Any, Callable, List, Optional, Tuple, Type, Union
5
4
  from dcicutils.data_readers import RowReader
6
5
  from dcicutils.misc_utils import create_readonly_object
@@ -14,11 +13,9 @@ class PortalObject:
14
13
 
15
14
  _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
16
15
 
17
- def __init__(self, data: dict, portal: Portal = None,
18
- schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
16
+ def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
19
17
  self._data = data if isinstance(data, dict) else {}
20
18
  self._portal = portal if isinstance(portal, Portal) else None
21
- self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
22
19
  self._type = type if isinstance(type, str) else ""
23
20
 
24
21
  @property
@@ -32,7 +29,7 @@ class PortalObject:
32
29
  @property
33
30
  @lru_cache(maxsize=1)
34
31
  def type(self) -> str:
35
- return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else "")
32
+ return self._type or Portal.get_schema_type(self._data) or ""
36
33
 
37
34
  @property
38
35
  @lru_cache(maxsize=1)
@@ -47,7 +44,7 @@ class PortalObject:
47
44
  @property
48
45
  @lru_cache(maxsize=1)
49
46
  def schema(self) -> Optional[dict]:
50
- return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None)
47
+ return self._portal.get_schema(self.type) if self._portal else None
51
48
 
52
49
  def copy(self) -> PortalObject:
53
50
  return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
@@ -59,39 +56,29 @@ class PortalObject:
59
56
  Returns the list of all identifying property names of this Portal object which actually have values.
60
57
  Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
61
58
  properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
59
+ Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
62
60
  """
63
- if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
64
- return None
65
- identifying_properties = []
66
- for identifying_property in schema_identifying_properties:
67
- if identifying_property not in ["uuid", "identifier", "aliases"]:
68
- if self._data.get(identifying_property):
69
- identifying_properties.append(identifying_property)
70
- if self._data.get("identifier"):
71
- identifying_properties.insert(0, "identifier")
72
- if self._data.get("uuid"):
73
- identifying_properties.insert(0, "uuid")
74
- if "aliases" in schema_identifying_properties and self._data.get("aliases"):
75
- identifying_properties.append("aliases")
76
- return identifying_properties or None
61
+ # Migrating to and unifying this in portal_utils.Portal.get_identifying_property_names (2024-05-26).
62
+ return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
77
63
 
78
64
  @lru_cache(maxsize=8192)
79
65
  def lookup(self, raw: bool = False,
80
66
  ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
67
+ if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
68
+ return None, None, 0
81
69
  nlookups = 0
82
70
  first_identifying_path = None
83
71
  try:
84
- if identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy):
85
- for identifying_path in identifying_paths:
86
- if not first_identifying_path:
87
- first_identifying_path = identifying_path
88
- nlookups += 1
89
- if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
90
- return (
91
- PortalObject(value.json(), portal=self._portal, type=self.type if raw else None),
92
- identifying_path,
93
- nlookups
94
- )
72
+ for identifying_path in identifying_paths:
73
+ if not first_identifying_path:
74
+ first_identifying_path = identifying_path
75
+ nlookups += 1
76
+ if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200):
77
+ return (
78
+ PortalObject(item.json(), portal=self._portal, type=self.type if raw else None),
79
+ identifying_path,
80
+ nlookups
81
+ )
95
82
  except Exception:
96
83
  pass
97
84
  return None, first_identifying_path, nlookups
@@ -158,65 +145,16 @@ class PortalObject:
158
145
  return diffs
159
146
 
160
147
  @lru_cache(maxsize=1)
161
- def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
162
- """
163
- Returns a list of the possible Portal URL paths identifying this Portal object.
164
- """
165
- identifying_paths = []
166
- if not (identifying_properties := self.identifying_properties):
167
- if self.uuid:
168
- if self.type:
169
- identifying_paths.append(f"/{self.type}/{self.uuid}")
170
- identifying_paths.append(f"/{self.uuid}")
171
- return identifying_paths
172
- for identifying_property in identifying_properties:
173
- if identifying_value := self._data.get(identifying_property):
174
- if identifying_property == "uuid":
175
- if self.type:
176
- identifying_paths.append(f"/{self.type}/{identifying_value}")
177
- identifying_paths.append(f"/{identifying_value}")
178
- # For now at least we include the path both with and without the schema type component,
179
- # as for some identifying values, it works (only) with, and some, it works (only) without.
180
- # For example: If we have FileSet with "accession", an identifying property, with value
181
- # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
182
- # conversely using "submitted_id", also an identifying property, with value
183
- # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
184
- # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
185
- elif isinstance(identifying_value, list):
186
- for identifying_value_item in identifying_value:
187
- if self.type:
188
- identifying_paths.append(f"/{self.type}/{identifying_value_item}")
189
- identifying_paths.append(f"/{identifying_value_item}")
190
- else:
191
- # TODO: Import from somewhere ...
192
- lookup_options = 0
193
- if schema := self.schema:
194
- # TODO: Hook into the ref_lookup_strategy thing in structured_data to make
195
- # sure we check accession format (since it does not have a pattern).
196
- if callable(ref_lookup_strategy):
197
- lookup_options, ref_validator = ref_lookup_strategy(
198
- self._portal, self.type, schema, identifying_value)
199
- if callable(ref_validator):
200
- if ref_validator(schema, identifying_property, identifying_value) is False:
201
- continue
202
- if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
203
- if not re.match(pattern, identifying_value):
204
- # If this identifying value is for a (identifying) property which has a
205
- # pattern, and the value does NOT match the pattern, then do NOT include
206
- # this value as an identifying path, since it cannot possibly be found.
207
- continue
208
- if not lookup_options:
209
- lookup_options = Portal.LOOKUP_DEFAULT
210
- if Portal.is_lookup_root_first(lookup_options):
211
- identifying_paths.append(f"/{identifying_value}")
212
- if Portal.is_lookup_specified_type(lookup_options) and self.type:
213
- identifying_paths.append(f"/{self.type}/{identifying_value}")
214
- if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
215
- identifying_paths.append(f"/{identifying_value}")
216
- if Portal.is_lookup_subtypes(lookup_options):
217
- for subtype_name in self._portal.get_schema_subtype_names(self.type):
218
- identifying_paths.append(f"/{subtype_name}/{identifying_value}")
219
- return identifying_paths or None
148
+ def _get_identifying_paths(self, all: bool = True,
149
+ ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
150
+ if not self._portal and (uuid := self.uuid):
151
+ if all is True and (type := self.type):
152
+ return [f"/{type}/{uuid}", f"/{uuid}"]
153
+ return [f"/{uuid}"]
154
+ # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
155
+ return self._portal.get_identifying_paths(self._data,
156
+ portal_type=self.schema, all=all,
157
+ lookup_strategy=ref_lookup_strategy) if self._portal else None
220
158
 
221
159
  def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
222
160
  """
@@ -50,15 +50,16 @@ class Portal:
50
50
  FILE_TYPE_SCHEMA_NAME = "File"
51
51
 
52
52
  # Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
53
- # structured_data.py; controlled by an optional ref_lookup_strategy callable; default is
53
+ # structured_data.py; controlled by an optional lookup_strategy callable; default is
54
54
  # lookup at root path but after the specified type path lookup, and then lookup all subtypes;
55
55
  # can choose to lookup root path first, or not lookup root path at all, or not lookup
56
- # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
56
+ # subtypes at all; the lookup_strategy callable if specified should take a type_name
57
57
  # and value (string) arguments and return an integer of any of the below ORed together.
58
58
  # The main purpose of this is optimization; to minimize portal lookups; since for example,
59
59
  # currently at least, /{type}/{accession} does not work but /{accession} does; so we
60
60
  # currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
61
61
  # And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
62
+ LOOKUP_UNDEFINED = 0
62
63
  LOOKUP_SPECIFIED_TYPE = 0x0001
63
64
  LOOKUP_ROOT = 0x0002
64
65
  LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
@@ -207,23 +208,6 @@ class Portal:
207
208
  def vapp(self) -> Optional[TestApp]:
208
209
  return self._vapp
209
210
 
210
- @staticmethod
211
- def is_lookup_specified_type(lookup_options: int) -> bool:
212
- return (lookup_options &
213
- Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
214
-
215
- @staticmethod
216
- def is_lookup_root(lookup_options: int) -> bool:
217
- return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
218
-
219
- @staticmethod
220
- def is_lookup_root_first(lookup_options: int) -> bool:
221
- return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
222
-
223
- @staticmethod
224
- def is_lookup_subtypes(lookup_options: int) -> bool:
225
- return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
226
-
227
211
  def get(self, url: str, follow: bool = True,
228
212
  raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
229
213
  url = self.url(url, raw, database)
@@ -307,7 +291,10 @@ class Portal:
307
291
 
308
292
  @lru_cache(maxsize=100)
309
293
  def get_schema(self, schema_name: str) -> Optional[dict]:
310
- return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
294
+ try:
295
+ return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
296
+ except Exception:
297
+ return None
311
298
 
312
299
  @lru_cache(maxsize=1)
313
300
  def get_schemas(self) -> dict:
@@ -419,53 +406,204 @@ class Portal:
419
406
  return schemas_super_type_map.get(type_name, [])
420
407
 
421
408
  @function_cache(maxsize=100, serialize_key=True)
422
- def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None) -> List[str]:
409
+ def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
410
+ all: bool = True, lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
423
411
  """
424
- Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any
425
- uuid based path and defavors aliases based paths (ala self.get_identifying_property_names);
426
- no other ordering defined. Returns empty list of none or otherwise not found.
412
+ Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
413
+ and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
414
+ no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
415
+ Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_strategy
416
+ module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
417
+ this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.
427
418
  """
419
+ def is_lookup_specified_type(lookup_options: int) -> bool:
420
+ return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
421
+ def is_lookup_root(lookup_options: int) -> bool: # noqa
422
+ return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
423
+ def is_lookup_root_first(lookup_options: int) -> bool: # noqa
424
+ return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
425
+ def is_lookup_subtypes(lookup_options: int) -> bool: # noqa
426
+ return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
427
+
428
428
  results = []
429
429
  if not isinstance(portal_object, dict):
430
430
  return results
431
- if not isinstance(portal_type, str) or not portal_type:
432
- if not (portal_type := self.get_schema_type(portal_object)):
433
- return results
431
+ if not (isinstance(portal_type, str) and portal_type):
432
+ if isinstance(portal_type, dict):
433
+ # It appears that the given portal_type is an actual schema dictionary.
434
+ portal_type = self.schema_name(portal_type.get("title"))
435
+ if not (isinstance(portal_type, str) and portal_type):
436
+ if not (portal_type := self.get_schema_type(portal_object)):
437
+ return results
438
+ if not callable(lookup_strategy):
439
+ lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
434
440
  for identifying_property in self.get_identifying_property_names(portal_type):
435
- if identifying_value := portal_object.get(identifying_property):
436
- if isinstance(identifying_value, list):
437
- for identifying_value_item in identifying_value:
441
+ if not (identifying_value := portal_object.get(identifying_property)):
442
+ continue
443
+ # The get_identifying_property_names call above ensures uuid is first if it is in the object.
444
+ # And also note that ALL schemas do in fact have identifyingProperties which do in fact have
445
+ # uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
446
+ # might have a special case to check the Portal object explicitly for uuid, but no need.
447
+ if identifying_property == "uuid":
448
+ #
449
+ # Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
450
+ # is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
451
+ #
452
+ # - /d13d06c1-218e-4f61-aaf0-91f226248b3c
453
+ # - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
454
+ # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
455
+ # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
456
+ # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
457
+ #
458
+ # Will result in a (HTTP 301) redirect to:
459
+ #
460
+ # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
461
+ #
462
+ # Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
463
+ # based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
464
+ # information is contained, for this example, in the snovault.collection decorator for the
465
+ # endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because
466
+ # behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
467
+ # And note the distinction of just using /{uuid} here rather than /{type}/{uuid} as in the else
468
+ # statement below is not really necessary; just here for emphasis that this is all that's needed.
469
+ #
470
+ if all is True:
471
+ results.append(f"/{portal_type}/{identifying_value}")
472
+ results.append(f"/{identifying_value}")
473
+ elif isinstance(identifying_value, list):
474
+ for identifying_value_item in identifying_value:
475
+ if identifying_value_item:
438
476
  results.append(f"/{portal_type}/{identifying_value_item}")
439
- elif identifying_property == "uuid":
477
+ if all is True:
478
+ results.append(f"/{identifying_value_item}")
479
+ else:
480
+ lookup_options = Portal.LOOKUP_UNDEFINED
481
+ if schema := self.get_schema(portal_type):
482
+ if callable(lookup_strategy):
483
+ lookup_options, validator = lookup_strategy(self._portal, self.type, schema, identifying_value)
484
+ if callable(validator):
485
+ if validator(schema, identifying_property, identifying_value) is False:
486
+ continue
487
+ if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
488
+ if not re.match(pattern, identifying_value):
489
+ # If this identifying value is for a (identifying) property which has a
490
+ # pattern, and the value does NOT match the pattern, then do NOT include
491
+ # this value as an identifying path, since it cannot possibly be found.
492
+ continue
493
+ if lookup_options == Portal.LOOKUP_UNDEFINED:
494
+ lookup_options = Portal.LOOKUP_DEFAULT
495
+ if is_lookup_root_first(lookup_options):
440
496
  results.append(f"/{identifying_value}")
441
- else:
442
- results.append(f"/{portal_type}/{identifying_value}")
497
+ if is_lookup_specified_type(lookup_options) and self.type:
498
+ results.append(f"/{self.type}/{identifying_value}")
499
+ if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
500
+ results.append(f"/{identifying_value}")
501
+ if is_lookup_subtypes(lookup_options):
502
+ for subtype_name in self._portal.get_schema_subtype_names(self.type):
503
+ results.append(f"/{subtype_name}/{identifying_value}")
443
504
  return results
444
505
 
445
506
  @function_cache(maxsize=100, serialize_key=True)
446
- def get_identifying_property_names(self, schema: Union[str, dict]) -> List[str]:
507
+ def get_identifying_property_names(self, schema: Union[str, dict],
508
+ portal_object: Optional[dict] = None) -> List[str]:
447
509
  """
448
- Returns the list of identifying property names for the given Portal schema, which may
449
- be either a schema name or a schema object; empty list of none or otherwise not found.
510
+ Returns the list of identifying property names for the given Portal schema, which may be
511
+ either a schema name or a schema object. If a Portal object is also given then restricts this
512
+ set of identifying properties to those which actually have values within this Portal object.
513
+ Favors the uuid and identifier property names and defavors the aliases property name; no other
514
+ ordering imposed. Returns empty list if no identifying properties or otherwise not found.
450
515
  """
451
516
  results = []
452
517
  if isinstance(schema, str):
453
- try:
454
- if not (schema := self.get_schema(schema)):
455
- return results
456
- except Exception:
518
+ if not (schema := self.get_schema(schema)):
457
519
  return results
458
520
  elif not isinstance(schema, dict):
459
521
  return results
460
522
  if not (identifying_properties := get_identifying_properties(schema)):
461
523
  return results
462
- identifying_properties = [*identifying_properties]
463
- for favored_identifying_property in reversed(["uuid", "identifier"]):
524
+ identifying_properties = list(set(identifying_properties)) # paranoid dedup
525
+ identifying_properties = [*identifying_properties] # copy so as not to change schema if given
526
+ favored_identifying_properties = ["uuid", "identifier"]
527
+ defavored_identifying_properties = ["aliases"]
528
+ for favored_identifying_property in reversed(favored_identifying_properties):
464
529
  if favored_identifying_property in identifying_properties:
465
530
  identifying_properties.remove(favored_identifying_property)
466
531
  identifying_properties.insert(0, favored_identifying_property)
532
+ for defavored_identifying_property in defavored_identifying_properties:
533
+ if defavored_identifying_property in identifying_properties:
534
+ identifying_properties.remove(defavored_identifying_property)
535
+ identifying_properties.append(defavored_identifying_property)
536
+ if isinstance(portal_object, dict):
537
+ for identifying_property in [*identifying_properties]:
538
+ if portal_object.get(identifying_property) is None:
539
+ identifying_properties.remove(identifying_property)
467
540
  return identifying_properties
468
541
 
542
+ @staticmethod
543
+ def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
544
+ #
545
+ # Note this slight odd situation WRT object lookups by submitted_id and accession:
546
+ # -----------------------------+-----------------------------------------------+---------------+
547
+ # PATH | EXAMPLE | LOOKUP RESULT |
548
+ # -----------------------------+-----------------------------------------------+---------------+
549
+ # /submitted_id | /UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
550
+ # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
551
+ # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
552
+ # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
553
+ # -----------------------------+-----------------------------------------------+---------------+
554
+ # /accession | /SMAFSFXF1RO4 | FOUND |
555
+ # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
556
+ # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
557
+ # /File/accession | /File/SMAFSFXF1RO4 | FOUND |
558
+ # -----------------------------+-----------------------------------------------+---------------+
559
+ #
560
+ def ref_validator(schema: Optional[dict],
561
+ property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
562
+ """
563
+ Returns False iff objects of type represented by the given schema, CANNOT be referenced with
564
+ a Portal path using the given property name and its given property value, otherwise returns None.
565
+
566
+ For example, if the schema is for UnalignedReads and the property name is accession, then we will
567
+ return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
568
+ will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
569
+ will continue executing its default behavior, which is to check other ways in which the given type
570
+ CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
571
+ and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
572
+
573
+ The goal (in structured_data) being to detect if a type is being referenced in such a way that
574
+ CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
575
+ if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
576
+ identifying property for the given type.
577
+ """
578
+ if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
579
+ if (property_format == "accession") and (property_name == "accession"):
580
+ if not Portal._is_accession_id(property_value):
581
+ return False
582
+ return None
583
+
584
+ DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
585
+ if not value:
586
+ return DEFAULT_RESULT
587
+ if not schema:
588
+ if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
589
+ return DEFAULT_RESULT
590
+ if schema_properties := schema.get("properties"):
591
+ if schema_properties.get("accession") and Portal._is_accession_id(value):
592
+ # Case: lookup by accession (only by root).
593
+ return (Portal.LOOKUP_ROOT, ref_validator)
594
+ elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
595
+ if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
596
+ if re.match(schema_property_pattern_submitted_id, value):
597
+ # Case: lookup by submitted_id (only by specified type).
598
+ return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
599
+ return DEFAULT_RESULT
600
+
601
+ @staticmethod
602
+ def _is_accession_id(value: str) -> bool:
603
+ # This is here for now because of problems with circular dependencies.
604
+ # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
605
+ return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
606
+
469
607
  def url(self, url: str, raw: bool = False, database: bool = False) -> str:
470
608
  if not isinstance(url, str) or not url:
471
609
  return "/"
@@ -445,7 +445,7 @@ class StructuredDataSet:
445
445
  just returns the given object. Note that the given object may be CHANGED in place.
446
446
  """
447
447
  for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
448
- if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True):
448
+ if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
449
449
  return merge_objects(existing_portal_object, portal_object)
450
450
  return portal_object
451
451
 
@@ -0,0 +1,73 @@
1
+ import re
2
+ from typing import Optional
3
+ from dcicutils.structured_data import Portal
4
+
5
+ # This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
6
+ # before it was fully developed, we had differing behaviors; but this has been unified; so this
7
+ # could now be internalized to structured_data, and portal_object_utils (TODO).
8
+
9
+
10
+ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
11
+ #
12
+ # Note this slightly odd situation WRT object lookups by submitted_id and accession:
13
+ # -----------------------------+-----------------------------------------------+---------------+
14
+ # PATH | EXAMPLE | LOOKUP RESULT |
15
+ # -----------------------------+-----------------------------------------------+---------------+
16
+ # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
17
+ # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
18
+ # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
19
+ # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
20
+ # -----------------------------+-----------------------------------------------+---------------+
21
+ # /accession | /SMAFSFXF1RO4 | FOUND |
22
+ # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
23
+ # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
24
+ # /File/accession | /File/SMAFSFXF1RO4 | FOUND |
25
+ # -----------------------------+-----------------------------------------------+---------------+
26
+ #
27
+ def ref_validator(schema: Optional[dict],
28
+ property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
29
+ """
30
+ Returns False iff objects of type represented by the given schema, CANNOT be referenced with
31
+ a Portal path using the given property name and its given property value, otherwise returns None.
32
+
33
+ For example, if the schema is for UnalignedReads and the property name is accession, then we will
34
+ return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
35
+ will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
36
+ will continue executing its default behavior, which is to check other ways in which the given type
37
+ CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
38
+ and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
39
+
40
+ The goal (in structured_data) being to detect if a type is being referenced in such a way that
41
+ CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
42
+ if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
43
+ identifying property for the given type.
44
+ """
45
+ if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
46
+ if (property_format == "accession") and (property_name == "accession"):
47
+ if not _is_accession_id(property_value):
48
+ return False
49
+ return None
50
+
51
+ DEFAULT_RESPONSE = (Portal.LOOKUP_DEFAULT, ref_validator)
52
+
53
+ if not value:
54
+ return DEFAULT_RESPONSE
55
+ if not schema:
56
+ if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
57
+ return DEFAULT_RESPONSE
58
+ if schema_properties := schema.get("properties"):
59
+ if schema_properties.get("accession") and _is_accession_id(value):
60
+ # Case: lookup by accession (only by root).
61
+ return Portal.LOOKUP_ROOT, ref_validator
62
+ elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
63
+ if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
64
+ if re.match(schema_property_pattern_submitted_id, value):
65
+ # Case: lookup by submitted_id (only by specified type).
66
+ return Portal.LOOKUP_SPECIFIED_TYPE, ref_validator
67
+ return DEFAULT_RESPONSE
68
+
69
+
70
+ # This is here for now because of problems with circular dependencies.
71
+ # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
72
+ def _is_accession_id(value: str) -> bool:
73
+ return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.6.1b2"
3
+ version = "8.8.6.1b5"
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
@@ -1,67 +0,0 @@
1
- import re
2
- from typing import Optional
3
- from dcicutils.structured_data import Portal
4
-
5
-
6
- def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
7
- #
8
- # FYI: Note this situation WRT object lookups ...
9
- #
10
- # /{submitted_id} # NOT FOUND
11
- # /UnalignedReads/{submitted_id} # OK
12
- # /SubmittedFile/{submitted_id} # OK
13
- # /File/{submitted_id} # NOT FOUND
14
- #
15
- # /{accession} # OK
16
- # /UnalignedReads/{accession} # NOT FOUND
17
- # /SubmittedFile/{accession} # NOT FOUND
18
- # /File/{accession} # OK
19
- #
20
- def ref_validator(schema: Optional[dict],
21
- property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
22
- """
23
- Returns False iff the type represented by the given schema, can NOT be referenced by
24
- the given property name with the given property value, otherwise returns None.
25
-
26
- For example, if the schema is for the UnalignedReads type and the property name
27
- is accession, then we will return False iff the given property value is NOT a properly
28
- formatted accession ID. Otherwise, we will return None, which indicates that the
29
- caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
30
- its default behavior, which is to check other ways in which the given type can NOT
31
- be referenced by the given value, i.e. it checks other identifying properties for
32
- the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
33
-
34
- The goal (in structured_data) being to detect if a type is being referenced in such
35
- a way that cannot possibly be allowed, i.e. because none of its identifying types
36
- are in the required form (if indeed there any requirements). Note that it is guaranteed
37
- that the given property name is indeed an identifying property for the given type.
38
- """
39
- if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
40
- if (property_format == "accession") and (property_name == "accession"):
41
- if not _is_accession_id(property_value):
42
- return False
43
- return None
44
-
45
- DEFAULT_RESPONSE = (Portal.LOOKUP_DEFAULT, ref_validator)
46
-
47
- if not value:
48
- return DEFAULT_RESPONSE
49
- if not schema:
50
- if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
51
- return DEFAULT_RESPONSE
52
- if schema_properties := schema.get("properties"):
53
- if schema_properties.get("accession") and _is_accession_id(value):
54
- # Case: lookup by accession (only by root).
55
- return Portal.LOOKUP_ROOT, ref_validator
56
- elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
57
- if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
58
- if re.match(schema_property_pattern_submitted_id, value):
59
- # Case: lookup by submitted_id (only by specified type).
60
- return Portal.LOOKUP_SPECIFIED_TYPE, ref_validator
61
- return DEFAULT_RESPONSE
62
-
63
-
64
- # This is here for now because of problems with circular dependencies.
65
- # See: smaht-portal/.../schema_formats.py
66
- def _is_accession_id(value: str) -> bool:
67
- return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
File without changes
File without changes