dcicutils 8.8.6.1b3__py3-none-any.whl → 8.8.6.1b5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/portal_object_utils.py +28 -90
- dcicutils/portal_utils.py +180 -42
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/METADATA +1 -1
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/RECORD +8 -8
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/entry_points.txt +0 -0
dcicutils/portal_object_utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from functools import lru_cache
|
3
|
-
import re
|
4
3
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
5
4
|
from dcicutils.data_readers import RowReader
|
6
5
|
from dcicutils.misc_utils import create_readonly_object
|
@@ -14,11 +13,9 @@ class PortalObject:
|
|
14
13
|
|
15
14
|
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
|
16
15
|
|
17
|
-
def __init__(self, data: dict, portal: Portal = None,
|
18
|
-
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
|
16
|
+
def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
|
19
17
|
self._data = data if isinstance(data, dict) else {}
|
20
18
|
self._portal = portal if isinstance(portal, Portal) else None
|
21
|
-
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
|
22
19
|
self._type = type if isinstance(type, str) else ""
|
23
20
|
|
24
21
|
@property
|
@@ -32,7 +29,7 @@ class PortalObject:
|
|
32
29
|
@property
|
33
30
|
@lru_cache(maxsize=1)
|
34
31
|
def type(self) -> str:
|
35
|
-
return self._type or Portal.get_schema_type(self._data) or
|
32
|
+
return self._type or Portal.get_schema_type(self._data) or ""
|
36
33
|
|
37
34
|
@property
|
38
35
|
@lru_cache(maxsize=1)
|
@@ -47,7 +44,7 @@ class PortalObject:
|
|
47
44
|
@property
|
48
45
|
@lru_cache(maxsize=1)
|
49
46
|
def schema(self) -> Optional[dict]:
|
50
|
-
return self.
|
47
|
+
return self._portal.get_schema(self.type) if self._portal else None
|
51
48
|
|
52
49
|
def copy(self) -> PortalObject:
|
53
50
|
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
|
@@ -59,39 +56,29 @@ class PortalObject:
|
|
59
56
|
Returns the list of all identifying property names of this Portal object which actually have values.
|
60
57
|
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
|
61
58
|
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
|
59
|
+
Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
|
62
60
|
"""
|
63
|
-
|
64
|
-
|
65
|
-
identifying_properties = []
|
66
|
-
for identifying_property in schema_identifying_properties:
|
67
|
-
if identifying_property not in ["uuid", "identifier", "aliases"]:
|
68
|
-
if self._data.get(identifying_property):
|
69
|
-
identifying_properties.append(identifying_property)
|
70
|
-
if self._data.get("identifier"):
|
71
|
-
identifying_properties.insert(0, "identifier")
|
72
|
-
if self._data.get("uuid"):
|
73
|
-
identifying_properties.insert(0, "uuid")
|
74
|
-
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
|
75
|
-
identifying_properties.append("aliases")
|
76
|
-
return identifying_properties or None
|
61
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
|
62
|
+
return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
|
77
63
|
|
78
64
|
@lru_cache(maxsize=8192)
|
79
65
|
def lookup(self, raw: bool = False,
|
80
66
|
ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
|
67
|
+
if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
|
68
|
+
return None, None, 0
|
81
69
|
nlookups = 0
|
82
70
|
first_identifying_path = None
|
83
71
|
try:
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
)
|
72
|
+
for identifying_path in identifying_paths:
|
73
|
+
if not first_identifying_path:
|
74
|
+
first_identifying_path = identifying_path
|
75
|
+
nlookups += 1
|
76
|
+
if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200):
|
77
|
+
return (
|
78
|
+
PortalObject(item.json(), portal=self._portal, type=self.type if raw else None),
|
79
|
+
identifying_path,
|
80
|
+
nlookups
|
81
|
+
)
|
95
82
|
except Exception:
|
96
83
|
pass
|
97
84
|
return None, first_identifying_path, nlookups
|
@@ -158,65 +145,16 @@ class PortalObject:
|
|
158
145
|
return diffs
|
159
146
|
|
160
147
|
@lru_cache(maxsize=1)
|
161
|
-
def _get_identifying_paths(self,
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
return identifying_paths
|
172
|
-
for identifying_property in identifying_properties:
|
173
|
-
if identifying_value := self._data.get(identifying_property):
|
174
|
-
if identifying_property == "uuid":
|
175
|
-
if self.type:
|
176
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
177
|
-
identifying_paths.append(f"/{identifying_value}")
|
178
|
-
# For now at least we include the path both with and without the schema type component,
|
179
|
-
# as for some identifying values, it works (only) with, and some, it works (only) without.
|
180
|
-
# For example: If we have FileSet with "accession", an identifying property, with value
|
181
|
-
# SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
|
182
|
-
# conversely using "submitted_id", also an identifying property, with value
|
183
|
-
# UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
|
184
|
-
# not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
|
185
|
-
elif isinstance(identifying_value, list):
|
186
|
-
for identifying_value_item in identifying_value:
|
187
|
-
if self.type:
|
188
|
-
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
|
189
|
-
identifying_paths.append(f"/{identifying_value_item}")
|
190
|
-
else:
|
191
|
-
# TODO: Import from somewhere ...
|
192
|
-
lookup_options = 0
|
193
|
-
if schema := self.schema:
|
194
|
-
# TODO: Hook into the ref_lookup_strategy thing in structured_data to make
|
195
|
-
# sure we check accession format (since it does not have a pattern).
|
196
|
-
if callable(ref_lookup_strategy):
|
197
|
-
lookup_options, ref_validator = ref_lookup_strategy(
|
198
|
-
self._portal, self.type, schema, identifying_value)
|
199
|
-
if callable(ref_validator):
|
200
|
-
if ref_validator(schema, identifying_property, identifying_value) is False:
|
201
|
-
continue
|
202
|
-
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
203
|
-
if not re.match(pattern, identifying_value):
|
204
|
-
# If this identifying value is for a (identifying) property which has a
|
205
|
-
# pattern, and the value does NOT match the pattern, then do NOT include
|
206
|
-
# this value as an identifying path, since it cannot possibly be found.
|
207
|
-
continue
|
208
|
-
if not lookup_options:
|
209
|
-
lookup_options = Portal.LOOKUP_DEFAULT
|
210
|
-
if Portal.is_lookup_root_first(lookup_options):
|
211
|
-
identifying_paths.append(f"/{identifying_value}")
|
212
|
-
if Portal.is_lookup_specified_type(lookup_options) and self.type:
|
213
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
214
|
-
if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
|
215
|
-
identifying_paths.append(f"/{identifying_value}")
|
216
|
-
if Portal.is_lookup_subtypes(lookup_options):
|
217
|
-
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
218
|
-
identifying_paths.append(f"/{subtype_name}/{identifying_value}")
|
219
|
-
return identifying_paths or None
|
148
|
+
def _get_identifying_paths(self, all: bool = True,
|
149
|
+
ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
|
150
|
+
if not self._portal and (uuid := self.uuid):
|
151
|
+
if all is True and (type := self.type):
|
152
|
+
return [f"/{type}/{uuid}", f"/{uuid}"]
|
153
|
+
return [f"/{uuid}"]
|
154
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
|
155
|
+
return self._portal.get_identifying_paths(self._data,
|
156
|
+
portal_type=self.schema, all=all,
|
157
|
+
lookup_strategy=ref_lookup_strategy) if self._portal else None
|
220
158
|
|
221
159
|
def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
|
222
160
|
"""
|
dcicutils/portal_utils.py
CHANGED
@@ -50,15 +50,16 @@ class Portal:
|
|
50
50
|
FILE_TYPE_SCHEMA_NAME = "File"
|
51
51
|
|
52
52
|
# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
|
53
|
-
# structured_data.py; controlled by an optional
|
53
|
+
# structured_data.py; controlled by an optional lookup_strategy callable; default is
|
54
54
|
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
|
55
55
|
# can choose to lookup root path first, or not lookup root path at all, or not lookup
|
56
|
-
# subtypes at all; the
|
56
|
+
# subtypes at all; the lookup_strategy callable if specified should take a type_name
|
57
57
|
# and value (string) arguements and return an integer of any of the below ORed together.
|
58
58
|
# The main purpose of this is optimization; to minimize portal lookups; since for example,
|
59
59
|
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
|
60
60
|
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
|
61
61
|
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
|
62
|
+
LOOKUP_UNDEFINED = 0
|
62
63
|
LOOKUP_SPECIFIED_TYPE = 0x0001
|
63
64
|
LOOKUP_ROOT = 0x0002
|
64
65
|
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
|
@@ -207,23 +208,6 @@ class Portal:
|
|
207
208
|
def vapp(self) -> Optional[TestApp]:
|
208
209
|
return self._vapp
|
209
210
|
|
210
|
-
@staticmethod
|
211
|
-
def is_lookup_specified_type(lookup_options: int) -> bool:
|
212
|
-
return (lookup_options &
|
213
|
-
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
214
|
-
|
215
|
-
@staticmethod
|
216
|
-
def is_lookup_root(lookup_options: int) -> bool:
|
217
|
-
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
218
|
-
|
219
|
-
@staticmethod
|
220
|
-
def is_lookup_root_first(lookup_options: int) -> bool:
|
221
|
-
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
222
|
-
|
223
|
-
@staticmethod
|
224
|
-
def is_lookup_subtypes(lookup_options: int) -> bool:
|
225
|
-
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
226
|
-
|
227
211
|
def get(self, url: str, follow: bool = True,
|
228
212
|
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
|
229
213
|
url = self.url(url, raw, database)
|
@@ -307,7 +291,10 @@ class Portal:
|
|
307
291
|
|
308
292
|
@lru_cache(maxsize=100)
|
309
293
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
310
|
-
|
294
|
+
try:
|
295
|
+
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
296
|
+
except Exception:
|
297
|
+
return None
|
311
298
|
|
312
299
|
@lru_cache(maxsize=1)
|
313
300
|
def get_schemas(self) -> dict:
|
@@ -419,53 +406,204 @@ class Portal:
|
|
419
406
|
return schemas_super_type_map.get(type_name, [])
|
420
407
|
|
421
408
|
@function_cache(maxsize=100, serialize_key=True)
|
422
|
-
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None
|
409
|
+
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
410
|
+
all: bool = True, lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
|
423
411
|
"""
|
424
|
-
Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any
|
425
|
-
|
426
|
-
no other ordering defined. Returns empty list
|
412
|
+
Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
|
413
|
+
and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
|
414
|
+
no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
|
415
|
+
Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_stratey
|
416
|
+
module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
|
417
|
+
this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.
|
427
418
|
"""
|
419
|
+
def is_lookup_specified_type(lookup_options: int) -> bool:
|
420
|
+
return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
421
|
+
def is_lookup_root(lookup_options: int) -> bool: # noqa
|
422
|
+
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
423
|
+
def is_lookup_root_first(lookup_options: int) -> bool: # noqa
|
424
|
+
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
425
|
+
def is_lookup_subtypes(lookup_options: int) -> bool: # noqa
|
426
|
+
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
427
|
+
|
428
428
|
results = []
|
429
429
|
if not isinstance(portal_object, dict):
|
430
430
|
return results
|
431
|
-
if not isinstance(portal_type, str)
|
432
|
-
if
|
433
|
-
|
431
|
+
if not (isinstance(portal_type, str) and portal_type):
|
432
|
+
if isinstance(portal_type, dict):
|
433
|
+
# It appears that the given portal_type is an actual schema dictionary.
|
434
|
+
portal_type = self.schema_name(portal_type.get("title"))
|
435
|
+
if not (isinstance(portal_type, str) and portal_type):
|
436
|
+
if not (portal_type := self.get_schema_type(portal_object)):
|
437
|
+
return results
|
438
|
+
if not callable(lookup_strategy):
|
439
|
+
lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
|
434
440
|
for identifying_property in self.get_identifying_property_names(portal_type):
|
435
|
-
if identifying_value := portal_object.get(identifying_property):
|
436
|
-
|
437
|
-
|
441
|
+
if not (identifying_value := portal_object.get(identifying_property)):
|
442
|
+
continue
|
443
|
+
# The get_identifying_property_names call above ensures uuid is first if it is in the object.
|
444
|
+
# And also note that ALL schemas do in fact have identifyingProperties which do in fact have
|
445
|
+
# uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
|
446
|
+
# might have a special case to check the Portal object explicitly for uuid, but no need.
|
447
|
+
if identifying_property == "uuid":
|
448
|
+
#
|
449
|
+
# Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
|
450
|
+
# is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
|
451
|
+
#
|
452
|
+
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c
|
453
|
+
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
454
|
+
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
455
|
+
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
456
|
+
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
457
|
+
#
|
458
|
+
# Will result in a (HTTP 301) redirect to:
|
459
|
+
#
|
460
|
+
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
461
|
+
#
|
462
|
+
# Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
|
463
|
+
# based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
|
464
|
+
# information is contained, for this example, in the snovault.collection decorator for the
|
465
|
+
# endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because
|
466
|
+
# behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
|
467
|
+
# And note the disction of just using /{uuid} here rather than /{type}/{uuid} as in the else
|
468
|
+
# statement below is not really necessary; just here for emphasis that this is all that's needed.
|
469
|
+
#
|
470
|
+
if all is True:
|
471
|
+
results.append(f"/{portal_type}/{identifying_value}")
|
472
|
+
results.append(f"/{identifying_value}")
|
473
|
+
elif isinstance(identifying_value, list):
|
474
|
+
for identifying_value_item in identifying_value:
|
475
|
+
if identifying_value_item:
|
438
476
|
results.append(f"/{portal_type}/{identifying_value_item}")
|
439
|
-
|
477
|
+
if all is True:
|
478
|
+
results.append(f"/{identifying_value_item}")
|
479
|
+
else:
|
480
|
+
lookup_options = Portal.LOOKUP_UNDEFINED
|
481
|
+
if schema := self.get_schema(portal_type):
|
482
|
+
if callable(lookup_strategy):
|
483
|
+
lookup_options, validator = lookup_strategy(self._portal, self.type, schema, identifying_value)
|
484
|
+
if callable(validator):
|
485
|
+
if validator(schema, identifying_property, identifying_value) is False:
|
486
|
+
continue
|
487
|
+
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
488
|
+
if not re.match(pattern, identifying_value):
|
489
|
+
# If this identifying value is for a (identifying) property which has a
|
490
|
+
# pattern, and the value does NOT match the pattern, then do NOT include
|
491
|
+
# this value as an identifying path, since it cannot possibly be found.
|
492
|
+
continue
|
493
|
+
if lookup_options == Portal.LOOKUP_UNDEFINED:
|
494
|
+
lookup_options = Portal.LOOKUP_DEFAULT
|
495
|
+
if is_lookup_root_first(lookup_options):
|
440
496
|
results.append(f"/{identifying_value}")
|
441
|
-
|
442
|
-
results.append(f"/{
|
497
|
+
if is_lookup_specified_type(lookup_options) and self.type:
|
498
|
+
results.append(f"/{self.type}/{identifying_value}")
|
499
|
+
if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
|
500
|
+
results.append(f"/{identifying_value}")
|
501
|
+
if is_lookup_subtypes(lookup_options):
|
502
|
+
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
503
|
+
results.append(f"/{subtype_name}/{identifying_value}")
|
443
504
|
return results
|
444
505
|
|
445
506
|
@function_cache(maxsize=100, serialize_key=True)
|
446
|
-
def get_identifying_property_names(self, schema: Union[str, dict]
|
507
|
+
def get_identifying_property_names(self, schema: Union[str, dict],
|
508
|
+
portal_object: Optional[dict] = None) -> List[str]:
|
447
509
|
"""
|
448
|
-
Returns the list of identifying property names for the given Portal schema, which may
|
449
|
-
|
510
|
+
Returns the list of identifying property names for the given Portal schema, which may be
|
511
|
+
either a schema name or a schema object. If a Portal object is also given then restricts this
|
512
|
+
set of identifying properties to those which actually have values within this Portal object.
|
513
|
+
Favors the uuid and identifier property names and defavors the aliases property name; no other
|
514
|
+
ordering imposed. Returns empty list if no identifying properties or otherwise not found.
|
450
515
|
"""
|
451
516
|
results = []
|
452
517
|
if isinstance(schema, str):
|
453
|
-
|
454
|
-
if not (schema := self.get_schema(schema)):
|
455
|
-
return results
|
456
|
-
except Exception:
|
518
|
+
if not (schema := self.get_schema(schema)):
|
457
519
|
return results
|
458
520
|
elif not isinstance(schema, dict):
|
459
521
|
return results
|
460
522
|
if not (identifying_properties := get_identifying_properties(schema)):
|
461
523
|
return results
|
462
|
-
identifying_properties =
|
463
|
-
|
524
|
+
identifying_properties = list(set(identifying_properties)) # paranoid dedup
|
525
|
+
identifying_properties = [*identifying_properties] # copy so as not to change schema if given
|
526
|
+
favored_identifying_properties = ["uuid", "identifier"]
|
527
|
+
defavored_identifying_properties = ["aliases"]
|
528
|
+
for favored_identifying_property in reversed(favored_identifying_properties):
|
464
529
|
if favored_identifying_property in identifying_properties:
|
465
530
|
identifying_properties.remove(favored_identifying_property)
|
466
531
|
identifying_properties.insert(0, favored_identifying_property)
|
532
|
+
for defavored_identifying_property in defavored_identifying_properties:
|
533
|
+
if defavored_identifying_property in identifying_properties:
|
534
|
+
identifying_properties.remove(defavored_identifying_property)
|
535
|
+
identifying_properties.append(defavored_identifying_property)
|
536
|
+
if isinstance(portal_object, dict):
|
537
|
+
for identifying_property in [*identifying_properties]:
|
538
|
+
if portal_object.get(identifying_property) is None:
|
539
|
+
identifying_properties.remove(identifying_property)
|
467
540
|
return identifying_properties
|
468
541
|
|
542
|
+
@staticmethod
|
543
|
+
def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
544
|
+
#
|
545
|
+
# Note this slight odd situation WRT object lookups by submitted_id and accession:
|
546
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
547
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
548
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
549
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
550
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
551
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
552
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
553
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
554
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
555
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
556
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
557
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
558
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
559
|
+
#
|
560
|
+
def ref_validator(schema: Optional[dict],
|
561
|
+
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
562
|
+
"""
|
563
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
564
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
565
|
+
|
566
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
567
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
568
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
569
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
570
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
571
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
572
|
+
|
573
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
574
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
575
|
+
if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an
|
576
|
+
identifying property for the given type.
|
577
|
+
"""
|
578
|
+
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
579
|
+
if (property_format == "accession") and (property_name == "accession"):
|
580
|
+
if not Portal._is_accession_id(property_value):
|
581
|
+
return False
|
582
|
+
return None
|
583
|
+
|
584
|
+
DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
|
585
|
+
if not value:
|
586
|
+
return DEFAULT_RESULT
|
587
|
+
if not schema:
|
588
|
+
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
589
|
+
return DEFAULT_RESULT
|
590
|
+
if schema_properties := schema.get("properties"):
|
591
|
+
if schema_properties.get("accession") and Portal._is_accession_id(value):
|
592
|
+
# Case: lookup by accession (only by root).
|
593
|
+
return (Portal.LOOKUP_ROOT, ref_validator)
|
594
|
+
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
595
|
+
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
596
|
+
if re.match(schema_property_pattern_submitted_id, value):
|
597
|
+
# Case: lookup by submitted_id (only by specified type).
|
598
|
+
return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
|
599
|
+
return DEFAULT_RESULT
|
600
|
+
|
601
|
+
@staticmethod
|
602
|
+
def _is_accession_id(value: str) -> bool:
|
603
|
+
# This is here for now because of problems with circular dependencies.
|
604
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
605
|
+
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
606
|
+
|
469
607
|
def url(self, url: str, raw: bool = False, database: bool = False) -> str:
|
470
608
|
if not isinstance(url, str) or not url:
|
471
609
|
return "/"
|
@@ -2,39 +2,45 @@ import re
|
|
2
2
|
from typing import Optional
|
3
3
|
from dcicutils.structured_data import Portal
|
4
4
|
|
5
|
+
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
+
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
+
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
+
|
5
9
|
|
6
10
|
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
7
11
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# /
|
13
|
-
# /
|
14
|
-
#
|
15
|
-
# /
|
16
|
-
#
|
17
|
-
# /
|
18
|
-
# /
|
12
|
+
# Note this slight odd situation WRT object lookups by submitted_id and accession:
|
13
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
14
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
15
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
16
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
17
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
18
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
19
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
20
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
21
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
22
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
23
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
19
26
|
#
|
20
27
|
def ref_validator(schema: Optional[dict],
|
21
28
|
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
22
29
|
"""
|
23
|
-
Returns False iff
|
24
|
-
the given property name
|
30
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
31
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
25
32
|
|
26
|
-
For example, if the schema is for
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
33
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
34
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
35
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
36
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
37
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
38
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
33
39
|
|
34
|
-
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
-
|
36
|
-
|
37
|
-
|
40
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
41
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
42
|
+
if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an
|
43
|
+
identifying property for the given type.
|
38
44
|
"""
|
39
45
|
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
40
46
|
if (property_format == "accession") and (property_name == "accession"):
|
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
|
|
62
68
|
|
63
69
|
|
64
70
|
# This is here for now because of problems with circular dependencies.
|
65
|
-
# See: smaht-portal/.../schema_formats.py
|
71
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
66
72
|
def _is_accession_id(value: str) -> bool:
|
67
73
|
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
@@ -47,8 +47,8 @@ dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
|
47
47
|
dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
|
48
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
49
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
50
|
-
dcicutils/portal_object_utils.py,sha256=
|
51
|
-
dcicutils/portal_utils.py,sha256=
|
50
|
+
dcicutils/portal_object_utils.py,sha256=ryNoSFKEdaj4Y5MPryVanK2Hg32-Kg6r0BwvOPf2Ov0,11227
|
51
|
+
dcicutils/portal_utils.py,sha256=ySXVOgMkfJN2x5o5ZpTw9PamdcvCDLstIj1m0Gmwu-o,44070
|
52
52
|
dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
|
53
53
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
54
54
|
dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
|
@@ -66,15 +66,15 @@ dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,2
|
|
66
66
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
67
67
|
dcicutils/structured_data.py,sha256=sm8x08ckPZcIcyBaSlQRGrOD3YL9d09gz-xB3_TAWGE,64516
|
68
68
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
69
|
-
dcicutils/submitr/ref_lookup_strategy.py,sha256=
|
69
|
+
dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
|
70
70
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
71
71
|
dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
|
72
72
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
73
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
75
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
-
dcicutils-8.8.6.
|
77
|
-
dcicutils-8.8.6.
|
78
|
-
dcicutils-8.8.6.
|
79
|
-
dcicutils-8.8.6.
|
80
|
-
dcicutils-8.8.6.
|
76
|
+
dcicutils-8.8.6.1b5.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.8.6.1b5.dist-info/METADATA,sha256=eDrvr1n8DzjE9FOjKSWjHho43PyTQWvejmH-V4X-DXQ,3439
|
78
|
+
dcicutils-8.8.6.1b5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.8.6.1b5.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.8.6.1b5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|