dcicutils 8.8.6.1b3__py3-none-any.whl → 8.8.6.1b5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- dcicutils/portal_object_utils.py +28 -90
- dcicutils/portal_utils.py +180 -42
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/METADATA +1 -1
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/RECORD +8 -8
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.6.1b3.dist-info → dcicutils-8.8.6.1b5.dist-info}/entry_points.txt +0 -0
dcicutils/portal_object_utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from functools import lru_cache
|
3
|
-
import re
|
4
3
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
5
4
|
from dcicutils.data_readers import RowReader
|
6
5
|
from dcicutils.misc_utils import create_readonly_object
|
@@ -14,11 +13,9 @@ class PortalObject:
|
|
14
13
|
|
15
14
|
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
|
16
15
|
|
17
|
-
def __init__(self, data: dict, portal: Portal = None,
|
18
|
-
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
|
16
|
+
def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
|
19
17
|
self._data = data if isinstance(data, dict) else {}
|
20
18
|
self._portal = portal if isinstance(portal, Portal) else None
|
21
|
-
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
|
22
19
|
self._type = type if isinstance(type, str) else ""
|
23
20
|
|
24
21
|
@property
|
@@ -32,7 +29,7 @@ class PortalObject:
|
|
32
29
|
@property
|
33
30
|
@lru_cache(maxsize=1)
|
34
31
|
def type(self) -> str:
|
35
|
-
return self._type or Portal.get_schema_type(self._data) or
|
32
|
+
return self._type or Portal.get_schema_type(self._data) or ""
|
36
33
|
|
37
34
|
@property
|
38
35
|
@lru_cache(maxsize=1)
|
@@ -47,7 +44,7 @@ class PortalObject:
|
|
47
44
|
@property
|
48
45
|
@lru_cache(maxsize=1)
|
49
46
|
def schema(self) -> Optional[dict]:
|
50
|
-
return self.
|
47
|
+
return self._portal.get_schema(self.type) if self._portal else None
|
51
48
|
|
52
49
|
def copy(self) -> PortalObject:
|
53
50
|
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
|
@@ -59,39 +56,29 @@ class PortalObject:
|
|
59
56
|
Returns the list of all identifying property names of this Portal object which actually have values.
|
60
57
|
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
|
61
58
|
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
|
59
|
+
Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
|
62
60
|
"""
|
63
|
-
|
64
|
-
|
65
|
-
identifying_properties = []
|
66
|
-
for identifying_property in schema_identifying_properties:
|
67
|
-
if identifying_property not in ["uuid", "identifier", "aliases"]:
|
68
|
-
if self._data.get(identifying_property):
|
69
|
-
identifying_properties.append(identifying_property)
|
70
|
-
if self._data.get("identifier"):
|
71
|
-
identifying_properties.insert(0, "identifier")
|
72
|
-
if self._data.get("uuid"):
|
73
|
-
identifying_properties.insert(0, "uuid")
|
74
|
-
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
|
75
|
-
identifying_properties.append("aliases")
|
76
|
-
return identifying_properties or None
|
61
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_property_names (2024-05-26).
|
62
|
+
return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
|
77
63
|
|
78
64
|
@lru_cache(maxsize=8192)
|
79
65
|
def lookup(self, raw: bool = False,
|
80
66
|
ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
|
67
|
+
if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
|
68
|
+
return None, None, 0
|
81
69
|
nlookups = 0
|
82
70
|
first_identifying_path = None
|
83
71
|
try:
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
)
|
72
|
+
for identifying_path in identifying_paths:
|
73
|
+
if not first_identifying_path:
|
74
|
+
first_identifying_path = identifying_path
|
75
|
+
nlookups += 1
|
76
|
+
if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200):
|
77
|
+
return (
|
78
|
+
PortalObject(item.json(), portal=self._portal, type=self.type if raw else None),
|
79
|
+
identifying_path,
|
80
|
+
nlookups
|
81
|
+
)
|
95
82
|
except Exception:
|
96
83
|
pass
|
97
84
|
return None, first_identifying_path, nlookups
|
@@ -158,65 +145,16 @@ class PortalObject:
|
|
158
145
|
return diffs
|
159
146
|
|
160
147
|
@lru_cache(maxsize=1)
|
161
|
-
def _get_identifying_paths(self,
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
return identifying_paths
|
172
|
-
for identifying_property in identifying_properties:
|
173
|
-
if identifying_value := self._data.get(identifying_property):
|
174
|
-
if identifying_property == "uuid":
|
175
|
-
if self.type:
|
176
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
177
|
-
identifying_paths.append(f"/{identifying_value}")
|
178
|
-
# For now at least we include the path both with and without the schema type component,
|
179
|
-
# as for some identifying values, it works (only) with, and some, it works (only) without.
|
180
|
-
# For example: If we have FileSet with "accession", an identifying property, with value
|
181
|
-
# SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
|
182
|
-
# conversely using "submitted_id", also an identifying property, with value
|
183
|
-
# UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
|
184
|
-
# not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
|
185
|
-
elif isinstance(identifying_value, list):
|
186
|
-
for identifying_value_item in identifying_value:
|
187
|
-
if self.type:
|
188
|
-
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
|
189
|
-
identifying_paths.append(f"/{identifying_value_item}")
|
190
|
-
else:
|
191
|
-
# TODO: Import from somewhere ...
|
192
|
-
lookup_options = 0
|
193
|
-
if schema := self.schema:
|
194
|
-
# TODO: Hook into the ref_lookup_strategy thing in structured_data to make
|
195
|
-
# sure we check accession format (since it does not have a pattern).
|
196
|
-
if callable(ref_lookup_strategy):
|
197
|
-
lookup_options, ref_validator = ref_lookup_strategy(
|
198
|
-
self._portal, self.type, schema, identifying_value)
|
199
|
-
if callable(ref_validator):
|
200
|
-
if ref_validator(schema, identifying_property, identifying_value) is False:
|
201
|
-
continue
|
202
|
-
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
203
|
-
if not re.match(pattern, identifying_value):
|
204
|
-
# If this identifying value is for a (identifying) property which has a
|
205
|
-
# pattern, and the value does NOT match the pattern, then do NOT include
|
206
|
-
# this value as an identifying path, since it cannot possibly be found.
|
207
|
-
continue
|
208
|
-
if not lookup_options:
|
209
|
-
lookup_options = Portal.LOOKUP_DEFAULT
|
210
|
-
if Portal.is_lookup_root_first(lookup_options):
|
211
|
-
identifying_paths.append(f"/{identifying_value}")
|
212
|
-
if Portal.is_lookup_specified_type(lookup_options) and self.type:
|
213
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
214
|
-
if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
|
215
|
-
identifying_paths.append(f"/{identifying_value}")
|
216
|
-
if Portal.is_lookup_subtypes(lookup_options):
|
217
|
-
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
218
|
-
identifying_paths.append(f"/{subtype_name}/{identifying_value}")
|
219
|
-
return identifying_paths or None
|
148
|
+
def _get_identifying_paths(self, all: bool = True,
|
149
|
+
ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
|
150
|
+
if not self._portal and (uuid := self.uuid):
|
151
|
+
if all is True and (type := self.type):
|
152
|
+
return [f"/{type}/{uuid}", f"/{uuid}"]
|
153
|
+
return [f"/{uuid}"]
|
154
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
|
155
|
+
return self._portal.get_identifying_paths(self._data,
|
156
|
+
portal_type=self.schema, all=all,
|
157
|
+
lookup_strategy=ref_lookup_strategy) if self._portal else None
|
220
158
|
|
221
159
|
def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
|
222
160
|
"""
|
dcicutils/portal_utils.py
CHANGED
@@ -50,15 +50,16 @@ class Portal:
|
|
50
50
|
FILE_TYPE_SCHEMA_NAME = "File"
|
51
51
|
|
52
52
|
# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
|
53
|
-
# structured_data.py; controlled by an optional
|
53
|
+
# structured_data.py; controlled by an optional lookup_strategy callable; default is
|
54
54
|
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
|
55
55
|
# can choose to lookup root path first, or not lookup root path at all, or not lookup
|
56
|
-
# subtypes at all; the
|
56
|
+
# subtypes at all; the lookup_strategy callable if specified should take a type_name
|
57
57
|
# and value (string) arguments and return an integer of any of the below ORed together.
|
58
58
|
# The main purpose of this is optimization; to minimize portal lookups; since for example,
|
59
59
|
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
|
60
60
|
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
|
61
61
|
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
|
62
|
+
LOOKUP_UNDEFINED = 0
|
62
63
|
LOOKUP_SPECIFIED_TYPE = 0x0001
|
63
64
|
LOOKUP_ROOT = 0x0002
|
64
65
|
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
|
@@ -207,23 +208,6 @@ class Portal:
|
|
207
208
|
def vapp(self) -> Optional[TestApp]:
|
208
209
|
return self._vapp
|
209
210
|
|
210
|
-
@staticmethod
|
211
|
-
def is_lookup_specified_type(lookup_options: int) -> bool:
|
212
|
-
return (lookup_options &
|
213
|
-
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
214
|
-
|
215
|
-
@staticmethod
|
216
|
-
def is_lookup_root(lookup_options: int) -> bool:
|
217
|
-
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
218
|
-
|
219
|
-
@staticmethod
|
220
|
-
def is_lookup_root_first(lookup_options: int) -> bool:
|
221
|
-
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
222
|
-
|
223
|
-
@staticmethod
|
224
|
-
def is_lookup_subtypes(lookup_options: int) -> bool:
|
225
|
-
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
226
|
-
|
227
211
|
def get(self, url: str, follow: bool = True,
|
228
212
|
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
|
229
213
|
url = self.url(url, raw, database)
|
@@ -307,7 +291,10 @@ class Portal:
|
|
307
291
|
|
308
292
|
@lru_cache(maxsize=100)
|
309
293
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
310
|
-
|
294
|
+
try:
|
295
|
+
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
296
|
+
except Exception:
|
297
|
+
return None
|
311
298
|
|
312
299
|
@lru_cache(maxsize=1)
|
313
300
|
def get_schemas(self) -> dict:
|
@@ -419,53 +406,204 @@ class Portal:
|
|
419
406
|
return schemas_super_type_map.get(type_name, [])
|
420
407
|
|
421
408
|
@function_cache(maxsize=100, serialize_key=True)
|
422
|
-
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None
|
409
|
+
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
410
|
+
all: bool = True, lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
|
423
411
|
"""
|
424
|
-
Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any
|
425
|
-
|
426
|
-
no other ordering defined. Returns empty list
|
412
|
+
Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
|
413
|
+
and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
|
414
|
+
no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
|
415
|
+
Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_strategy
|
416
|
+
module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
|
417
|
+
this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.
|
427
418
|
"""
|
419
|
+
def is_lookup_specified_type(lookup_options: int) -> bool:
|
420
|
+
return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
421
|
+
def is_lookup_root(lookup_options: int) -> bool: # noqa
|
422
|
+
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
423
|
+
def is_lookup_root_first(lookup_options: int) -> bool: # noqa
|
424
|
+
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
425
|
+
def is_lookup_subtypes(lookup_options: int) -> bool: # noqa
|
426
|
+
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
427
|
+
|
428
428
|
results = []
|
429
429
|
if not isinstance(portal_object, dict):
|
430
430
|
return results
|
431
|
-
if not isinstance(portal_type, str)
|
432
|
-
if
|
433
|
-
|
431
|
+
if not (isinstance(portal_type, str) and portal_type):
|
432
|
+
if isinstance(portal_type, dict):
|
433
|
+
# It appears that the given portal_type is an actual schema dictionary.
|
434
|
+
portal_type = self.schema_name(portal_type.get("title"))
|
435
|
+
if not (isinstance(portal_type, str) and portal_type):
|
436
|
+
if not (portal_type := self.get_schema_type(portal_object)):
|
437
|
+
return results
|
438
|
+
if not callable(lookup_strategy):
|
439
|
+
lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
|
434
440
|
for identifying_property in self.get_identifying_property_names(portal_type):
|
435
|
-
if identifying_value := portal_object.get(identifying_property):
|
436
|
-
|
437
|
-
|
441
|
+
if not (identifying_value := portal_object.get(identifying_property)):
|
442
|
+
continue
|
443
|
+
# The get_identifying_property_names call above ensures uuid is first if it is in the object.
|
444
|
+
# And also note that ALL schemas do in fact have identifyingProperties which do in fact have
|
445
|
+
# uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
|
446
|
+
# might have a special case to check the Portal object explicitly for uuid, but no need.
|
447
|
+
if identifying_property == "uuid":
|
448
|
+
#
|
449
|
+
# Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
|
450
|
+
# is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
|
451
|
+
#
|
452
|
+
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c
|
453
|
+
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
454
|
+
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
455
|
+
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
456
|
+
# - /file-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
457
|
+
#
|
458
|
+
# Will result in a (HTTP 301) redirect to:
|
459
|
+
#
|
460
|
+
# - /file-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
461
|
+
#
|
462
|
+
# Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
|
463
|
+
# based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
|
464
|
+
# information is contained, for this example, in the snovault.collection decorator for the
|
465
|
+
# endpoint definition in smaht-portal/.../types/file_format.py. This is unfortunate merely because
|
466
|
+
# behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
|
467
|
+
# And note the distinction of just using /{uuid} here rather than /{type}/{uuid} as in the else
|
468
|
+
# statement below is not really necessary; just here for emphasis that this is all that's needed.
|
469
|
+
#
|
470
|
+
if all is True:
|
471
|
+
results.append(f"/{portal_type}/{identifying_value}")
|
472
|
+
results.append(f"/{identifying_value}")
|
473
|
+
elif isinstance(identifying_value, list):
|
474
|
+
for identifying_value_item in identifying_value:
|
475
|
+
if identifying_value_item:
|
438
476
|
results.append(f"/{portal_type}/{identifying_value_item}")
|
439
|
-
|
477
|
+
if all is True:
|
478
|
+
results.append(f"/{identifying_value_item}")
|
479
|
+
else:
|
480
|
+
lookup_options = Portal.LOOKUP_UNDEFINED
|
481
|
+
if schema := self.get_schema(portal_type):
|
482
|
+
if callable(lookup_strategy):
|
483
|
+
lookup_options, validator = lookup_strategy(self._portal, self.type, schema, identifying_value)
|
484
|
+
if callable(validator):
|
485
|
+
if validator(schema, identifying_property, identifying_value) is False:
|
486
|
+
continue
|
487
|
+
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
488
|
+
if not re.match(pattern, identifying_value):
|
489
|
+
# If this identifying value is for a (identifying) property which has a
|
490
|
+
# pattern, and the value does NOT match the pattern, then do NOT include
|
491
|
+
# this value as an identifying path, since it cannot possibly be found.
|
492
|
+
continue
|
493
|
+
if lookup_options == Portal.LOOKUP_UNDEFINED:
|
494
|
+
lookup_options = Portal.LOOKUP_DEFAULT
|
495
|
+
if is_lookup_root_first(lookup_options):
|
440
496
|
results.append(f"/{identifying_value}")
|
441
|
-
|
442
|
-
results.append(f"/{
|
497
|
+
if is_lookup_specified_type(lookup_options) and self.type:
|
498
|
+
results.append(f"/{self.type}/{identifying_value}")
|
499
|
+
if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
|
500
|
+
results.append(f"/{identifying_value}")
|
501
|
+
if is_lookup_subtypes(lookup_options):
|
502
|
+
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
503
|
+
results.append(f"/{subtype_name}/{identifying_value}")
|
443
504
|
return results
|
444
505
|
|
445
506
|
@function_cache(maxsize=100, serialize_key=True)
|
446
|
-
def get_identifying_property_names(self, schema: Union[str, dict]
|
507
|
+
def get_identifying_property_names(self, schema: Union[str, dict],
|
508
|
+
portal_object: Optional[dict] = None) -> List[str]:
|
447
509
|
"""
|
448
|
-
Returns the list of identifying property names for the given Portal schema, which may
|
449
|
-
|
510
|
+
Returns the list of identifying property names for the given Portal schema, which may be
|
511
|
+
either a schema name or a schema object. If a Portal object is also given then restricts this
|
512
|
+
set of identifying properties to those which actually have values within this Portal object.
|
513
|
+
Favors the uuid and identifier property names and defavors the aliases property name; no other
|
514
|
+
ordering imposed. Returns empty list if no identifying properties or otherwise not found.
|
450
515
|
"""
|
451
516
|
results = []
|
452
517
|
if isinstance(schema, str):
|
453
|
-
|
454
|
-
if not (schema := self.get_schema(schema)):
|
455
|
-
return results
|
456
|
-
except Exception:
|
518
|
+
if not (schema := self.get_schema(schema)):
|
457
519
|
return results
|
458
520
|
elif not isinstance(schema, dict):
|
459
521
|
return results
|
460
522
|
if not (identifying_properties := get_identifying_properties(schema)):
|
461
523
|
return results
|
462
|
-
identifying_properties =
|
463
|
-
|
524
|
+
identifying_properties = list(set(identifying_properties)) # paranoid dedup
|
525
|
+
identifying_properties = [*identifying_properties] # copy so as not to change schema if given
|
526
|
+
favored_identifying_properties = ["uuid", "identifier"]
|
527
|
+
defavored_identifying_properties = ["aliases"]
|
528
|
+
for favored_identifying_property in reversed(favored_identifying_properties):
|
464
529
|
if favored_identifying_property in identifying_properties:
|
465
530
|
identifying_properties.remove(favored_identifying_property)
|
466
531
|
identifying_properties.insert(0, favored_identifying_property)
|
532
|
+
for defavored_identifying_property in defavored_identifying_properties:
|
533
|
+
if defavored_identifying_property in identifying_properties:
|
534
|
+
identifying_properties.remove(defavored_identifying_property)
|
535
|
+
identifying_properties.append(defavored_identifying_property)
|
536
|
+
if isinstance(portal_object, dict):
|
537
|
+
for identifying_property in [*identifying_properties]:
|
538
|
+
if portal_object.get(identifying_property) is None:
|
539
|
+
identifying_properties.remove(identifying_property)
|
467
540
|
return identifying_properties
|
468
541
|
|
542
|
+
@staticmethod
|
543
|
+
def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
544
|
+
#
|
545
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
546
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
547
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
548
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
549
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
550
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
551
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
552
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
553
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
554
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
555
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
556
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
557
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
558
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
559
|
+
#
|
560
|
+
def ref_validator(schema: Optional[dict],
|
561
|
+
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
562
|
+
"""
|
563
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
564
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
565
|
+
|
566
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
567
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
568
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
569
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
570
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
571
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
572
|
+
|
573
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
574
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
575
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
576
|
+
identifying property for the given type.
|
577
|
+
"""
|
578
|
+
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
579
|
+
if (property_format == "accession") and (property_name == "accession"):
|
580
|
+
if not Portal._is_accession_id(property_value):
|
581
|
+
return False
|
582
|
+
return None
|
583
|
+
|
584
|
+
DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
|
585
|
+
if not value:
|
586
|
+
return DEFAULT_RESULT
|
587
|
+
if not schema:
|
588
|
+
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
589
|
+
return DEFAULT_RESULT
|
590
|
+
if schema_properties := schema.get("properties"):
|
591
|
+
if schema_properties.get("accession") and Portal._is_accession_id(value):
|
592
|
+
# Case: lookup by accession (only by root).
|
593
|
+
return (Portal.LOOKUP_ROOT, ref_validator)
|
594
|
+
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
595
|
+
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
596
|
+
if re.match(schema_property_pattern_submitted_id, value):
|
597
|
+
# Case: lookup by submitted_id (only by specified type).
|
598
|
+
return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
|
599
|
+
return DEFAULT_RESULT
|
600
|
+
|
601
|
+
@staticmethod
|
602
|
+
def _is_accession_id(value: str) -> bool:
|
603
|
+
# This is here for now because of problems with circular dependencies.
|
604
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
605
|
+
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
606
|
+
|
469
607
|
def url(self, url: str, raw: bool = False, database: bool = False) -> str:
|
470
608
|
if not isinstance(url, str) or not url:
|
471
609
|
return "/"
|
@@ -2,39 +2,45 @@ import re
|
|
2
2
|
from typing import Optional
|
3
3
|
from dcicutils.structured_data import Portal
|
4
4
|
|
5
|
+
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
+
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
+
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
+
|
5
9
|
|
6
10
|
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
7
11
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# /
|
13
|
-
# /
|
14
|
-
#
|
15
|
-
# /
|
16
|
-
#
|
17
|
-
# /
|
18
|
-
# /
|
12
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
13
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
14
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
15
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
16
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
17
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
18
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
19
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
20
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
21
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
22
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
23
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
19
26
|
#
|
20
27
|
def ref_validator(schema: Optional[dict],
|
21
28
|
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
22
29
|
"""
|
23
|
-
Returns False iff
|
24
|
-
the given property name
|
30
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
31
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
25
32
|
|
26
|
-
For example, if the schema is for
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
33
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
34
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
35
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
36
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
37
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
38
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
33
39
|
|
34
|
-
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
-
|
36
|
-
|
37
|
-
|
40
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
41
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
42
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
43
|
+
identifying property for the given type.
|
38
44
|
"""
|
39
45
|
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
40
46
|
if (property_format == "accession") and (property_name == "accession"):
|
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
|
|
62
68
|
|
63
69
|
|
64
70
|
# This is here for now because of problems with circular dependencies.
|
65
|
-
# See: smaht-portal/.../schema_formats.py
|
71
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
66
72
|
def _is_accession_id(value: str) -> bool:
|
67
73
|
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
@@ -47,8 +47,8 @@ dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
|
47
47
|
dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
|
48
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
49
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
50
|
-
dcicutils/portal_object_utils.py,sha256=
|
51
|
-
dcicutils/portal_utils.py,sha256=
|
50
|
+
dcicutils/portal_object_utils.py,sha256=ryNoSFKEdaj4Y5MPryVanK2Hg32-Kg6r0BwvOPf2Ov0,11227
|
51
|
+
dcicutils/portal_utils.py,sha256=ySXVOgMkfJN2x5o5ZpTw9PamdcvCDLstIj1m0Gmwu-o,44070
|
52
52
|
dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
|
53
53
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
54
54
|
dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
|
@@ -66,15 +66,15 @@ dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,2
|
|
66
66
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
67
67
|
dcicutils/structured_data.py,sha256=sm8x08ckPZcIcyBaSlQRGrOD3YL9d09gz-xB3_TAWGE,64516
|
68
68
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
69
|
-
dcicutils/submitr/ref_lookup_strategy.py,sha256=
|
69
|
+
dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
|
70
70
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
71
71
|
dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
|
72
72
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
73
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
75
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
-
dcicutils-8.8.6.
|
77
|
-
dcicutils-8.8.6.
|
78
|
-
dcicutils-8.8.6.
|
79
|
-
dcicutils-8.8.6.
|
80
|
-
dcicutils-8.8.6.
|
76
|
+
dcicutils-8.8.6.1b5.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.8.6.1b5.dist-info/METADATA,sha256=eDrvr1n8DzjE9FOjKSWjHho43PyTQWvejmH-V4X-DXQ,3439
|
78
|
+
dcicutils-8.8.6.1b5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.8.6.1b5.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.8.6.1b5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|