dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- dcicutils/command_utils.py +69 -1
- dcicutils/creds_utils.py +1 -1
- dcicutils/ff_utils.py +4 -1
- dcicutils/file_utils.py +250 -41
- dcicutils/http_utils.py +39 -0
- dcicutils/misc_utils.py +82 -5
- dcicutils/portal_object_utils.py +24 -89
- dcicutils/portal_utils.py +249 -37
- dcicutils/schema_utils.py +1 -1
- dcicutils/scripts/view_portal_object.py +87 -5
- dcicutils/structured_data.py +59 -17
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- dcicutils/tmpfile_utils.py +50 -10
- dcicutils/zip_utils.py +27 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA +6 -4
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD +19 -18
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt +0 -0
dcicutils/portal_object_utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from functools import lru_cache
|
3
|
-
import re
|
4
3
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
5
4
|
from dcicutils.data_readers import RowReader
|
6
5
|
from dcicutils.misc_utils import create_readonly_object
|
@@ -14,11 +13,9 @@ class PortalObject:
|
|
14
13
|
|
15
14
|
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
|
16
15
|
|
17
|
-
def __init__(self, data: dict, portal: Portal = None,
|
18
|
-
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
|
16
|
+
def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
|
19
17
|
self._data = data if isinstance(data, dict) else {}
|
20
18
|
self._portal = portal if isinstance(portal, Portal) else None
|
21
|
-
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
|
22
19
|
self._type = type if isinstance(type, str) else ""
|
23
20
|
|
24
21
|
@property
|
@@ -32,7 +29,7 @@ class PortalObject:
|
|
32
29
|
@property
|
33
30
|
@lru_cache(maxsize=1)
|
34
31
|
def type(self) -> str:
|
35
|
-
return self._type or Portal.get_schema_type(self._data) or
|
32
|
+
return self._type or Portal.get_schema_type(self._data) or ""
|
36
33
|
|
37
34
|
@property
|
38
35
|
@lru_cache(maxsize=1)
|
@@ -47,7 +44,7 @@ class PortalObject:
|
|
47
44
|
@property
|
48
45
|
@lru_cache(maxsize=1)
|
49
46
|
def schema(self) -> Optional[dict]:
|
50
|
-
return self.
|
47
|
+
return self._portal.get_schema(self.type) if self._portal else None
|
51
48
|
|
52
49
|
def copy(self) -> PortalObject:
|
53
50
|
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
|
@@ -59,39 +56,29 @@ class PortalObject:
|
|
59
56
|
Returns the list of all identifying property names of this Portal object which actually have values.
|
60
57
|
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
|
61
58
|
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
|
59
|
+
Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
|
62
60
|
"""
|
63
|
-
|
64
|
-
|
65
|
-
identifying_properties = []
|
66
|
-
for identifying_property in schema_identifying_properties:
|
67
|
-
if identifying_property not in ["uuid", "identifier", "aliases"]:
|
68
|
-
if self._data.get(identifying_property):
|
69
|
-
identifying_properties.append(identifying_property)
|
70
|
-
if self._data.get("identifier"):
|
71
|
-
identifying_properties.insert(0, "identifier")
|
72
|
-
if self._data.get("uuid"):
|
73
|
-
identifying_properties.insert(0, "uuid")
|
74
|
-
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
|
75
|
-
identifying_properties.append("aliases")
|
76
|
-
return identifying_properties or None
|
61
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_property_names (2024-05-26).
|
62
|
+
return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
|
77
63
|
|
78
64
|
@lru_cache(maxsize=8192)
|
79
65
|
def lookup(self, raw: bool = False,
|
80
66
|
ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
|
67
|
+
if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
|
68
|
+
return None, None, 0
|
81
69
|
nlookups = 0
|
82
70
|
first_identifying_path = None
|
83
71
|
try:
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
)
|
72
|
+
for identifying_path in identifying_paths:
|
73
|
+
if not first_identifying_path:
|
74
|
+
first_identifying_path = identifying_path
|
75
|
+
nlookups += 1
|
76
|
+
if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200):
|
77
|
+
return (
|
78
|
+
PortalObject(item.json(), portal=self._portal, type=self.type if raw else None),
|
79
|
+
identifying_path,
|
80
|
+
nlookups
|
81
|
+
)
|
95
82
|
except Exception:
|
96
83
|
pass
|
97
84
|
return None, first_identifying_path, nlookups
|
@@ -159,64 +146,12 @@ class PortalObject:
|
|
159
146
|
|
160
147
|
@lru_cache(maxsize=1)
|
161
148
|
def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
if self.type:
|
169
|
-
identifying_paths.append(f"/{self.type}/{self.uuid}")
|
170
|
-
identifying_paths.append(f"/{self.uuid}")
|
171
|
-
return identifying_paths
|
172
|
-
for identifying_property in identifying_properties:
|
173
|
-
if identifying_value := self._data.get(identifying_property):
|
174
|
-
if identifying_property == "uuid":
|
175
|
-
if self.type:
|
176
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
177
|
-
identifying_paths.append(f"/{identifying_value}")
|
178
|
-
# For now at least we include the path both with and without the schema type component,
|
179
|
-
# as for some identifying values, it works (only) with, and some, it works (only) without.
|
180
|
-
# For example: If we have FileSet with "accession", an identifying property, with value
|
181
|
-
# SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
|
182
|
-
# conversely using "submitted_id", also an identifying property, with value
|
183
|
-
# UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
|
184
|
-
# not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
|
185
|
-
elif isinstance(identifying_value, list):
|
186
|
-
for identifying_value_item in identifying_value:
|
187
|
-
if self.type:
|
188
|
-
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
|
189
|
-
identifying_paths.append(f"/{identifying_value_item}")
|
190
|
-
else:
|
191
|
-
# TODO: Import from somewhere ...
|
192
|
-
lookup_options = 0
|
193
|
-
if schema := self.schema:
|
194
|
-
# TODO: Hook into the ref_lookup_strategy thing in structured_data to make
|
195
|
-
# sure we check accession format (since it does not have a pattern).
|
196
|
-
if callable(ref_lookup_strategy):
|
197
|
-
lookup_options, ref_validator = ref_lookup_strategy(
|
198
|
-
self._portal, self.type, schema, identifying_value)
|
199
|
-
if callable(ref_validator):
|
200
|
-
if ref_validator(schema, identifying_property, identifying_value) is False:
|
201
|
-
continue
|
202
|
-
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
203
|
-
if not re.match(pattern, identifying_value):
|
204
|
-
# If this identifying value is for a (identifying) property which has a
|
205
|
-
# pattern, and the value does NOT match the pattern, then do NOT include
|
206
|
-
# this value as an identifying path, since it cannot possibly be found.
|
207
|
-
continue
|
208
|
-
if not lookup_options:
|
209
|
-
lookup_options = Portal.LOOKUP_DEFAULT
|
210
|
-
if Portal.is_lookup_root_first(lookup_options):
|
211
|
-
identifying_paths.append(f"/{identifying_value}")
|
212
|
-
if Portal.is_lookup_specified_type(lookup_options) and self.type:
|
213
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
214
|
-
if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
|
215
|
-
identifying_paths.append(f"/{identifying_value}")
|
216
|
-
if Portal.is_lookup_subtypes(lookup_options):
|
217
|
-
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
218
|
-
identifying_paths.append(f"/{subtype_name}/{identifying_value}")
|
219
|
-
return identifying_paths or None
|
149
|
+
if not self._portal and (uuid := self.uuid):
|
150
|
+
return [f"/{uuid}"]
|
151
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
|
152
|
+
return self._portal.get_identifying_paths(self._data,
|
153
|
+
portal_type=self.schema,
|
154
|
+
lookup_strategy=ref_lookup_strategy) if self._portal else None
|
220
155
|
|
221
156
|
def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
|
222
157
|
"""
|
dcicutils/portal_utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from collections import deque
|
2
2
|
from functools import lru_cache
|
3
|
+
from dcicutils.function_cache_decorator import function_cache
|
3
4
|
import io
|
4
5
|
import json
|
5
6
|
from pyramid.config import Configurator as PyramidConfigurator
|
@@ -16,8 +17,9 @@ from uuid import uuid4 as uuid
|
|
16
17
|
from webtest.app import TestApp, TestResponse
|
17
18
|
from wsgiref.simple_server import make_server as wsgi_make_server
|
18
19
|
from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
|
19
|
-
from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
|
20
|
+
from dcicutils.ff_utils import delete_metadata, get_metadata, get_schema, patch_metadata, post_metadata, purge_metadata
|
20
21
|
from dcicutils.misc_utils import to_camel_case, VirtualApp
|
22
|
+
from dcicutils.schema_utils import get_identifying_properties
|
21
23
|
from dcicutils.tmpfile_utils import temporary_file
|
22
24
|
|
23
25
|
Portal = Type["Portal"] # Forward type reference for type hints.
|
@@ -48,15 +50,16 @@ class Portal:
|
|
48
50
|
FILE_TYPE_SCHEMA_NAME = "File"
|
49
51
|
|
50
52
|
# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
|
51
|
-
# structured_data.py; controlled by an optional
|
53
|
+
# structured_data.py; controlled by an optional lookup_strategy callable; default is
|
52
54
|
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
|
53
55
|
# can choose to lookup root path first, or not lookup root path at all, or not lookup
|
54
|
-
# subtypes at all; the
|
56
|
+
# subtypes at all; the lookup_strategy callable if specified should take a type_name
|
55
57
|
# and value (string) arguments and return an integer of any of the below ORed together.
|
56
58
|
# The main purpose of this is optimization; to minimize portal lookups; since for example,
|
57
59
|
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
|
58
60
|
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
|
59
61
|
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
|
62
|
+
LOOKUP_UNDEFINED = 0
|
60
63
|
LOOKUP_SPECIFIED_TYPE = 0x0001
|
61
64
|
LOOKUP_ROOT = 0x0002
|
62
65
|
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
|
@@ -205,23 +208,6 @@ class Portal:
|
|
205
208
|
def vapp(self) -> Optional[TestApp]:
|
206
209
|
return self._vapp
|
207
210
|
|
208
|
-
@staticmethod
|
209
|
-
def is_lookup_specified_type(lookup_options: int) -> bool:
|
210
|
-
return (lookup_options &
|
211
|
-
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
212
|
-
|
213
|
-
@staticmethod
|
214
|
-
def is_lookup_root(lookup_options: int) -> bool:
|
215
|
-
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
216
|
-
|
217
|
-
@staticmethod
|
218
|
-
def is_lookup_root_first(lookup_options: int) -> bool:
|
219
|
-
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
220
|
-
|
221
|
-
@staticmethod
|
222
|
-
def is_lookup_subtypes(lookup_options: int) -> bool:
|
223
|
-
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
224
|
-
|
225
211
|
def get(self, url: str, follow: bool = True,
|
226
212
|
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
|
227
213
|
url = self.url(url, raw, database)
|
@@ -294,6 +280,20 @@ class Portal:
|
|
294
280
|
add_on="check_only=True" if check_only else "")
|
295
281
|
return self.post(f"/{object_type}{'?check_only=True' if check_only else ''}", data).json()
|
296
282
|
|
283
|
+
def delete_metadata(self, object_id: str) -> Optional[dict]:
|
284
|
+
if isinstance(object_id, str) and object_id:
|
285
|
+
if self.key:
|
286
|
+
return delete_metadata(obj_id=object_id, key=self.key)
|
287
|
+
else:
|
288
|
+
return self.patch_metadata(object_id, {"status": "deleted"})
|
289
|
+
return None
|
290
|
+
|
291
|
+
def purge_metadata(self, object_id: str) -> Optional[dict]:
|
292
|
+
if isinstance(object_id, str) and object_id:
|
293
|
+
if self.key:
|
294
|
+
return purge_metadata(obj_id=object_id, key=self.key)
|
295
|
+
return None
|
296
|
+
|
297
297
|
def get_health(self) -> OptionalResponse:
|
298
298
|
return self.get("/health")
|
299
299
|
|
@@ -305,7 +305,10 @@ class Portal:
|
|
305
305
|
|
306
306
|
@lru_cache(maxsize=100)
|
307
307
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
308
|
-
|
308
|
+
try:
|
309
|
+
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
310
|
+
except Exception:
|
311
|
+
return None
|
309
312
|
|
310
313
|
@lru_cache(maxsize=1)
|
311
314
|
def get_schemas(self) -> dict:
|
@@ -416,6 +419,215 @@ class Portal:
|
|
416
419
|
return []
|
417
420
|
return schemas_super_type_map.get(type_name, [])
|
418
421
|
|
422
|
+
@function_cache(maxsize=100, serialize_key=True)
|
423
|
+
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
424
|
+
first_only: bool = False,
|
425
|
+
lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
|
426
|
+
"""
|
427
|
+
Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
|
428
|
+
and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
|
429
|
+
no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
|
430
|
+
Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_strategy
|
431
|
+
module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
|
432
|
+
this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.
|
433
|
+
"""
|
434
|
+
def is_lookup_specified_type(lookup_options: int) -> bool:
|
435
|
+
return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
436
|
+
def is_lookup_root(lookup_options: int) -> bool: # noqa
|
437
|
+
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
438
|
+
def is_lookup_root_first(lookup_options: int) -> bool: # noqa
|
439
|
+
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
440
|
+
def is_lookup_subtypes(lookup_options: int) -> bool: # noqa
|
441
|
+
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
442
|
+
|
443
|
+
results = []
|
444
|
+
if not isinstance(portal_object, dict):
|
445
|
+
return results
|
446
|
+
if not (isinstance(portal_type, str) and portal_type):
|
447
|
+
if isinstance(portal_type, dict):
|
448
|
+
# It appears that the given portal_type is an actual schema dictionary.
|
449
|
+
portal_type = self.schema_name(portal_type.get("title"))
|
450
|
+
if not (isinstance(portal_type, str) and portal_type):
|
451
|
+
if not (portal_type := self.get_schema_type(portal_object)):
|
452
|
+
return results
|
453
|
+
if not callable(lookup_strategy):
|
454
|
+
lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
|
455
|
+
for identifying_property in self.get_identifying_property_names(portal_type):
|
456
|
+
if not (identifying_value := portal_object.get(identifying_property)):
|
457
|
+
continue
|
458
|
+
# The get_identifying_property_names call above ensures uuid is first if it is in the object.
|
459
|
+
# And also note that ALL schemas do in fact have identifyingProperties which do in fact have
|
460
|
+
# uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
|
461
|
+
# might have a special case to check the Portal object explicitly for uuid, but no need.
|
462
|
+
if identifying_property == "uuid":
|
463
|
+
#
|
464
|
+
# Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
|
465
|
+
# is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
|
466
|
+
#
|
467
|
+
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c
|
468
|
+
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
469
|
+
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
470
|
+
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
471
|
+
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
472
|
+
#
|
473
|
+
# Will result in a (HTTP 301) redirect to:
|
474
|
+
#
|
475
|
+
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
476
|
+
#
|
477
|
+
# Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
|
478
|
+
# based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
|
479
|
+
# information is contained, for this example, in the snovault.collection decorator for the
|
480
|
+
# endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because
|
481
|
+
# behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
|
482
|
+
# And note the distinction of just using /{uuid} here rather than /{type}/{uuid} as in the else
|
483
|
+
# statement below is not really necessary; just here for emphasis that this is all that's needed.
|
484
|
+
#
|
485
|
+
if first_only is True:
|
486
|
+
results.append(f"/{portal_type}/{identifying_value}")
|
487
|
+
else:
|
488
|
+
results.append(f"/{identifying_value}")
|
489
|
+
elif isinstance(identifying_value, list):
|
490
|
+
for identifying_value_item in identifying_value:
|
491
|
+
if identifying_value_item:
|
492
|
+
results.append(f"/{portal_type}/{identifying_value_item}")
|
493
|
+
else:
|
494
|
+
lookup_options = Portal.LOOKUP_UNDEFINED
|
495
|
+
if schema := self.get_schema(portal_type):
|
496
|
+
if callable(lookup_strategy):
|
497
|
+
lookup_options, validator = lookup_strategy(self, portal_type, schema, identifying_value)
|
498
|
+
if callable(validator):
|
499
|
+
if validator(schema, identifying_property, identifying_value) is False:
|
500
|
+
continue
|
501
|
+
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
502
|
+
if not re.match(pattern, identifying_value):
|
503
|
+
# If this identifying value is for a (identifying) property which has a
|
504
|
+
# pattern, and the value does NOT match the pattern, then do NOT include
|
505
|
+
# this value as an identifying path, since it cannot possibly be found.
|
506
|
+
continue
|
507
|
+
if lookup_options == Portal.LOOKUP_UNDEFINED:
|
508
|
+
lookup_options = Portal.LOOKUP_DEFAULT
|
509
|
+
if is_lookup_root_first(lookup_options):
|
510
|
+
results.append(f"/{identifying_value}")
|
511
|
+
if is_lookup_specified_type(lookup_options) and portal_type:
|
512
|
+
results.append(f"/{portal_type}/{identifying_value}")
|
513
|
+
if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
|
514
|
+
results.append(f"/{identifying_value}")
|
515
|
+
if is_lookup_subtypes(lookup_options):
|
516
|
+
for subtype_name in self.get_schema_subtype_names(portal_type):
|
517
|
+
results.append(f"/{subtype_name}/{identifying_value}")
|
518
|
+
if (first_only is True) and results:
|
519
|
+
return results
|
520
|
+
return results
|
521
|
+
|
522
|
+
@function_cache(maxsize=100, serialize_key=True)
|
523
|
+
def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
524
|
+
lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]:
|
525
|
+
if identifying_paths := self.get_identifying_paths(portal_object, portal_type, first_only=True,
|
526
|
+
lookup_strategy=lookup_strategy):
|
527
|
+
return identifying_paths[0]
|
528
|
+
return None
|
529
|
+
|
530
|
+
@function_cache(maxsize=100, serialize_key=True)
|
531
|
+
def get_identifying_property_names(self, schema: Union[str, dict],
|
532
|
+
portal_object: Optional[dict] = None) -> List[str]:
|
533
|
+
"""
|
534
|
+
Returns the list of identifying property names for the given Portal schema, which may be
|
535
|
+
either a schema name or a schema object. If a Portal object is also given then restricts this
|
536
|
+
set of identifying properties to those which actually have values within this Portal object.
|
537
|
+
Favors the uuid and identifier property names and defavors the aliases property name; no other
|
538
|
+
ordering imposed. Returns empty list if no identifying properties or otherwise not found.
|
539
|
+
"""
|
540
|
+
results = []
|
541
|
+
if isinstance(schema, str):
|
542
|
+
if not (schema := self.get_schema(schema)):
|
543
|
+
return results
|
544
|
+
elif not isinstance(schema, dict):
|
545
|
+
return results
|
546
|
+
if not (identifying_properties := get_identifying_properties(schema)):
|
547
|
+
return results
|
548
|
+
identifying_properties = list(set(identifying_properties)) # paranoid dedup
|
549
|
+
identifying_properties = [*identifying_properties] # copy so as not to change schema if given
|
550
|
+
favored_identifying_properties = ["uuid", "identifier"]
|
551
|
+
defavored_identifying_properties = ["aliases"]
|
552
|
+
for favored_identifying_property in reversed(favored_identifying_properties):
|
553
|
+
if favored_identifying_property in identifying_properties:
|
554
|
+
identifying_properties.remove(favored_identifying_property)
|
555
|
+
identifying_properties.insert(0, favored_identifying_property)
|
556
|
+
for defavored_identifying_property in defavored_identifying_properties:
|
557
|
+
if defavored_identifying_property in identifying_properties:
|
558
|
+
identifying_properties.remove(defavored_identifying_property)
|
559
|
+
identifying_properties.append(defavored_identifying_property)
|
560
|
+
if isinstance(portal_object, dict):
|
561
|
+
for identifying_property in [*identifying_properties]:
|
562
|
+
if portal_object.get(identifying_property) is None:
|
563
|
+
identifying_properties.remove(identifying_property)
|
564
|
+
return identifying_properties
|
565
|
+
|
566
|
+
@staticmethod
|
567
|
+
def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
568
|
+
#
|
569
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
570
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
571
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
572
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
573
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
574
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
575
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
576
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
577
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
578
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
579
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
580
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
581
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
582
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
583
|
+
#
|
584
|
+
def ref_validator(schema: Optional[dict],
|
585
|
+
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
586
|
+
"""
|
587
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
588
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
589
|
+
|
590
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
591
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
592
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
593
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
594
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
595
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
596
|
+
|
597
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
598
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
599
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
600
|
+
identifying property for the given type.
|
601
|
+
"""
|
602
|
+
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
603
|
+
if (property_format == "accession") and (property_name == "accession"):
|
604
|
+
if not Portal._is_accession_id(property_value):
|
605
|
+
return False
|
606
|
+
return None
|
607
|
+
|
608
|
+
DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
|
609
|
+
if not value:
|
610
|
+
return DEFAULT_RESULT
|
611
|
+
if not schema:
|
612
|
+
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
613
|
+
return DEFAULT_RESULT
|
614
|
+
if schema_properties := schema.get("properties"):
|
615
|
+
if schema_properties.get("accession") and Portal._is_accession_id(value):
|
616
|
+
# Case: lookup by accession (only by root).
|
617
|
+
return (Portal.LOOKUP_ROOT, ref_validator)
|
618
|
+
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
619
|
+
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
620
|
+
if re.match(schema_property_pattern_submitted_id, value):
|
621
|
+
# Case: lookup by submitted_id (only by specified type).
|
622
|
+
return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
|
623
|
+
return DEFAULT_RESULT
|
624
|
+
|
625
|
+
@staticmethod
|
626
|
+
def _is_accession_id(value: str) -> bool:
|
627
|
+
# This is here for now because of problems with circular dependencies.
|
628
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
629
|
+
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
630
|
+
|
419
631
|
def url(self, url: str, raw: bool = False, database: bool = False) -> str:
|
420
632
|
if not isinstance(url, str) or not url:
|
421
633
|
return "/"
|
@@ -516,6 +728,22 @@ class Portal:
|
|
516
728
|
response = TestResponseWrapper(response)
|
517
729
|
return response
|
518
730
|
|
731
|
+
@staticmethod
|
732
|
+
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
733
|
+
if isinstance(arg, TestApp):
|
734
|
+
return arg
|
735
|
+
elif isinstance(arg, VirtualApp):
|
736
|
+
if not isinstance(arg.wrapped_app, TestApp):
|
737
|
+
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
738
|
+
return arg.wrapped_app
|
739
|
+
if isinstance(arg, PyramidRouter):
|
740
|
+
router = arg
|
741
|
+
elif isinstance(arg, str) or not arg:
|
742
|
+
router = pyramid_get_app(arg or "development.ini", "app")
|
743
|
+
else:
|
744
|
+
raise Exception("Portal._create_vapp argument error.")
|
745
|
+
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
746
|
+
|
519
747
|
@staticmethod
|
520
748
|
def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
|
521
749
|
if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
|
@@ -547,22 +775,6 @@ class Portal:
|
|
547
775
|
with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
|
548
776
|
return Portal(ini_file)
|
549
777
|
|
550
|
-
@staticmethod
|
551
|
-
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
552
|
-
if isinstance(arg, TestApp):
|
553
|
-
return arg
|
554
|
-
elif isinstance(arg, VirtualApp):
|
555
|
-
if not isinstance(arg.wrapped_app, TestApp):
|
556
|
-
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
557
|
-
return arg.wrapped_app
|
558
|
-
if isinstance(arg, PyramidRouter):
|
559
|
-
router = arg
|
560
|
-
elif isinstance(arg, str) or not arg:
|
561
|
-
router = pyramid_get_app(arg or "development.ini", "app")
|
562
|
-
else:
|
563
|
-
raise Exception("Portal._create_vapp argument error.")
|
564
|
-
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
565
|
-
|
566
778
|
@staticmethod
|
567
779
|
def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
|
568
780
|
if isinstance(endpoints, dict):
|
dcicutils/schema_utils.py
CHANGED
@@ -190,7 +190,7 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
|
|
190
190
|
|
191
191
|
def is_link(property_schema: Dict[str, Any]) -> bool:
|
192
192
|
"""Is property schema a link?"""
|
193
|
-
return property_schema.get(SchemaConstants.LINK_TO
|
193
|
+
return bool(property_schema.get(SchemaConstants.LINK_TO))
|
194
194
|
|
195
195
|
|
196
196
|
def get_enum(property_schema: Dict[str, Any]) -> List[str]:
|