dcicutils 8.8.6.1b11__tar.gz → 8.9.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/PKG-INFO +1 -1
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/portal_object_utils.py +89 -24
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/portal_utils.py +36 -234
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/schema_utils.py +16 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/structured_data.py +11 -32
- dcicutils-8.9.0/dcicutils/submitr/ref_lookup_strategy.py +67 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/pyproject.toml +1 -1
- dcicutils-8.8.6.1b11/dcicutils/submitr/ref_lookup_strategy.py +0 -73
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/LICENSE.txt +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/README.rst +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/__init__.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/base.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/beanstalk_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/bundle_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/captured_output.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/cloudformation_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/codebuild_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/command_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/common.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/contribution_scripts.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/contribution_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/creds_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/data_readers.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/data_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/datetime_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/deployment_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/diff_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/docker_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/ecr_scripts.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/ecr_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/ecs_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/env_base.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/env_manager.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/env_scripts.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/env_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/env_utils_legacy.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/es_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/exceptions.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/ff_mocks.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/ff_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/file_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/function_cache_decorator.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/glacier_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/http_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/jh_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/kibana/dashboards.json +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/kibana/readme.md +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/lang_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/log_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/misc_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/obfuscation_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/opensearch_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/progress_bar.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/project_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/qa_checkers.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/qa_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/redis_tools.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/redis_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/s3_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/scripts/publish_to_pypi.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/scripts/run_license_checker.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/scripts/view_portal_object.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/secrets_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/sheet_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/snapshot_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/ssl_certificate_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/submitr/progress_constants.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/task_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/tmpfile_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/trace_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/validation_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/variant_utils.py +0 -0
- {dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/zip_utils.py +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from functools import lru_cache
|
3
|
+
import re
|
3
4
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
4
5
|
from dcicutils.data_readers import RowReader
|
5
6
|
from dcicutils.misc_utils import create_readonly_object
|
@@ -13,9 +14,11 @@ class PortalObject:
|
|
13
14
|
|
14
15
|
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
|
15
16
|
|
16
|
-
def __init__(self, data: dict, portal:
|
17
|
+
def __init__(self, data: dict, portal: Portal = None,
|
18
|
+
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
|
17
19
|
self._data = data if isinstance(data, dict) else {}
|
18
20
|
self._portal = portal if isinstance(portal, Portal) else None
|
21
|
+
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
|
19
22
|
self._type = type if isinstance(type, str) else ""
|
20
23
|
|
21
24
|
@property
|
@@ -29,7 +32,7 @@ class PortalObject:
|
|
29
32
|
@property
|
30
33
|
@lru_cache(maxsize=1)
|
31
34
|
def type(self) -> str:
|
32
|
-
return self._type or Portal.get_schema_type(self._data) or ""
|
35
|
+
return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else "")
|
33
36
|
|
34
37
|
@property
|
35
38
|
@lru_cache(maxsize=1)
|
@@ -44,7 +47,7 @@ class PortalObject:
|
|
44
47
|
@property
|
45
48
|
@lru_cache(maxsize=1)
|
46
49
|
def schema(self) -> Optional[dict]:
|
47
|
-
return self._portal.get_schema(self.type) if self._portal else None
|
50
|
+
return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None)
|
48
51
|
|
49
52
|
def copy(self) -> PortalObject:
|
50
53
|
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
|
@@ -56,29 +59,39 @@ class PortalObject:
|
|
56
59
|
Returns the list of all identifying property names of this Portal object which actually have values.
|
57
60
|
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
|
58
61
|
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
|
59
|
-
Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
|
60
62
|
"""
|
61
|
-
|
62
|
-
|
63
|
+
if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
|
64
|
+
return None
|
65
|
+
identifying_properties = []
|
66
|
+
for identifying_property in schema_identifying_properties:
|
67
|
+
if identifying_property not in ["uuid", "identifier", "aliases"]:
|
68
|
+
if self._data.get(identifying_property):
|
69
|
+
identifying_properties.append(identifying_property)
|
70
|
+
if self._data.get("identifier"):
|
71
|
+
identifying_properties.insert(0, "identifier")
|
72
|
+
if self._data.get("uuid"):
|
73
|
+
identifying_properties.insert(0, "uuid")
|
74
|
+
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
|
75
|
+
identifying_properties.append("aliases")
|
76
|
+
return identifying_properties or None
|
63
77
|
|
64
78
|
@lru_cache(maxsize=8192)
|
65
79
|
def lookup(self, raw: bool = False,
|
66
80
|
ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
|
67
|
-
if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
|
68
|
-
return None, None, 0
|
69
81
|
nlookups = 0
|
70
82
|
first_identifying_path = None
|
71
83
|
try:
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
84
|
+
if identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy):
|
85
|
+
for identifying_path in identifying_paths:
|
86
|
+
if not first_identifying_path:
|
87
|
+
first_identifying_path = identifying_path
|
88
|
+
nlookups += 1
|
89
|
+
if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
|
90
|
+
return (
|
91
|
+
PortalObject(value.json(), portal=self._portal, type=self.type if raw else None),
|
92
|
+
identifying_path,
|
93
|
+
nlookups
|
94
|
+
)
|
82
95
|
except Exception:
|
83
96
|
pass
|
84
97
|
return None, first_identifying_path, nlookups
|
@@ -146,12 +159,64 @@ class PortalObject:
|
|
146
159
|
|
147
160
|
@lru_cache(maxsize=1)
|
148
161
|
def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
162
|
+
"""
|
163
|
+
Returns a list of the possible Portal URL paths identifying this Portal object.
|
164
|
+
"""
|
165
|
+
identifying_paths = []
|
166
|
+
if not (identifying_properties := self.identifying_properties):
|
167
|
+
if self.uuid:
|
168
|
+
if self.type:
|
169
|
+
identifying_paths.append(f"/{self.type}/{self.uuid}")
|
170
|
+
identifying_paths.append(f"/{self.uuid}")
|
171
|
+
return identifying_paths
|
172
|
+
for identifying_property in identifying_properties:
|
173
|
+
if identifying_value := self._data.get(identifying_property):
|
174
|
+
if identifying_property == "uuid":
|
175
|
+
if self.type:
|
176
|
+
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
177
|
+
identifying_paths.append(f"/{identifying_value}")
|
178
|
+
# For now at least we include the path both with and without the schema type component,
|
179
|
+
# as for some identifying values, it works (only) with, and some, it works (only) without.
|
180
|
+
# For example: If we have FileSet with "accession", an identifying property, with value
|
181
|
+
# SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
|
182
|
+
# conversely using "submitted_id", also an identifying property, with value
|
183
|
+
# UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
|
184
|
+
# not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
|
185
|
+
elif isinstance(identifying_value, list):
|
186
|
+
for identifying_value_item in identifying_value:
|
187
|
+
if self.type:
|
188
|
+
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
|
189
|
+
identifying_paths.append(f"/{identifying_value_item}")
|
190
|
+
else:
|
191
|
+
# TODO: Import from somewhere ...
|
192
|
+
lookup_options = 0
|
193
|
+
if schema := self.schema:
|
194
|
+
# TODO: Hook into the ref_lookup_strategy thing in structured_data to make
|
195
|
+
# sure we check accession format (since it does not have a pattern).
|
196
|
+
if callable(ref_lookup_strategy):
|
197
|
+
lookup_options, ref_validator = ref_lookup_strategy(
|
198
|
+
self._portal, self.type, schema, identifying_value)
|
199
|
+
if callable(ref_validator):
|
200
|
+
if ref_validator(schema, identifying_property, identifying_value) is False:
|
201
|
+
continue
|
202
|
+
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
203
|
+
if not re.match(pattern, identifying_value):
|
204
|
+
# If this identifying value is for a (identifying) property which has a
|
205
|
+
# pattern, and the value does NOT match the pattern, then do NOT include
|
206
|
+
# this value as an identifying path, since it cannot possibly be found.
|
207
|
+
continue
|
208
|
+
if not lookup_options:
|
209
|
+
lookup_options = Portal.LOOKUP_DEFAULT
|
210
|
+
if Portal.is_lookup_root_first(lookup_options):
|
211
|
+
identifying_paths.append(f"/{identifying_value}")
|
212
|
+
if Portal.is_lookup_specified_type(lookup_options) and self.type:
|
213
|
+
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
214
|
+
if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
|
215
|
+
identifying_paths.append(f"/{identifying_value}")
|
216
|
+
if Portal.is_lookup_subtypes(lookup_options):
|
217
|
+
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
218
|
+
identifying_paths.append(f"/{subtype_name}/{identifying_value}")
|
219
|
+
return identifying_paths or None
|
155
220
|
|
156
221
|
def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
|
157
222
|
"""
|
@@ -1,6 +1,5 @@
|
|
1
1
|
from collections import deque
|
2
2
|
from functools import lru_cache
|
3
|
-
from dcicutils.function_cache_decorator import function_cache
|
4
3
|
import io
|
5
4
|
import json
|
6
5
|
from pyramid.config import Configurator as PyramidConfigurator
|
@@ -19,7 +18,6 @@ from wsgiref.simple_server import make_server as wsgi_make_server
|
|
19
18
|
from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
|
20
19
|
from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
|
21
20
|
from dcicutils.misc_utils import to_camel_case, VirtualApp
|
22
|
-
from dcicutils.schema_utils import get_identifying_properties
|
23
21
|
from dcicutils.tmpfile_utils import temporary_file
|
24
22
|
|
25
23
|
Portal = Type["Portal"] # Forward type reference for type hints.
|
@@ -50,16 +48,15 @@ class Portal:
|
|
50
48
|
FILE_TYPE_SCHEMA_NAME = "File"
|
51
49
|
|
52
50
|
# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
|
53
|
-
# structured_data.py; controlled by an optional
|
51
|
+
# structured_data.py; controlled by an optional ref_lookup_strategy callable; default is
|
54
52
|
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
|
55
53
|
# can choose to lookup root path first, or not lookup root path at all, or not lookup
|
56
|
-
# subtypes at all; the
|
54
|
+
# subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
|
57
55
|
# and value (string) arguments and return an integer of any of the below ORed together.
|
58
56
|
# The main purpose of this is optimization; to minimize portal lookups; since for example,
|
59
57
|
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
|
60
58
|
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
|
61
59
|
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
|
62
|
-
LOOKUP_UNDEFINED = 0
|
63
60
|
LOOKUP_SPECIFIED_TYPE = 0x0001
|
64
61
|
LOOKUP_ROOT = 0x0002
|
65
62
|
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
|
@@ -208,6 +205,23 @@ class Portal:
|
|
208
205
|
def vapp(self) -> Optional[TestApp]:
|
209
206
|
return self._vapp
|
210
207
|
|
208
|
+
@staticmethod
|
209
|
+
def is_lookup_specified_type(lookup_options: int) -> bool:
|
210
|
+
return (lookup_options &
|
211
|
+
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
212
|
+
|
213
|
+
@staticmethod
|
214
|
+
def is_lookup_root(lookup_options: int) -> bool:
|
215
|
+
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
216
|
+
|
217
|
+
@staticmethod
|
218
|
+
def is_lookup_root_first(lookup_options: int) -> bool:
|
219
|
+
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
220
|
+
|
221
|
+
@staticmethod
|
222
|
+
def is_lookup_subtypes(lookup_options: int) -> bool:
|
223
|
+
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
224
|
+
|
211
225
|
def get(self, url: str, follow: bool = True,
|
212
226
|
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
|
213
227
|
url = self.url(url, raw, database)
|
@@ -291,10 +305,7 @@ class Portal:
|
|
291
305
|
|
292
306
|
@lru_cache(maxsize=100)
|
293
307
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
294
|
-
|
295
|
-
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
296
|
-
except Exception:
|
297
|
-
return None
|
308
|
+
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
298
309
|
|
299
310
|
@lru_cache(maxsize=1)
|
300
311
|
def get_schemas(self) -> dict:
|
@@ -405,215 +416,6 @@ class Portal:
|
|
405
416
|
return []
|
406
417
|
return schemas_super_type_map.get(type_name, [])
|
407
418
|
|
408
|
-
@function_cache(maxsize=100, serialize_key=True)
|
409
|
-
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
410
|
-
first_only: bool = False,
|
411
|
-
lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
|
412
|
-
"""
|
413
|
-
Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
|
414
|
-
and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
|
415
|
-
no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
|
416
|
-
Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_stratey
|
417
|
-
module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
|
418
|
-
this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.
|
419
|
-
"""
|
420
|
-
def is_lookup_specified_type(lookup_options: int) -> bool:
|
421
|
-
return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
422
|
-
def is_lookup_root(lookup_options: int) -> bool: # noqa
|
423
|
-
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
424
|
-
def is_lookup_root_first(lookup_options: int) -> bool: # noqa
|
425
|
-
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
426
|
-
def is_lookup_subtypes(lookup_options: int) -> bool: # noqa
|
427
|
-
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
428
|
-
|
429
|
-
results = []
|
430
|
-
if not isinstance(portal_object, dict):
|
431
|
-
return results
|
432
|
-
if not (isinstance(portal_type, str) and portal_type):
|
433
|
-
if isinstance(portal_type, dict):
|
434
|
-
# It appears that the given portal_type is an actual schema dictionary.
|
435
|
-
portal_type = self.schema_name(portal_type.get("title"))
|
436
|
-
if not (isinstance(portal_type, str) and portal_type):
|
437
|
-
if not (portal_type := self.get_schema_type(portal_object)):
|
438
|
-
return results
|
439
|
-
if not callable(lookup_strategy):
|
440
|
-
lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
|
441
|
-
for identifying_property in self.get_identifying_property_names(portal_type):
|
442
|
-
if not (identifying_value := portal_object.get(identifying_property)):
|
443
|
-
continue
|
444
|
-
# The get_identifying_property_names call above ensures uuid is first if it is in the object.
|
445
|
-
# And also note that ALL schemas do in fact have identifyingProperties which do in fact have
|
446
|
-
# uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
|
447
|
-
# might have a special case to check the Portal object explicitly for uuid, but no need.
|
448
|
-
if identifying_property == "uuid":
|
449
|
-
#
|
450
|
-
# Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
|
451
|
-
# is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
|
452
|
-
#
|
453
|
-
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c
|
454
|
-
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
455
|
-
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
456
|
-
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
457
|
-
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
458
|
-
#
|
459
|
-
# Will result in a (HTTP 301) redirect to:
|
460
|
-
#
|
461
|
-
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
462
|
-
#
|
463
|
-
# Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
|
464
|
-
# based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
|
465
|
-
# information is contained, for this example, in the snovault.collection decorator for the
|
466
|
-
# endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because
|
467
|
-
# behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
|
468
|
-
# And note the disction of just using /{uuid} here rather than /{type}/{uuid} as in the else
|
469
|
-
# statement below is not really necessary; just here for emphasis that this is all that's needed.
|
470
|
-
#
|
471
|
-
if first_only is True:
|
472
|
-
results.append(f"/{portal_type}/{identifying_value}")
|
473
|
-
else:
|
474
|
-
results.append(f"/{identifying_value}")
|
475
|
-
elif isinstance(identifying_value, list):
|
476
|
-
for identifying_value_item in identifying_value:
|
477
|
-
if identifying_value_item:
|
478
|
-
results.append(f"/{portal_type}/{identifying_value_item}")
|
479
|
-
else:
|
480
|
-
lookup_options = Portal.LOOKUP_UNDEFINED
|
481
|
-
if schema := self.get_schema(portal_type):
|
482
|
-
if callable(lookup_strategy):
|
483
|
-
lookup_options, validator = lookup_strategy(self, portal_type, schema, identifying_value)
|
484
|
-
if callable(validator):
|
485
|
-
if validator(schema, identifying_property, identifying_value) is False:
|
486
|
-
continue
|
487
|
-
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
488
|
-
if not re.match(pattern, identifying_value):
|
489
|
-
# If this identifying value is for a (identifying) property which has a
|
490
|
-
# pattern, and the value does NOT match the pattern, then do NOT include
|
491
|
-
# this value as an identifying path, since it cannot possibly be found.
|
492
|
-
continue
|
493
|
-
if lookup_options == Portal.LOOKUP_UNDEFINED:
|
494
|
-
lookup_options = Portal.LOOKUP_DEFAULT
|
495
|
-
if is_lookup_root_first(lookup_options):
|
496
|
-
results.append(f"/{identifying_value}")
|
497
|
-
if is_lookup_specified_type(lookup_options) and portal_type:
|
498
|
-
results.append(f"/{portal_type}/{identifying_value}")
|
499
|
-
if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
|
500
|
-
results.append(f"/{identifying_value}")
|
501
|
-
if is_lookup_subtypes(lookup_options):
|
502
|
-
for subtype_name in self.get_schema_subtype_names(portal_type):
|
503
|
-
results.append(f"/{subtype_name}/{identifying_value}")
|
504
|
-
if (first_only is True) and results:
|
505
|
-
return results
|
506
|
-
return results
|
507
|
-
|
508
|
-
@function_cache(maxsize=100, serialize_key=True)
|
509
|
-
def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
510
|
-
lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]:
|
511
|
-
if identifying_paths := self.get_identifying_paths(portal_object, portal_type, first_only=True,
|
512
|
-
lookup_strategy=lookup_strategy):
|
513
|
-
return identifying_paths[0]
|
514
|
-
return None
|
515
|
-
|
516
|
-
@function_cache(maxsize=100, serialize_key=True)
|
517
|
-
def get_identifying_property_names(self, schema: Union[str, dict],
|
518
|
-
portal_object: Optional[dict] = None) -> List[str]:
|
519
|
-
"""
|
520
|
-
Returns the list of identifying property names for the given Portal schema, which may be
|
521
|
-
either a schema name or a schema object. If a Portal object is also given then restricts this
|
522
|
-
set of identifying properties to those which actually have values within this Portal object.
|
523
|
-
Favors the uuid and identifier property names and defavors the aliases property name; no other
|
524
|
-
ordering imposed. Returns empty list if no identifying properties or otherwise not found.
|
525
|
-
"""
|
526
|
-
results = []
|
527
|
-
if isinstance(schema, str):
|
528
|
-
if not (schema := self.get_schema(schema)):
|
529
|
-
return results
|
530
|
-
elif not isinstance(schema, dict):
|
531
|
-
return results
|
532
|
-
if not (identifying_properties := get_identifying_properties(schema)):
|
533
|
-
return results
|
534
|
-
identifying_properties = list(set(identifying_properties)) # paranoid dedup
|
535
|
-
identifying_properties = [*identifying_properties] # copy so as not to change schema if given
|
536
|
-
favored_identifying_properties = ["uuid", "identifier"]
|
537
|
-
defavored_identifying_properties = ["aliases"]
|
538
|
-
for favored_identifying_property in reversed(favored_identifying_properties):
|
539
|
-
if favored_identifying_property in identifying_properties:
|
540
|
-
identifying_properties.remove(favored_identifying_property)
|
541
|
-
identifying_properties.insert(0, favored_identifying_property)
|
542
|
-
for defavored_identifying_property in defavored_identifying_properties:
|
543
|
-
if defavored_identifying_property in identifying_properties:
|
544
|
-
identifying_properties.remove(defavored_identifying_property)
|
545
|
-
identifying_properties.append(defavored_identifying_property)
|
546
|
-
if isinstance(portal_object, dict):
|
547
|
-
for identifying_property in [*identifying_properties]:
|
548
|
-
if portal_object.get(identifying_property) is None:
|
549
|
-
identifying_properties.remove(identifying_property)
|
550
|
-
return identifying_properties
|
551
|
-
|
552
|
-
@staticmethod
|
553
|
-
def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
554
|
-
#
|
555
|
-
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
556
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
557
|
-
# PATH | EXAMPLE | LOOKUP RESULT |
|
558
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
559
|
-
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
560
|
-
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
561
|
-
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
562
|
-
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
563
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
564
|
-
# /accession | /SMAFSFXF1RO4 | FOUND |
|
565
|
-
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
566
|
-
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
567
|
-
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
568
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
569
|
-
#
|
570
|
-
def ref_validator(schema: Optional[dict],
|
571
|
-
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
572
|
-
"""
|
573
|
-
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
574
|
-
a Portal path using the given property name and its given property value, otherwise returns None.
|
575
|
-
|
576
|
-
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
577
|
-
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
578
|
-
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
579
|
-
will continue executing its default behavior, which is to check other ways in which the given type
|
580
|
-
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
581
|
-
and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
582
|
-
|
583
|
-
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
584
|
-
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
585
|
-
if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an
|
586
|
-
identifying property for the given type.
|
587
|
-
"""
|
588
|
-
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
589
|
-
if (property_format == "accession") and (property_name == "accession"):
|
590
|
-
if not Portal._is_accession_id(property_value):
|
591
|
-
return False
|
592
|
-
return None
|
593
|
-
|
594
|
-
DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
|
595
|
-
if not value:
|
596
|
-
return DEFAULT_RESULT
|
597
|
-
if not schema:
|
598
|
-
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
599
|
-
return DEFAULT_RESULT
|
600
|
-
if schema_properties := schema.get("properties"):
|
601
|
-
if schema_properties.get("accession") and Portal._is_accession_id(value):
|
602
|
-
# Case: lookup by accession (only by root).
|
603
|
-
return (Portal.LOOKUP_ROOT, ref_validator)
|
604
|
-
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
605
|
-
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
606
|
-
if re.match(schema_property_pattern_submitted_id, value):
|
607
|
-
# Case: lookup by submitted_id (only by specified type).
|
608
|
-
return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
|
609
|
-
return DEFAULT_RESULT
|
610
|
-
|
611
|
-
@staticmethod
|
612
|
-
def _is_accession_id(value: str) -> bool:
|
613
|
-
# This is here for now because of problems with circular dependencies.
|
614
|
-
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
615
|
-
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
616
|
-
|
617
419
|
def url(self, url: str, raw: bool = False, database: bool = False) -> str:
|
618
420
|
if not isinstance(url, str) or not url:
|
619
421
|
return "/"
|
@@ -714,22 +516,6 @@ class Portal:
|
|
714
516
|
response = TestResponseWrapper(response)
|
715
517
|
return response
|
716
518
|
|
717
|
-
@staticmethod
|
718
|
-
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
719
|
-
if isinstance(arg, TestApp):
|
720
|
-
return arg
|
721
|
-
elif isinstance(arg, VirtualApp):
|
722
|
-
if not isinstance(arg.wrapped_app, TestApp):
|
723
|
-
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
724
|
-
return arg.wrapped_app
|
725
|
-
if isinstance(arg, PyramidRouter):
|
726
|
-
router = arg
|
727
|
-
elif isinstance(arg, str) or not arg:
|
728
|
-
router = pyramid_get_app(arg or "development.ini", "app")
|
729
|
-
else:
|
730
|
-
raise Exception("Portal._create_vapp argument error.")
|
731
|
-
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
732
|
-
|
733
519
|
@staticmethod
|
734
520
|
def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
|
735
521
|
if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
|
@@ -761,6 +547,22 @@ class Portal:
|
|
761
547
|
with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
|
762
548
|
return Portal(ini_file)
|
763
549
|
|
550
|
+
@staticmethod
|
551
|
+
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
552
|
+
if isinstance(arg, TestApp):
|
553
|
+
return arg
|
554
|
+
elif isinstance(arg, VirtualApp):
|
555
|
+
if not isinstance(arg.wrapped_app, TestApp):
|
556
|
+
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
557
|
+
return arg.wrapped_app
|
558
|
+
if isinstance(arg, PyramidRouter):
|
559
|
+
router = arg
|
560
|
+
elif isinstance(arg, str) or not arg:
|
561
|
+
router = pyramid_get_app(arg or "development.ini", "app")
|
562
|
+
else:
|
563
|
+
raise Exception("Portal._create_vapp argument error.")
|
564
|
+
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
565
|
+
|
764
566
|
@staticmethod
|
765
567
|
def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
|
766
568
|
if isinstance(endpoints, dict):
|
@@ -24,6 +24,7 @@ class JsonSchemaConstants:
|
|
24
24
|
|
25
25
|
|
26
26
|
class EncodedSchemaConstants:
|
27
|
+
DESCRIPTION = "description"
|
27
28
|
IDENTIFYING_PROPERTIES = "identifyingProperties"
|
28
29
|
LINK_TO = "linkTo"
|
29
30
|
MERGE_REF = "$merge"
|
@@ -187,6 +188,21 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
|
|
187
188
|
]
|
188
189
|
|
189
190
|
|
191
|
+
def is_link(property_schema: Dict[str, Any]) -> bool:
|
192
|
+
"""Is property schema a link?"""
|
193
|
+
return bool(property_schema.get(SchemaConstants.LINK_TO))
|
194
|
+
|
195
|
+
|
196
|
+
def get_enum(property_schema: Dict[str, Any]) -> List[str]:
|
197
|
+
"""Return the enum of a property schema."""
|
198
|
+
return property_schema.get(SchemaConstants.ENUM, [])
|
199
|
+
|
200
|
+
|
201
|
+
def get_description(schema: Dict[str, Any]) -> str:
|
202
|
+
"""Return the description of a schema."""
|
203
|
+
return schema.get(SchemaConstants.DESCRIPTION, "")
|
204
|
+
|
205
|
+
|
190
206
|
class Schema:
|
191
207
|
|
192
208
|
def __init__(self, schema: dict, type: Optional[str] = None) -> None:
|
@@ -56,7 +56,7 @@ class StructuredDataSet:
|
|
56
56
|
remove_empty_objects_from_lists: bool = True,
|
57
57
|
ref_lookup_strategy: Optional[Callable] = None,
|
58
58
|
ref_lookup_nocache: bool = False,
|
59
|
-
norefs: bool = False,
|
59
|
+
norefs: bool = False,
|
60
60
|
progress: Optional[Callable] = None,
|
61
61
|
debug_sleep: Optional[str] = None) -> None:
|
62
62
|
self._progress = progress if callable(progress) else None
|
@@ -75,7 +75,6 @@ class StructuredDataSet:
|
|
75
75
|
self._nrows = 0
|
76
76
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
77
77
|
self._norefs = True if norefs is True else False
|
78
|
-
self._merge = True if merge is True else False
|
79
78
|
self._debug_sleep = None
|
80
79
|
if debug_sleep:
|
81
80
|
try:
|
@@ -99,13 +98,13 @@ class StructuredDataSet:
|
|
99
98
|
remove_empty_objects_from_lists: bool = True,
|
100
99
|
ref_lookup_strategy: Optional[Callable] = None,
|
101
100
|
ref_lookup_nocache: bool = False,
|
102
|
-
norefs: bool = False,
|
101
|
+
norefs: bool = False,
|
103
102
|
progress: Optional[Callable] = None,
|
104
103
|
debug_sleep: Optional[str] = None) -> StructuredDataSet:
|
105
104
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
|
106
105
|
remove_empty_objects_from_lists=remove_empty_objects_from_lists,
|
107
106
|
ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
|
108
|
-
norefs=norefs,
|
107
|
+
norefs=norefs, progress=progress, debug_sleep=debug_sleep)
|
109
108
|
|
110
109
|
def validate(self, force: bool = False) -> None:
|
111
110
|
def data_without_deleted_properties(data: dict) -> dict:
|
@@ -351,23 +350,18 @@ class StructuredDataSet:
|
|
351
350
|
|
352
351
|
def _load_json_file(self, file: str) -> None:
|
353
352
|
with open(file) as f:
|
354
|
-
|
355
|
-
|
356
|
-
|
353
|
+
file_json = json.load(f)
|
354
|
+
schema_inferred_from_file_name = Schema.type_name(file)
|
355
|
+
if self._portal.get_schema(schema_inferred_from_file_name) is not None:
|
357
356
|
# If the JSON file name looks like a schema name then assume it
|
358
357
|
# contains an object or an array of objects of that schema type.
|
359
|
-
|
360
|
-
|
361
|
-
self._add(Schema.type_name(file), item)
|
362
|
-
elif isinstance(item, dict):
|
358
|
+
self._add(Schema.type_name(file), file_json)
|
359
|
+
elif isinstance(file_json, dict):
|
363
360
|
# Otherwise if the JSON file name does not look like a schema name then
|
364
361
|
# assume it is a dictionary where each property is the name of a schema, and
|
365
362
|
# which (each property) contains a list of objects of that schema type.
|
366
|
-
for schema_name in
|
367
|
-
|
368
|
-
if self._merge:
|
369
|
-
item = self._merge_with_existing_portal_object(item, schema_name)
|
370
|
-
self._add(schema_name, item)
|
363
|
+
for schema_name in file_json:
|
364
|
+
self._add(schema_name, file_json[schema_name])
|
371
365
|
|
372
366
|
def _load_reader(self, reader: RowReader, type_name: str) -> None:
|
373
367
|
schema = None
|
@@ -389,14 +383,11 @@ class StructuredDataSet:
|
|
389
383
|
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
390
384
|
if self._autoadd_properties:
|
391
385
|
self._add_properties(structured_row, self._autoadd_properties, schema)
|
392
|
-
# New merge functionality (2024-05-25).
|
393
|
-
if self._merge:
|
394
|
-
structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
|
395
386
|
if (prune_error := self._prune_structured_row(structured_row)) is not None:
|
396
387
|
self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
|
397
388
|
"error": prune_error}, "validation")
|
398
389
|
else:
|
399
|
-
self._add(type_name, structured_row)
|
390
|
+
self._add(type_name, structured_row)
|
400
391
|
if self._progress:
|
401
392
|
self._progress({
|
402
393
|
PROGRESS.LOAD_ITEM: self._nrows,
|
@@ -437,18 +428,6 @@ class StructuredDataSet:
|
|
437
428
|
if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
|
438
429
|
structured_row[name] = properties[name]
|
439
430
|
|
440
|
-
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
|
441
|
-
"""
|
442
|
-
Given a Portal object (presumably/in-practice from the given metadata), if there is
|
443
|
-
an existing Portal item, identified by the identifying properties for the given object,
|
444
|
-
then merges the given object into the existing one and returns the result; otherwise
|
445
|
-
just returns the given object. Note that the given object may be CHANGED in place.
|
446
|
-
"""
|
447
|
-
for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
|
448
|
-
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
|
449
|
-
return merge_objects(existing_portal_object, portal_object)
|
450
|
-
return portal_object
|
451
|
-
|
452
431
|
def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
|
453
432
|
return (ref_lookup_flags &
|
454
433
|
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
@@ -0,0 +1,67 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Optional
|
3
|
+
from dcicutils.structured_data import Portal
|
4
|
+
|
5
|
+
|
6
|
+
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
7
|
+
#
|
8
|
+
# FYI: Note this situation WRT object lookups ...
|
9
|
+
#
|
10
|
+
# /{submitted_id} # NOT FOUND
|
11
|
+
# /UnalignedReads/{submitted_id} # OK
|
12
|
+
# /SubmittedFile/{submitted_id} # OK
|
13
|
+
# /File/{submitted_id} # NOT FOUND
|
14
|
+
#
|
15
|
+
# /{accession} # OK
|
16
|
+
# /UnalignedReads/{accession} # NOT FOUND
|
17
|
+
# /SubmittedFile/{accession} # NOT FOUND
|
18
|
+
# /File/{accession} # OK
|
19
|
+
#
|
20
|
+
def ref_validator(schema: Optional[dict],
|
21
|
+
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
22
|
+
"""
|
23
|
+
Returns False iff the type represented by the given schema, can NOT be referenced by
|
24
|
+
the given property name with the given property value, otherwise returns None.
|
25
|
+
|
26
|
+
For example, if the schema is for the UnalignedReads type and the property name
|
27
|
+
is accession, then we will return False iff the given property value is NOT a properly
|
28
|
+
formatted accession ID. Otherwise, we will return None, which indicates that the
|
29
|
+
caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
|
30
|
+
its default behavior, which is to check other ways in which the given type can NOT
|
31
|
+
be referenced by the given value, i.e. it checks other identifying properties for
|
32
|
+
the type and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
33
|
+
|
34
|
+
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
+
a way that cannot possibly be allowed, i.e. because none of its identifying properties
|
36
|
+
are in the required form (if indeed there are any requirements). Note that it is guaranteed
|
37
|
+
that the given property name is indeed an identifying property for the given type.
|
38
|
+
"""
|
39
|
+
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
40
|
+
if (property_format == "accession") and (property_name == "accession"):
|
41
|
+
if not _is_accession_id(property_value):
|
42
|
+
return False
|
43
|
+
return None
|
44
|
+
|
45
|
+
DEFAULT_RESPONSE = (Portal.LOOKUP_DEFAULT, ref_validator)
|
46
|
+
|
47
|
+
if not value:
|
48
|
+
return DEFAULT_RESPONSE
|
49
|
+
if not schema:
|
50
|
+
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
51
|
+
return DEFAULT_RESPONSE
|
52
|
+
if schema_properties := schema.get("properties"):
|
53
|
+
if schema_properties.get("accession") and _is_accession_id(value):
|
54
|
+
# Case: lookup by accession (only by root).
|
55
|
+
return Portal.LOOKUP_ROOT, ref_validator
|
56
|
+
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
57
|
+
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
58
|
+
if re.match(schema_property_pattern_submitted_id, value):
|
59
|
+
# Case: lookup by submitted_id (only by specified type).
|
60
|
+
return Portal.LOOKUP_SPECIFIED_TYPE, ref_validator
|
61
|
+
return DEFAULT_RESPONSE
|
62
|
+
|
63
|
+
|
64
|
+
# This is here for now because of problems with circular dependencies.
|
65
|
+
# See: smaht-portal/.../schema_formats.py
|
66
|
+
def _is_accession_id(value: str) -> bool:
|
67
|
+
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
@@ -1,73 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from typing import Optional
|
3
|
-
from dcicutils.structured_data import Portal
|
4
|
-
|
5
|
-
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
-
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
-
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
-
|
9
|
-
|
10
|
-
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
11
|
-
#
|
12
|
-
# Note this slight odd situation WRT object lookups by submitted_id and accession:
|
13
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
14
|
-
# PATH | EXAMPLE | LOOKUP RESULT |
|
15
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
16
|
-
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
17
|
-
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
18
|
-
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
19
|
-
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
20
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
21
|
-
# /accession | /SMAFSFXF1RO4 | FOUND |
|
22
|
-
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
23
|
-
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
-
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
26
|
-
#
|
27
|
-
def ref_validator(schema: Optional[dict],
|
28
|
-
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
29
|
-
"""
|
30
|
-
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
31
|
-
a Portal path using the given property name and its given property value, otherwise returns None.
|
32
|
-
|
33
|
-
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
34
|
-
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
35
|
-
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
36
|
-
will continue executing its default behavior, which is to check other ways in which the given type
|
37
|
-
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
38
|
-
and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
39
|
-
|
40
|
-
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
41
|
-
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
42
|
-
if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an
|
43
|
-
identifying property for the given type.
|
44
|
-
"""
|
45
|
-
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
46
|
-
if (property_format == "accession") and (property_name == "accession"):
|
47
|
-
if not _is_accession_id(property_value):
|
48
|
-
return False
|
49
|
-
return None
|
50
|
-
|
51
|
-
DEFAULT_RESPONSE = (Portal.LOOKUP_DEFAULT, ref_validator)
|
52
|
-
|
53
|
-
if not value:
|
54
|
-
return DEFAULT_RESPONSE
|
55
|
-
if not schema:
|
56
|
-
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
57
|
-
return DEFAULT_RESPONSE
|
58
|
-
if schema_properties := schema.get("properties"):
|
59
|
-
if schema_properties.get("accession") and _is_accession_id(value):
|
60
|
-
# Case: lookup by accession (only by root).
|
61
|
-
return Portal.LOOKUP_ROOT, ref_validator
|
62
|
-
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
63
|
-
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
64
|
-
if re.match(schema_property_pattern_submitted_id, value):
|
65
|
-
# Case: lookup by submitted_id (only by specified type).
|
66
|
-
return Portal.LOOKUP_SPECIFIED_TYPE, ref_validator
|
67
|
-
return DEFAULT_RESPONSE
|
68
|
-
|
69
|
-
|
70
|
-
# This is here for now because of problems with circular dependencies.
|
71
|
-
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
72
|
-
def _is_accession_id(value: str) -> bool:
|
73
|
-
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/c4-python-infrastructure.jsonc
RENAMED
File without changes
|
{dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/park-lab-common-server.jsonc
RENAMED
File without changes
|
File without changes
|
{dcicutils-8.8.6.1b11 → dcicutils-8.9.0}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|