dcicutils 8.9.0.1b5__py3-none-any.whl → 8.10.0.0b0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- dcicutils/command_utils.py +1 -69
- dcicutils/misc_utils.py +10 -41
- dcicutils/portal_object_utils.py +89 -24
- dcicutils/portal_utils.py +37 -249
- dcicutils/schema_utils.py +50 -0
- dcicutils/structured_data.py +20 -31
- dcicutils/submitr/ref_lookup_strategy.py +25 -31
- {dcicutils-8.9.0.1b5.dist-info → dcicutils-8.10.0.0b0.dist-info}/METADATA +1 -1
- {dcicutils-8.9.0.1b5.dist-info → dcicutils-8.10.0.0b0.dist-info}/RECORD +12 -12
- {dcicutils-8.9.0.1b5.dist-info → dcicutils-8.10.0.0b0.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.9.0.1b5.dist-info → dcicutils-8.10.0.0b0.dist-info}/WHEEL +0 -0
- {dcicutils-8.9.0.1b5.dist-info → dcicutils-8.10.0.0b0.dist-info}/entry_points.txt +0 -0
dcicutils/command_utils.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
from __future__ import annotations
|
2
1
|
import contextlib
|
3
2
|
import functools
|
4
3
|
import glob
|
@@ -8,7 +7,7 @@ import re
|
|
8
7
|
import requests
|
9
8
|
import subprocess
|
10
9
|
|
11
|
-
from typing import
|
10
|
+
from typing import Optional
|
12
11
|
from .exceptions import InvalidParameterError
|
13
12
|
from .lang_utils import there_are
|
14
13
|
from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
|
@@ -385,70 +384,3 @@ def script_catch_errors():
|
|
385
384
|
message = str(e) # Note: We ignore the type, which isn't intended to be shown.
|
386
385
|
PRINT(message)
|
387
386
|
exit(1)
|
388
|
-
|
389
|
-
|
390
|
-
class Question:
|
391
|
-
"""
|
392
|
-
Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after
|
393
|
-
some maximum number of times of the same answer in a row (consecutively), then asks them
|
394
|
-
if they want to automatically give that same answer to any/all subsequent questions.
|
395
|
-
Supports static/global list of such Question instances, hashed (only) by the question text.
|
396
|
-
"""
|
397
|
-
_static_instances = {}
|
398
|
-
|
399
|
-
@staticmethod
|
400
|
-
def instance(question: Optional[str] = None,
|
401
|
-
max: Optional[int] = None, printf: Optional[Callable] = None) -> Question:
|
402
|
-
question = question if isinstance(question, str) else ""
|
403
|
-
if not (instance := Question._static_instances.get(question)):
|
404
|
-
Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
|
405
|
-
return instance
|
406
|
-
|
407
|
-
@staticmethod
|
408
|
-
def yes(question: Optional[str] = None,
|
409
|
-
max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
|
410
|
-
return Question.instance(question, max=max, printf=printf).ask()
|
411
|
-
|
412
|
-
def __init__(self, question: Optional[str] = None,
|
413
|
-
max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
|
414
|
-
self._question = question if isinstance(question, str) else ""
|
415
|
-
self._max = max if isinstance(max, int) and max > 0 else None
|
416
|
-
self._print = printf if callable(printf) else print
|
417
|
-
self._yes_consecutive_count = 0
|
418
|
-
self._no_consecutive_count = 0
|
419
|
-
self._yes_automatic = False
|
420
|
-
self._no_automatic = False
|
421
|
-
|
422
|
-
def ask(self, question: Optional[str] = None) -> bool:
|
423
|
-
|
424
|
-
def question_automatic(value: str) -> bool:
|
425
|
-
nonlocal self
|
426
|
-
RARROW = "▶"
|
427
|
-
LARROW = "◀"
|
428
|
-
if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
|
429
|
-
f" Do you want to answer {value} to all such questions?"
|
430
|
-
f" {LARROW}{LARROW}{LARROW}"):
|
431
|
-
return True
|
432
|
-
self._yes_consecutive_count = 0
|
433
|
-
self._no_consecutive_count = 0
|
434
|
-
|
435
|
-
if self._yes_automatic:
|
436
|
-
return True
|
437
|
-
elif self._no_automatic:
|
438
|
-
return False
|
439
|
-
elif yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
|
440
|
-
self._yes_consecutive_count += 1
|
441
|
-
self._no_consecutive_count = 0
|
442
|
-
if (self._no_consecutive_count == 0) and self._max and (self._yes_consecutive_count >= self._max):
|
443
|
-
# Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
|
444
|
-
if question_automatic("YES"):
|
445
|
-
self._yes_automatic = True
|
446
|
-
return True
|
447
|
-
else:
|
448
|
-
self._no_consecutive_count += 1
|
449
|
-
self._yes_consecutive_count = 0
|
450
|
-
if (self._yes_consecutive_count == 0) and self._max and (self._no_consecutive_count >= self._max):
|
451
|
-
# Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
|
452
|
-
if question_automatic("NO"):
|
453
|
-
self._no_automatic = True
|
454
|
-
return False
|
dcicutils/misc_utils.py
CHANGED
@@ -4,7 +4,6 @@ This file contains functions that might be generally useful.
|
|
4
4
|
|
5
5
|
from collections import namedtuple
|
6
6
|
import appdirs
|
7
|
-
from copy import deepcopy
|
8
7
|
import contextlib
|
9
8
|
import datetime
|
10
9
|
import functools
|
@@ -2200,58 +2199,28 @@ def merge_key_value_dict_lists(x, y):
|
|
2200
2199
|
return [key_value_dict(k, v) for k, v in merged.items()]
|
2201
2200
|
|
2202
2201
|
|
2203
|
-
def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]],
|
2204
|
-
full: bool = False, # deprecated
|
2205
|
-
expand_lists: Optional[bool] = None,
|
2206
|
-
primitive_lists: bool = False,
|
2207
|
-
copy: bool = False, _recursing: bool = False) -> Union[dict, List[Any]]:
|
2202
|
+
def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]], full: bool = False) -> dict:
|
2208
2203
|
"""
|
2209
|
-
Merges the given source dictionary or list into the target dictionary or list
|
2210
|
-
|
2211
|
-
argument is True
|
2212
|
-
|
2213
|
-
If the expand_lists argument is True then any target lists longer than the
|
2214
|
-
source will be filled out with the last element(s) of the source; the full
|
2215
|
-
argument (is deprecated and) is a synonym for this. The default is False.
|
2216
|
-
|
2217
|
-
If the primitive_lists argument is True then lists of primitives (i.e. lists in which
|
2218
|
-
NONE of its elements are dictionaries, lists, or tuples) will themselves be treated
|
2219
|
-
like primitives, meaning the whole of a source list will replace the corresponding
|
2220
|
-
target; otherwise they will be merged normally, meaning each element of a source list
|
2221
|
-
will be merged, recursively, into the corresponding target list. The default is False.
|
2204
|
+
Merges the given source dictionary or list into the target dictionary or list.
|
2205
|
+
This MAY well change the given target (dictionary or list) IN PLACE.
|
2206
|
+
If the full argument is True then any target lists longer than the
|
2207
|
+
source will be filled out with the last element(s) of the source.
|
2222
2208
|
"""
|
2223
|
-
def is_primitive_list(value: Any) -> bool: # noqa
|
2224
|
-
if not isinstance(value, list):
|
2225
|
-
return False
|
2226
|
-
for item in value:
|
2227
|
-
if isinstance(item, (dict, list, tuple)):
|
2228
|
-
return False
|
2229
|
-
return True
|
2230
|
-
|
2231
2209
|
if target is None:
|
2232
2210
|
return source
|
2233
|
-
if expand_lists not in (True, False):
|
2234
|
-
expand_lists = full is True
|
2235
|
-
if (copy is True) and (_recursing is not True):
|
2236
|
-
target = deepcopy(target)
|
2237
2211
|
if isinstance(target, dict) and isinstance(source, dict) and source:
|
2238
2212
|
for key, value in source.items():
|
2239
|
-
|
2240
|
-
(key in target) and is_primitive_list(target[key]) and is_primitive_list(value)): # noqa
|
2241
|
-
target[key] = value
|
2242
|
-
else:
|
2243
|
-
target[key] = merge_objects(target[key], value,
|
2244
|
-
expand_lists=expand_lists, _recursing=True) if key in target else value
|
2213
|
+
target[key] = merge_objects(target[key], value, full) if key in target else value
|
2245
2214
|
elif isinstance(target, list) and isinstance(source, list) and source:
|
2246
2215
|
for i in range(max(len(source), len(target))):
|
2247
2216
|
if i < len(target):
|
2248
2217
|
if i < len(source):
|
2249
|
-
target[i] = merge_objects(target[i], source[i],
|
2250
|
-
elif
|
2251
|
-
target[i] = merge_objects(target[i], source[len(source) - 1],
|
2218
|
+
target[i] = merge_objects(target[i], source[i], full)
|
2219
|
+
elif full:
|
2220
|
+
target[i] = merge_objects(target[i], source[len(source) - 1], full)
|
2252
2221
|
else:
|
2253
2222
|
target.append(source[i])
|
2254
|
-
elif source
|
2223
|
+
elif source:
|
2255
2224
|
target = source
|
2256
2225
|
return target
|
2257
2226
|
|
dcicutils/portal_object_utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from functools import lru_cache
|
3
|
+
import re
|
3
4
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
4
5
|
from dcicutils.data_readers import RowReader
|
5
6
|
from dcicutils.misc_utils import create_readonly_object
|
@@ -13,9 +14,11 @@ class PortalObject:
|
|
13
14
|
|
14
15
|
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
|
15
16
|
|
16
|
-
def __init__(self, data: dict, portal:
|
17
|
+
def __init__(self, data: dict, portal: Portal = None,
|
18
|
+
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
|
17
19
|
self._data = data if isinstance(data, dict) else {}
|
18
20
|
self._portal = portal if isinstance(portal, Portal) else None
|
21
|
+
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
|
19
22
|
self._type = type if isinstance(type, str) else ""
|
20
23
|
|
21
24
|
@property
|
@@ -29,7 +32,7 @@ class PortalObject:
|
|
29
32
|
@property
|
30
33
|
@lru_cache(maxsize=1)
|
31
34
|
def type(self) -> str:
|
32
|
-
return self._type or Portal.get_schema_type(self._data) or ""
|
35
|
+
return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else "")
|
33
36
|
|
34
37
|
@property
|
35
38
|
@lru_cache(maxsize=1)
|
@@ -44,7 +47,7 @@ class PortalObject:
|
|
44
47
|
@property
|
45
48
|
@lru_cache(maxsize=1)
|
46
49
|
def schema(self) -> Optional[dict]:
|
47
|
-
return self._portal.get_schema(self.type) if self._portal else None
|
50
|
+
return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None)
|
48
51
|
|
49
52
|
def copy(self) -> PortalObject:
|
50
53
|
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
|
@@ -56,29 +59,39 @@ class PortalObject:
|
|
56
59
|
Returns the list of all identifying property names of this Portal object which actually have values.
|
57
60
|
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
|
58
61
|
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
|
59
|
-
Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
|
60
62
|
"""
|
61
|
-
|
62
|
-
|
63
|
+
if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
|
64
|
+
return None
|
65
|
+
identifying_properties = []
|
66
|
+
for identifying_property in schema_identifying_properties:
|
67
|
+
if identifying_property not in ["uuid", "identifier", "aliases"]:
|
68
|
+
if self._data.get(identifying_property):
|
69
|
+
identifying_properties.append(identifying_property)
|
70
|
+
if self._data.get("identifier"):
|
71
|
+
identifying_properties.insert(0, "identifier")
|
72
|
+
if self._data.get("uuid"):
|
73
|
+
identifying_properties.insert(0, "uuid")
|
74
|
+
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
|
75
|
+
identifying_properties.append("aliases")
|
76
|
+
return identifying_properties or None
|
63
77
|
|
64
78
|
@lru_cache(maxsize=8192)
|
65
79
|
def lookup(self, raw: bool = False,
|
66
80
|
ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
|
67
|
-
if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
|
68
|
-
return None, None, 0
|
69
81
|
nlookups = 0
|
70
82
|
first_identifying_path = None
|
71
83
|
try:
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
84
|
+
if identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy):
|
85
|
+
for identifying_path in identifying_paths:
|
86
|
+
if not first_identifying_path:
|
87
|
+
first_identifying_path = identifying_path
|
88
|
+
nlookups += 1
|
89
|
+
if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
|
90
|
+
return (
|
91
|
+
PortalObject(value.json(), portal=self._portal, type=self.type if raw else None),
|
92
|
+
identifying_path,
|
93
|
+
nlookups
|
94
|
+
)
|
82
95
|
except Exception:
|
83
96
|
pass
|
84
97
|
return None, first_identifying_path, nlookups
|
@@ -146,12 +159,64 @@ class PortalObject:
|
|
146
159
|
|
147
160
|
@lru_cache(maxsize=1)
|
148
161
|
def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
162
|
+
"""
|
163
|
+
Returns a list of the possible Portal URL paths identifying this Portal object.
|
164
|
+
"""
|
165
|
+
identifying_paths = []
|
166
|
+
if not (identifying_properties := self.identifying_properties):
|
167
|
+
if self.uuid:
|
168
|
+
if self.type:
|
169
|
+
identifying_paths.append(f"/{self.type}/{self.uuid}")
|
170
|
+
identifying_paths.append(f"/{self.uuid}")
|
171
|
+
return identifying_paths
|
172
|
+
for identifying_property in identifying_properties:
|
173
|
+
if identifying_value := self._data.get(identifying_property):
|
174
|
+
if identifying_property == "uuid":
|
175
|
+
if self.type:
|
176
|
+
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
177
|
+
identifying_paths.append(f"/{identifying_value}")
|
178
|
+
# For now at least we include the path both with and without the schema type component,
|
179
|
+
# as for some identifying values, it works (only) with, and some, it works (only) without.
|
180
|
+
# For example: If we have FileSet with "accession", an identifying property, with value
|
181
|
+
# SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
|
182
|
+
# conversely using "submitted_id", also an identifying property, with value
|
183
|
+
# UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
|
184
|
+
# not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
|
185
|
+
elif isinstance(identifying_value, list):
|
186
|
+
for identifying_value_item in identifying_value:
|
187
|
+
if self.type:
|
188
|
+
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
|
189
|
+
identifying_paths.append(f"/{identifying_value_item}")
|
190
|
+
else:
|
191
|
+
# TODO: Import from somewhere ...
|
192
|
+
lookup_options = 0
|
193
|
+
if schema := self.schema:
|
194
|
+
# TODO: Hook into the ref_lookup_strategy thing in structured_data to make
|
195
|
+
# sure we check accession format (since it does not have a pattern).
|
196
|
+
if callable(ref_lookup_strategy):
|
197
|
+
lookup_options, ref_validator = ref_lookup_strategy(
|
198
|
+
self._portal, self.type, schema, identifying_value)
|
199
|
+
if callable(ref_validator):
|
200
|
+
if ref_validator(schema, identifying_property, identifying_value) is False:
|
201
|
+
continue
|
202
|
+
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
203
|
+
if not re.match(pattern, identifying_value):
|
204
|
+
# If this identifying value is for a (identifying) property which has a
|
205
|
+
# pattern, and the value does NOT match the pattern, then do NOT include
|
206
|
+
# this value as an identifying path, since it cannot possibly be found.
|
207
|
+
continue
|
208
|
+
if not lookup_options:
|
209
|
+
lookup_options = Portal.LOOKUP_DEFAULT
|
210
|
+
if Portal.is_lookup_root_first(lookup_options):
|
211
|
+
identifying_paths.append(f"/{identifying_value}")
|
212
|
+
if Portal.is_lookup_specified_type(lookup_options) and self.type:
|
213
|
+
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
214
|
+
if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
|
215
|
+
identifying_paths.append(f"/{identifying_value}")
|
216
|
+
if Portal.is_lookup_subtypes(lookup_options):
|
217
|
+
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
218
|
+
identifying_paths.append(f"/{subtype_name}/{identifying_value}")
|
219
|
+
return identifying_paths or None
|
155
220
|
|
156
221
|
def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
|
157
222
|
"""
|
dcicutils/portal_utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
from collections import deque
|
2
2
|
from functools import lru_cache
|
3
|
-
from dcicutils.function_cache_decorator import function_cache
|
4
3
|
import io
|
5
4
|
import json
|
6
5
|
from pyramid.config import Configurator as PyramidConfigurator
|
@@ -17,9 +16,8 @@ from uuid import uuid4 as uuid
|
|
17
16
|
from webtest.app import TestApp, TestResponse
|
18
17
|
from wsgiref.simple_server import make_server as wsgi_make_server
|
19
18
|
from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
|
20
|
-
from dcicutils.ff_utils import
|
19
|
+
from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
|
21
20
|
from dcicutils.misc_utils import to_camel_case, VirtualApp
|
22
|
-
from dcicutils.schema_utils import get_identifying_properties
|
23
21
|
from dcicutils.tmpfile_utils import temporary_file
|
24
22
|
|
25
23
|
Portal = Type["Portal"] # Forward type reference for type hints.
|
@@ -50,16 +48,15 @@ class Portal:
|
|
50
48
|
FILE_TYPE_SCHEMA_NAME = "File"
|
51
49
|
|
52
50
|
# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
|
53
|
-
# structured_data.py; controlled by an optional
|
51
|
+
# structured_data.py; controlled by an optional ref_lookup_strategy callable; default is
|
54
52
|
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
|
55
53
|
# can choose to lookup root path first, or not lookup root path at all, or not lookup
|
56
|
-
# subtypes at all; the
|
54
|
+
# subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
|
57
55
|
# and value (string) arguments and return an integer of any of the below ORed together.
|
58
56
|
# The main purpose of this is optimization; to minimize portal lookups; since for example,
|
59
57
|
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
|
60
58
|
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
|
61
59
|
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
|
62
|
-
LOOKUP_UNDEFINED = 0
|
63
60
|
LOOKUP_SPECIFIED_TYPE = 0x0001
|
64
61
|
LOOKUP_ROOT = 0x0002
|
65
62
|
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
|
@@ -208,6 +205,23 @@ class Portal:
|
|
208
205
|
def vapp(self) -> Optional[TestApp]:
|
209
206
|
return self._vapp
|
210
207
|
|
208
|
+
@staticmethod
|
209
|
+
def is_lookup_specified_type(lookup_options: int) -> bool:
|
210
|
+
return (lookup_options &
|
211
|
+
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
212
|
+
|
213
|
+
@staticmethod
|
214
|
+
def is_lookup_root(lookup_options: int) -> bool:
|
215
|
+
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
216
|
+
|
217
|
+
@staticmethod
|
218
|
+
def is_lookup_root_first(lookup_options: int) -> bool:
|
219
|
+
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
220
|
+
|
221
|
+
@staticmethod
|
222
|
+
def is_lookup_subtypes(lookup_options: int) -> bool:
|
223
|
+
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
224
|
+
|
211
225
|
def get(self, url: str, follow: bool = True,
|
212
226
|
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
|
213
227
|
url = self.url(url, raw, database)
|
@@ -280,20 +294,6 @@ class Portal:
|
|
280
294
|
add_on="check_only=True" if check_only else "")
|
281
295
|
return self.post(f"/{object_type}{'?check_only=True' if check_only else ''}", data).json()
|
282
296
|
|
283
|
-
def delete_metadata(self, object_id: str) -> Optional[dict]:
|
284
|
-
if isinstance(object_id, str) and object_id:
|
285
|
-
if self.key:
|
286
|
-
return delete_metadata(obj_id=object_id, key=self.key)
|
287
|
-
else:
|
288
|
-
return self.patch_metadata(object_id, {"status": "deleted"})
|
289
|
-
return None
|
290
|
-
|
291
|
-
def purge_metadata(self, object_id: str) -> Optional[dict]:
|
292
|
-
if isinstance(object_id, str) and object_id:
|
293
|
-
if self.key:
|
294
|
-
return purge_metadata(obj_id=object_id, key=self.key)
|
295
|
-
return None
|
296
|
-
|
297
297
|
def get_health(self) -> OptionalResponse:
|
298
298
|
return self.get("/health")
|
299
299
|
|
@@ -305,10 +305,7 @@ class Portal:
|
|
305
305
|
|
306
306
|
@lru_cache(maxsize=100)
|
307
307
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
308
|
-
|
309
|
-
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
310
|
-
except Exception:
|
311
|
-
return None
|
308
|
+
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
312
309
|
|
313
310
|
@lru_cache(maxsize=1)
|
314
311
|
def get_schemas(self) -> dict:
|
@@ -419,215 +416,6 @@ class Portal:
|
|
419
416
|
return []
|
420
417
|
return schemas_super_type_map.get(type_name, [])
|
421
418
|
|
422
|
-
@function_cache(maxsize=100, serialize_key=True)
|
423
|
-
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
424
|
-
first_only: bool = False,
|
425
|
-
lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
|
426
|
-
"""
|
427
|
-
Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
|
428
|
-
and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
|
429
|
-
no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
|
430
|
-
Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_strategy
|
431
|
-
module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
|
432
|
-
this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.
|
433
|
-
"""
|
434
|
-
def is_lookup_specified_type(lookup_options: int) -> bool:
|
435
|
-
return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
436
|
-
def is_lookup_root(lookup_options: int) -> bool: # noqa
|
437
|
-
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
438
|
-
def is_lookup_root_first(lookup_options: int) -> bool: # noqa
|
439
|
-
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
440
|
-
def is_lookup_subtypes(lookup_options: int) -> bool: # noqa
|
441
|
-
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
442
|
-
|
443
|
-
results = []
|
444
|
-
if not isinstance(portal_object, dict):
|
445
|
-
return results
|
446
|
-
if not (isinstance(portal_type, str) and portal_type):
|
447
|
-
if isinstance(portal_type, dict):
|
448
|
-
# It appears that the given portal_type is an actual schema dictionary.
|
449
|
-
portal_type = self.schema_name(portal_type.get("title"))
|
450
|
-
if not (isinstance(portal_type, str) and portal_type):
|
451
|
-
if not (portal_type := self.get_schema_type(portal_object)):
|
452
|
-
return results
|
453
|
-
if not callable(lookup_strategy):
|
454
|
-
lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
|
455
|
-
for identifying_property in self.get_identifying_property_names(portal_type):
|
456
|
-
if not (identifying_value := portal_object.get(identifying_property)):
|
457
|
-
continue
|
458
|
-
# The get_identifying_property_names call above ensures uuid is first if it is in the object.
|
459
|
-
# And also note that ALL schemas do in fact have identifyingProperties which do in fact have
|
460
|
-
# uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
|
461
|
-
# might have a special case to check the Portal object explicitly for uuid, but no need.
|
462
|
-
if identifying_property == "uuid":
|
463
|
-
#
|
464
|
-
# Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
|
465
|
-
# is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
|
466
|
-
#
|
467
|
-
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c
|
468
|
-
# - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
469
|
-
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
470
|
-
# - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
471
|
-
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
|
472
|
-
#
|
473
|
-
# Will result in a (HTTP 301) redirect to:
|
474
|
-
#
|
475
|
-
# - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
|
476
|
-
#
|
477
|
-
# Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
|
478
|
-
# based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
|
479
|
-
# information is contained, for this example, in the snovault.collection decorator for the
|
480
|
-
# endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because
|
481
|
-
# behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
|
482
|
-
# And note the distinction of just using /{uuid} here rather than /{type}/{uuid} as in the else
|
483
|
-
# statement below is not really necessary; just here for emphasis that this is all that's needed.
|
484
|
-
#
|
485
|
-
if first_only is True:
|
486
|
-
results.append(f"/{portal_type}/{identifying_value}")
|
487
|
-
else:
|
488
|
-
results.append(f"/{identifying_value}")
|
489
|
-
elif isinstance(identifying_value, list):
|
490
|
-
for identifying_value_item in identifying_value:
|
491
|
-
if identifying_value_item:
|
492
|
-
results.append(f"/{portal_type}/{identifying_value_item}")
|
493
|
-
else:
|
494
|
-
lookup_options = Portal.LOOKUP_UNDEFINED
|
495
|
-
if schema := self.get_schema(portal_type):
|
496
|
-
if callable(lookup_strategy):
|
497
|
-
lookup_options, validator = lookup_strategy(self, portal_type, schema, identifying_value)
|
498
|
-
if callable(validator):
|
499
|
-
if validator(schema, identifying_property, identifying_value) is False:
|
500
|
-
continue
|
501
|
-
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
502
|
-
if not re.match(pattern, identifying_value):
|
503
|
-
# If this identifying value is for a (identifying) property which has a
|
504
|
-
# pattern, and the value does NOT match the pattern, then do NOT include
|
505
|
-
# this value as an identifying path, since it cannot possibly be found.
|
506
|
-
continue
|
507
|
-
if lookup_options == Portal.LOOKUP_UNDEFINED:
|
508
|
-
lookup_options = Portal.LOOKUP_DEFAULT
|
509
|
-
if is_lookup_root_first(lookup_options):
|
510
|
-
results.append(f"/{identifying_value}")
|
511
|
-
if is_lookup_specified_type(lookup_options) and portal_type:
|
512
|
-
results.append(f"/{portal_type}/{identifying_value}")
|
513
|
-
if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
|
514
|
-
results.append(f"/{identifying_value}")
|
515
|
-
if is_lookup_subtypes(lookup_options):
|
516
|
-
for subtype_name in self.get_schema_subtype_names(portal_type):
|
517
|
-
results.append(f"/{subtype_name}/{identifying_value}")
|
518
|
-
if (first_only is True) and results:
|
519
|
-
return results
|
520
|
-
return results
|
521
|
-
|
522
|
-
@function_cache(maxsize=100, serialize_key=True)
|
523
|
-
def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
|
524
|
-
lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]:
|
525
|
-
if identifying_paths := self.get_identifying_paths(portal_object, portal_type, first_only=True,
|
526
|
-
lookup_strategy=lookup_strategy):
|
527
|
-
return identifying_paths[0]
|
528
|
-
return None
|
529
|
-
|
530
|
-
@function_cache(maxsize=100, serialize_key=True)
|
531
|
-
def get_identifying_property_names(self, schema: Union[str, dict],
|
532
|
-
portal_object: Optional[dict] = None) -> List[str]:
|
533
|
-
"""
|
534
|
-
Returns the list of identifying property names for the given Portal schema, which may be
|
535
|
-
either a schema name or a schema object. If a Portal object is also given then restricts this
|
536
|
-
set of identifying properties to those which actually have values within this Portal object.
|
537
|
-
Favors the uuid and identifier property names and defavors the aliases property name; no other
|
538
|
-
ordering imposed. Returns empty list if no identifying properties or otherwise not found.
|
539
|
-
"""
|
540
|
-
results = []
|
541
|
-
if isinstance(schema, str):
|
542
|
-
if not (schema := self.get_schema(schema)):
|
543
|
-
return results
|
544
|
-
elif not isinstance(schema, dict):
|
545
|
-
return results
|
546
|
-
if not (identifying_properties := get_identifying_properties(schema)):
|
547
|
-
return results
|
548
|
-
identifying_properties = list(set(identifying_properties)) # paranoid dedup
|
549
|
-
identifying_properties = [*identifying_properties] # copy so as not to change schema if given
|
550
|
-
favored_identifying_properties = ["uuid", "identifier"]
|
551
|
-
defavored_identifying_properties = ["aliases"]
|
552
|
-
for favored_identifying_property in reversed(favored_identifying_properties):
|
553
|
-
if favored_identifying_property in identifying_properties:
|
554
|
-
identifying_properties.remove(favored_identifying_property)
|
555
|
-
identifying_properties.insert(0, favored_identifying_property)
|
556
|
-
for defavored_identifying_property in defavored_identifying_properties:
|
557
|
-
if defavored_identifying_property in identifying_properties:
|
558
|
-
identifying_properties.remove(defavored_identifying_property)
|
559
|
-
identifying_properties.append(defavored_identifying_property)
|
560
|
-
if isinstance(portal_object, dict):
|
561
|
-
for identifying_property in [*identifying_properties]:
|
562
|
-
if portal_object.get(identifying_property) is None:
|
563
|
-
identifying_properties.remove(identifying_property)
|
564
|
-
return identifying_properties
|
565
|
-
|
566
|
-
@staticmethod
|
567
|
-
def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
568
|
-
#
|
569
|
-
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
570
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
571
|
-
# PATH | EXAMPLE | LOOKUP RESULT |
|
572
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
573
|
-
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
574
|
-
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
575
|
-
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
576
|
-
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
577
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
578
|
-
# /accession | /SMAFSFXF1RO4 | FOUND |
|
579
|
-
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
580
|
-
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
581
|
-
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
582
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
583
|
-
#
|
584
|
-
def ref_validator(schema: Optional[dict],
|
585
|
-
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
586
|
-
"""
|
587
|
-
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
588
|
-
a Portal path using the given property name and its given property value, otherwise returns None.
|
589
|
-
|
590
|
-
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
591
|
-
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
592
|
-
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
593
|
-
will continue executing its default behavior, which is to check other ways in which the given type
|
594
|
-
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
595
|
-
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
596
|
-
|
597
|
-
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
598
|
-
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
599
|
-
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
600
|
-
identifying property for the given type.
|
601
|
-
"""
|
602
|
-
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
603
|
-
if (property_format == "accession") and (property_name == "accession"):
|
604
|
-
if not Portal._is_accession_id(property_value):
|
605
|
-
return False
|
606
|
-
return None
|
607
|
-
|
608
|
-
DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
|
609
|
-
if not value:
|
610
|
-
return DEFAULT_RESULT
|
611
|
-
if not schema:
|
612
|
-
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
613
|
-
return DEFAULT_RESULT
|
614
|
-
if schema_properties := schema.get("properties"):
|
615
|
-
if schema_properties.get("accession") and Portal._is_accession_id(value):
|
616
|
-
# Case: lookup by accession (only by root).
|
617
|
-
return (Portal.LOOKUP_ROOT, ref_validator)
|
618
|
-
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
619
|
-
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
620
|
-
if re.match(schema_property_pattern_submitted_id, value):
|
621
|
-
# Case: lookup by submitted_id (only by specified type).
|
622
|
-
return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
|
623
|
-
return DEFAULT_RESULT
|
624
|
-
|
625
|
-
@staticmethod
|
626
|
-
def _is_accession_id(value: str) -> bool:
|
627
|
-
# This is here for now because of problems with circular dependencies.
|
628
|
-
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
629
|
-
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
630
|
-
|
631
419
|
def url(self, url: str, raw: bool = False, database: bool = False) -> str:
|
632
420
|
if not isinstance(url, str) or not url:
|
633
421
|
return "/"
|
@@ -728,22 +516,6 @@ class Portal:
|
|
728
516
|
response = TestResponseWrapper(response)
|
729
517
|
return response
|
730
518
|
|
731
|
-
@staticmethod
|
732
|
-
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
733
|
-
if isinstance(arg, TestApp):
|
734
|
-
return arg
|
735
|
-
elif isinstance(arg, VirtualApp):
|
736
|
-
if not isinstance(arg.wrapped_app, TestApp):
|
737
|
-
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
738
|
-
return arg.wrapped_app
|
739
|
-
if isinstance(arg, PyramidRouter):
|
740
|
-
router = arg
|
741
|
-
elif isinstance(arg, str) or not arg:
|
742
|
-
router = pyramid_get_app(arg or "development.ini", "app")
|
743
|
-
else:
|
744
|
-
raise Exception("Portal._create_vapp argument error.")
|
745
|
-
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
746
|
-
|
747
519
|
@staticmethod
|
748
520
|
def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
|
749
521
|
if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
|
@@ -775,6 +547,22 @@ class Portal:
|
|
775
547
|
with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
|
776
548
|
return Portal(ini_file)
|
777
549
|
|
550
|
+
@staticmethod
|
551
|
+
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
552
|
+
if isinstance(arg, TestApp):
|
553
|
+
return arg
|
554
|
+
elif isinstance(arg, VirtualApp):
|
555
|
+
if not isinstance(arg.wrapped_app, TestApp):
|
556
|
+
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
557
|
+
return arg.wrapped_app
|
558
|
+
if isinstance(arg, PyramidRouter):
|
559
|
+
router = arg
|
560
|
+
elif isinstance(arg, str) or not arg:
|
561
|
+
router = pyramid_get_app(arg or "development.ini", "app")
|
562
|
+
else:
|
563
|
+
raise Exception("Portal._create_vapp argument error.")
|
564
|
+
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
565
|
+
|
778
566
|
@staticmethod
|
779
567
|
def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
|
780
568
|
if isinstance(endpoints, dict):
|
dcicutils/schema_utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import os
|
2
2
|
from typing import Any, Dict, List, Optional, Tuple
|
3
|
+
|
3
4
|
from dcicutils.misc_utils import to_camel_case
|
4
5
|
|
5
6
|
|
@@ -8,6 +9,7 @@ class JsonSchemaConstants:
|
|
8
9
|
ARRAY = "array"
|
9
10
|
BOOLEAN = "boolean"
|
10
11
|
DEFAULT = "default"
|
12
|
+
DEPENDENT_REQUIRED = "dependentRequired"
|
11
13
|
ENUM = "enum"
|
12
14
|
FORMAT = "format"
|
13
15
|
INTEGER = "integer"
|
@@ -29,6 +31,10 @@ class EncodedSchemaConstants:
|
|
29
31
|
LINK_TO = "linkTo"
|
30
32
|
MERGE_REF = "$merge"
|
31
33
|
MIXIN_PROPERTIES = "mixinProperties"
|
34
|
+
SUBMISSION_COMMENT = "submissionComment"
|
35
|
+
SUBMISSION_EXAMPLES = "submissionExamples"
|
36
|
+
SUBMITTER_REQUIRED = "submitterRequired"
|
37
|
+
SUGGESTED_ENUM = "suggested_enum"
|
32
38
|
UNIQUE_KEY = "uniqueKey"
|
33
39
|
|
34
40
|
|
@@ -203,6 +209,50 @@ def get_description(schema: Dict[str, Any]) -> str:
|
|
203
209
|
return schema.get(SchemaConstants.DESCRIPTION, "")
|
204
210
|
|
205
211
|
|
212
|
+
def is_submitter_required(schema: Dict[str, Any]) -> bool:
|
213
|
+
"""Return True if the schema is marked as required for submitters.
|
214
|
+
|
215
|
+
Specifically, required for external (i.e. non-admin) submitters.
|
216
|
+
|
217
|
+
This is typically validated within the context of a oneOf, anyOf,
|
218
|
+
or allOf schema on an item type which is used within the team and
|
219
|
+
by external submitters, and is tricky to pick up on automatically.
|
220
|
+
"""
|
221
|
+
return schema.get(SchemaConstants.SUBMITTER_REQUIRED, False)
|
222
|
+
|
223
|
+
|
224
|
+
def get_submission_comment(schema: Dict[str, Any]) -> str:
|
225
|
+
"""Return the submission comment for a property.
|
226
|
+
|
227
|
+
Custom property that can be manually added to a schema to provide
|
228
|
+
additional context for submitters.
|
229
|
+
"""
|
230
|
+
return schema.get(SchemaConstants.SUBMISSION_COMMENT, "")
|
231
|
+
|
232
|
+
|
233
|
+
def get_submission_examples(schema: Dict[str, Any]) -> List[str]:
|
234
|
+
"""Return the submission examples for a property.
|
235
|
+
|
236
|
+
Custom property that can be manually added to a schema to provide
|
237
|
+
an example for submitters.
|
238
|
+
"""
|
239
|
+
return schema.get(SchemaConstants.SUBMISSION_EXAMPLES, [])
|
240
|
+
|
241
|
+
|
242
|
+
def get_suggested_enum(schema: Dict[str, Any]) -> List[str]:
|
243
|
+
"""Return the suggested enum for a property.
|
244
|
+
|
245
|
+
Custom property that can be manually added to a schema to provide
|
246
|
+
a suggested list of values for submitters.
|
247
|
+
"""
|
248
|
+
return schema.get(SchemaConstants.SUGGESTED_ENUM, [])
|
249
|
+
|
250
|
+
|
251
|
+
def get_dependent_required(schema: Dict[str, Any]) -> Dict[str, List[str]]:
|
252
|
+
"""Return the dependent required properties of a schema."""
|
253
|
+
return schema.get(SchemaConstants.DEPENDENT_REQUIRED, {})
|
254
|
+
|
255
|
+
|
206
256
|
class Schema:
|
207
257
|
|
208
258
|
def __init__(self, schema: dict, type: Optional[str] = None) -> None:
|
dcicutils/structured_data.py
CHANGED
@@ -11,6 +11,7 @@ from webtest.app import TestApp
|
|
11
11
|
from dcicutils.common import OrchestratedApp
|
12
12
|
from dcicutils.data_readers import CsvReader, Excel, RowReader
|
13
13
|
from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
|
14
|
+
from dcicutils.file_utils import search_for_file
|
14
15
|
from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
|
15
16
|
merge_objects, remove_empty_properties, right_trim, split_string,
|
16
17
|
to_boolean, to_enum, to_float, to_integer, VirtualApp)
|
@@ -55,7 +56,7 @@ class StructuredDataSet:
|
|
55
56
|
remove_empty_objects_from_lists: bool = True,
|
56
57
|
ref_lookup_strategy: Optional[Callable] = None,
|
57
58
|
ref_lookup_nocache: bool = False,
|
58
|
-
norefs: bool = False,
|
59
|
+
norefs: bool = False,
|
59
60
|
progress: Optional[Callable] = None,
|
60
61
|
debug_sleep: Optional[str] = None) -> None:
|
61
62
|
self._progress = progress if callable(progress) else None
|
@@ -74,7 +75,6 @@ class StructuredDataSet:
|
|
74
75
|
self._nrows = 0
|
75
76
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
76
77
|
self._norefs = True if norefs is True else False
|
77
|
-
self._merge = True if merge is True else False # New merge functionality (2024-05-25)
|
78
78
|
self._debug_sleep = None
|
79
79
|
if debug_sleep:
|
80
80
|
try:
|
@@ -98,13 +98,13 @@ class StructuredDataSet:
|
|
98
98
|
remove_empty_objects_from_lists: bool = True,
|
99
99
|
ref_lookup_strategy: Optional[Callable] = None,
|
100
100
|
ref_lookup_nocache: bool = False,
|
101
|
-
norefs: bool = False,
|
101
|
+
norefs: bool = False,
|
102
102
|
progress: Optional[Callable] = None,
|
103
103
|
debug_sleep: Optional[str] = None) -> StructuredDataSet:
|
104
104
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
|
105
105
|
remove_empty_objects_from_lists=remove_empty_objects_from_lists,
|
106
106
|
ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
|
107
|
-
norefs=norefs,
|
107
|
+
norefs=norefs, progress=progress, debug_sleep=debug_sleep)
|
108
108
|
|
109
109
|
def validate(self, force: bool = False) -> None:
|
110
110
|
def data_without_deleted_properties(data: dict) -> dict:
|
@@ -208,6 +208,14 @@ class StructuredDataSet:
|
|
208
208
|
result.append({"type": type_name, "file": file_name})
|
209
209
|
return result
|
210
210
|
|
211
|
+
def upload_files_located(self,
|
212
|
+
location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
|
213
|
+
upload_files = copy.deepcopy(self.upload_files)
|
214
|
+
for upload_file in upload_files:
|
215
|
+
if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
|
216
|
+
upload_file["path"] = file_path
|
217
|
+
return upload_files
|
218
|
+
|
211
219
|
@property
|
212
220
|
def nrows(self) -> int:
|
213
221
|
return self._nrows
|
@@ -342,23 +350,18 @@ class StructuredDataSet:
|
|
342
350
|
|
343
351
|
def _load_json_file(self, file: str) -> None:
|
344
352
|
with open(file) as f:
|
345
|
-
|
346
|
-
|
347
|
-
|
353
|
+
file_json = json.load(f)
|
354
|
+
schema_inferred_from_file_name = Schema.type_name(file)
|
355
|
+
if self._portal.get_schema(schema_inferred_from_file_name) is not None:
|
348
356
|
# If the JSON file name looks like a schema name then assume it
|
349
357
|
# contains an object or an array of objects of that schema type.
|
350
|
-
|
351
|
-
|
352
|
-
self._add(Schema.type_name(file), data)
|
353
|
-
elif isinstance(data, dict):
|
358
|
+
self._add(Schema.type_name(file), file_json)
|
359
|
+
elif isinstance(file_json, dict):
|
354
360
|
# Otherwise if the JSON file name does not look like a schema name then
|
355
361
|
# assume it is a dictionary where each property is the name of a schema, and
|
356
362
|
# which (each property) contains a list of objects of that schema type.
|
357
|
-
for schema_name in
|
358
|
-
|
359
|
-
if self._merge: # New merge functionality (2024-05-25)
|
360
|
-
item = self._merge_with_existing_portal_object(item, schema_name)
|
361
|
-
self._add(schema_name, item)
|
363
|
+
for schema_name in file_json:
|
364
|
+
self._add(schema_name, file_json[schema_name])
|
362
365
|
|
363
366
|
def _load_reader(self, reader: RowReader, type_name: str) -> None:
|
364
367
|
schema = None
|
@@ -380,13 +383,11 @@ class StructuredDataSet:
|
|
380
383
|
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
381
384
|
if self._autoadd_properties:
|
382
385
|
self._add_properties(structured_row, self._autoadd_properties, schema)
|
383
|
-
if self._merge: # New merge functionality (2024-05-25)
|
384
|
-
structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
|
385
386
|
if (prune_error := self._prune_structured_row(structured_row)) is not None:
|
386
387
|
self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
|
387
388
|
"error": prune_error}, "validation")
|
388
389
|
else:
|
389
|
-
self._add(type_name, structured_row)
|
390
|
+
self._add(type_name, structured_row)
|
390
391
|
if self._progress:
|
391
392
|
self._progress({
|
392
393
|
PROGRESS.LOAD_ITEM: self._nrows,
|
@@ -427,18 +428,6 @@ class StructuredDataSet:
|
|
427
428
|
if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
|
428
429
|
structured_row[name] = properties[name]
|
429
430
|
|
430
|
-
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
|
431
|
-
"""
|
432
|
-
Given a Portal object (presumably/in-practice from the given metadata), if there is
|
433
|
-
an existing Portal item, identified by the identifying properties for the given object,
|
434
|
-
then merges the given object into the existing one and returns the result; otherwise
|
435
|
-
just returns the given object. Note that the given object may be CHANGED in place.
|
436
|
-
"""
|
437
|
-
for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
|
438
|
-
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
|
439
|
-
return merge_objects(existing_portal_object, portal_object, primitive_lists=True)
|
440
|
-
return portal_object
|
441
|
-
|
442
431
|
def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
|
443
432
|
return (ref_lookup_flags &
|
444
433
|
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
@@ -2,45 +2,39 @@ import re
|
|
2
2
|
from typing import Optional
|
3
3
|
from dcicutils.structured_data import Portal
|
4
4
|
|
5
|
-
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
-
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
-
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
-
|
9
5
|
|
10
6
|
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
11
7
|
#
|
12
|
-
# Note this
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
# /submitted_id
|
17
|
-
# /
|
18
|
-
#
|
19
|
-
# /
|
20
|
-
#
|
21
|
-
# /accession
|
22
|
-
# /
|
23
|
-
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
-
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
-
# -----------------------------+-----------------------------------------------+---------------+
|
8
|
+
# FYI: Note this situation WRT object lookups ...
|
9
|
+
#
|
10
|
+
# /{submitted_id} # NOT FOUND
|
11
|
+
# /UnalignedReads/{submitted_id} # OK
|
12
|
+
# /SubmittedFile/{submitted_id} # OK
|
13
|
+
# /File/{submitted_id} # NOT FOUND
|
14
|
+
#
|
15
|
+
# /{accession} # OK
|
16
|
+
# /UnalignedReads/{accession} # NOT FOUND
|
17
|
+
# /SubmittedFile/{accession} # NOT FOUND
|
18
|
+
# /File/{accession} # OK
|
26
19
|
#
|
27
20
|
def ref_validator(schema: Optional[dict],
|
28
21
|
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
29
22
|
"""
|
30
|
-
Returns False iff
|
31
|
-
|
23
|
+
Returns False iff the type represented by the given schema, can NOT be referenced by
|
24
|
+
the given property name with the given property value, otherwise returns None.
|
32
25
|
|
33
|
-
For example, if the schema is for UnalignedReads and the property name
|
34
|
-
return False iff the given property value is NOT a properly
|
35
|
-
will return None, which indicates that the
|
36
|
-
|
37
|
-
|
38
|
-
|
26
|
+
For example, if the schema is for the UnalignedReads type and the property name
|
27
|
+
is accession, then we will return False iff the given property value is NOT a properly
|
28
|
+
formatted accession ID. Otherwise, we will return None, which indicates that the
|
29
|
+
caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
|
30
|
+
its default behavior, which is to check other ways in which the given type can NOT
|
31
|
+
be referenced by the given value, i.e. it checks other identifying properties for
|
32
|
+
the type and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
39
33
|
|
40
|
-
The goal (in structured_data) being to detect if a type is being referenced in such
|
41
|
-
|
42
|
-
if indeed there any requirements.
|
43
|
-
identifying property for the given type.
|
34
|
+
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
+
a way that cannot possibly be allowed, i.e. because none of its identifying types
|
36
|
+
are in the required form (if indeed there are any requirements). Note that it is guaranteed
|
37
|
+
that the given property name is indeed an identifying property for the given type.
|
44
38
|
"""
|
45
39
|
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
46
40
|
if (property_format == "accession") and (property_name == "accession"):
|
@@ -68,6 +62,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
|
|
68
62
|
|
69
63
|
|
70
64
|
# This is here for now because of problems with circular dependencies.
|
71
|
-
# See: smaht-portal/.../schema_formats.py
|
65
|
+
# See: smaht-portal/.../schema_formats.py
|
72
66
|
def _is_accession_id(value: str) -> bool:
|
73
67
|
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
@@ -5,7 +5,7 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
|
|
5
5
|
dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
|
6
6
|
dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
|
7
7
|
dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
|
8
|
-
dcicutils/command_utils.py,sha256=
|
8
|
+
dcicutils/command_utils.py,sha256=JExll5TMqIcmuiGvuS8q4XDUvoEfi2oSH0E2FVF6suU,15285
|
9
9
|
dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
|
10
10
|
dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
|
11
11
|
dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
|
@@ -44,11 +44,11 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
44
44
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
45
45
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
46
46
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
47
|
-
dcicutils/misc_utils.py,sha256
|
47
|
+
dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
|
48
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
49
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
50
|
-
dcicutils/portal_object_utils.py,sha256=
|
51
|
-
dcicutils/portal_utils.py,sha256=
|
50
|
+
dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
|
51
|
+
dcicutils/portal_utils.py,sha256=DYyE5o15GekDgzpJWas9iS7klAYbjJZUPW0G42McArk,30779
|
52
52
|
dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
|
53
53
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
54
54
|
dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
|
@@ -56,7 +56,7 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
|
|
56
56
|
dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
|
57
57
|
dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
|
58
58
|
dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
|
59
|
-
dcicutils/schema_utils.py,sha256=
|
59
|
+
dcicutils/schema_utils.py,sha256=GmRm-XqZKJ6qine16SQF1txcby9WougDav_sYmKNs9E,12400
|
60
60
|
dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
|
61
61
|
dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
|
62
62
|
dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
|
@@ -64,17 +64,17 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
64
64
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
65
65
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
66
66
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
67
|
-
dcicutils/structured_data.py,sha256=
|
67
|
+
dcicutils/structured_data.py,sha256=XOMxrmkJohdCAyCJU09uI8ivthTKrtSSYReFbC9VYMs,63058
|
68
68
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
69
|
-
dcicutils/submitr/ref_lookup_strategy.py,sha256=
|
69
|
+
dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
|
70
70
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
71
71
|
dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
|
72
72
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
73
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
75
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
-
dcicutils-8.
|
77
|
-
dcicutils-8.
|
78
|
-
dcicutils-8.
|
79
|
-
dcicutils-8.
|
80
|
-
dcicutils-8.
|
76
|
+
dcicutils-8.10.0.0b0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.10.0.0b0.dist-info/METADATA,sha256=vcIAVrY7xnIajSS-yHlYgIrl9PGr2EmjUnnG9jDctEQ,3440
|
78
|
+
dcicutils-8.10.0.0b0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.10.0.0b0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.10.0.0b0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|