dcicutils 8.10.0.0b0__py3-none-any.whl → 8.10.0.1b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/command_utils.py +69 -1
- dcicutils/license_policies/park-lab-common.jsonc +6 -0
- dcicutils/misc_utils.py +41 -10
- dcicutils/portal_object_utils.py +24 -89
- dcicutils/portal_utils.py +237 -36
- dcicutils/schema_utils.py +0 -50
- dcicutils/structured_data.py +31 -20
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- {dcicutils-8.10.0.0b0.dist-info → dcicutils-8.10.0.1b1.dist-info}/METADATA +1 -1
- {dcicutils-8.10.0.0b0.dist-info → dcicutils-8.10.0.1b1.dist-info}/RECORD +13 -13
- {dcicutils-8.10.0.0b0.dist-info → dcicutils-8.10.0.1b1.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.10.0.0b0.dist-info → dcicutils-8.10.0.1b1.dist-info}/WHEEL +0 -0
- {dcicutils-8.10.0.0b0.dist-info → dcicutils-8.10.0.1b1.dist-info}/entry_points.txt +0 -0
dcicutils/command_utils.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
from __future__ import annotations
|
1
2
|
import contextlib
|
2
3
|
import functools
|
3
4
|
import glob
|
@@ -7,7 +8,7 @@ import re
|
|
7
8
|
import requests
|
8
9
|
import subprocess
|
9
10
|
|
10
|
-
from typing import Optional
|
11
|
+
from typing import Callable, Optional
|
11
12
|
from .exceptions import InvalidParameterError
|
12
13
|
from .lang_utils import there_are
|
13
14
|
from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
|
@@ -384,3 +385,70 @@ def script_catch_errors():
|
|
384
385
|
message = str(e) # Note: We ignore the type, which isn't intended to be shown.
|
385
386
|
PRINT(message)
|
386
387
|
exit(1)
|
388
|
+
|
389
|
+
|
390
|
+
class Question:
    """
    Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after
    some maximum number of times of the same answer in a row (consecutively), then asks them
    if they want to automatically give that same answer to any/all subsequent questions.
    Supports a static/global registry of such Question instances, keyed (only) by the question text.
    """
    # Registry used by Question.instance/Question.yes: question text -> shared Question instance.
    _static_instances = {}

    @staticmethod
    def instance(question: Optional[str] = None,
                 max: Optional[int] = None, printf: Optional[Callable] = None) -> "Question":
        """
        Returns the shared Question instance registered for the given question text, creating
        and registering it on first use. A non-string question is treated as the empty string.
        The max and printf arguments are only used when the instance is first created.
        """
        question = question if isinstance(question, str) else ""
        instance = Question._static_instances.get(question)
        if not instance:
            instance = Question(question, max=max, printf=printf)
            Question._static_instances[question] = instance
        return instance

    @staticmethod
    def yes(question: Optional[str] = None,
            max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
        """
        Asks the given question via the shared instance for its text (see Question.instance)
        and returns True for a yes answer, otherwise False.
        """
        return Question.instance(question, max=max, printf=printf).ask()

    def __init__(self, question: Optional[str] = None,
                 max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
        """
        :param question: Default question text; a non-string is treated as the empty string.
        :param max: Number of consecutive identical answers after which the user is offered to
            give that answer automatically from then on; ignored unless it is a positive integer.
        :param printf: Output function; falls back to the builtin print if not callable.
        """
        self._question = question if isinstance(question, str) else ""
        self._max = max if isinstance(max, int) and max > 0 else None
        # NOTE(review): _print is stored here but is not used anywhere within this class.
        self._print = printf if callable(printf) else print
        self._yes_consecutive_count = 0
        self._no_consecutive_count = 0
        self._yes_automatic = False
        self._no_automatic = False

    def ask(self, question: Optional[str] = None) -> bool:
        """
        Asks the given question (or else the one given at construction, or else a placeholder)
        and returns True for yes or False for no. Once an automatic answer has been accepted
        by the user, returns that answer immediately without prompting.
        """

        def question_automatic(value: str) -> bool:
            # Returns True iff the user agrees to answer the given value (YES or NO) to all
            # subsequent questions; if they decline then both consecutive counters are reset
            # so that the offer is only repeated after another full run of identical answers.
            RARROW = "▶"
            LARROW = "◀"
            if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
                         f" Do you want to answer {value} to all such questions?"
                         f" {LARROW}{LARROW}{LARROW}"):
                return True
            self._yes_consecutive_count = 0
            self._no_consecutive_count = 0
            return False

        if self._yes_automatic:
            return True
        if self._no_automatic:
            return False
        if yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
            self._yes_consecutive_count += 1
            self._no_consecutive_count = 0
            if self._max and (self._yes_consecutive_count >= self._max):
                # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
                if question_automatic("YES"):
                    self._yes_automatic = True
            return True
        self._no_consecutive_count += 1
        self._yes_consecutive_count = 0
        if self._max and (self._no_consecutive_count >= self._max):
            # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
            if question_automatic("NO"):
                self._no_automatic = True
        return False
|
@@ -248,6 +248,12 @@
|
|
248
248
|
"docutils" // Used only privately as a separate documentation-generation task for ReadTheDocs
|
249
249
|
],
|
250
250
|
|
251
|
+
|
252
|
+
"GNU General Public License v2 (GPLv2)": [
|
253
|
+
"pyinstaller",
|
254
|
+
"pyinstaller-hooks-contrib"
|
255
|
+
],
|
256
|
+
|
251
257
|
"MIT/X11 Derivative": [
|
252
258
|
// The license used by libxkbcommon is complicated and involves numerous included licenses,
|
253
259
|
// but all are permissive.
|
dcicutils/misc_utils.py
CHANGED
@@ -4,6 +4,7 @@ This file contains functions that might be generally useful.
|
|
4
4
|
|
5
5
|
from collections import namedtuple
|
6
6
|
import appdirs
|
7
|
+
from copy import deepcopy
|
7
8
|
import contextlib
|
8
9
|
import datetime
|
9
10
|
import functools
|
@@ -2199,28 +2200,58 @@ def merge_key_value_dict_lists(x, y):
|
|
2199
2200
|
return [key_value_dict(k, v) for k, v in merged.items()]
|
2200
2201
|
|
2201
2202
|
|
2202
|
-
def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]],
                  full: bool = False,  # deprecated
                  expand_lists: Optional[bool] = None,
                  primitive_lists: bool = False,
                  copy: bool = False, _recursing: bool = False) -> Union[dict, List[Any]]:
    """
    Merges the given source dictionary or list into the target dictionary or list and returns the
    result. This MAY well change the given target (dictionary or list) IN PLACE ... UNLESS the copy
    argument is True, then the given target will not change as a local copy is made (and returned).
    If the given target is None then the given source itself is returned.

    If the expand_lists argument is True then any target lists longer than the
    source will be filled out with the last element of the source; the full
    argument (is deprecated and) is a synonym for this. The default is False.

    If the primitive_lists argument is True then lists of primitives (i.e. lists in which
    NONE of its elements are dictionaries, lists, or tuples) which are dictionary values will
    themselves be treated like primitives, meaning the whole of a source list will replace the
    corresponding target; otherwise they will be merged normally, meaning each element of a source
    list will be merged, recursively, into the corresponding target list. The default is False.
    This applies to dictionary values at any nesting depth.
    """
    def is_primitive_list(value: Any) -> bool:  # noqa
        if not isinstance(value, list):
            return False
        return not any(isinstance(item, (dict, list, tuple)) for item in value)

    if target is None:
        return source
    if expand_lists not in (True, False):
        expand_lists = full is True
    if (copy is True) and (_recursing is not True):
        # Only the outermost call copies; recursive calls operate on that one copy.
        target = deepcopy(target)
    if isinstance(target, dict) and isinstance(source, dict) and source:
        for key, value in source.items():
            if key not in target:
                target[key] = value
            elif (primitive_lists is True) and is_primitive_list(target[key]) and is_primitive_list(value):
                target[key] = value
            else:
                # Pass primitive_lists down so that nested dictionaries honor it as well.
                target[key] = merge_objects(target[key], value, expand_lists=expand_lists,
                                            primitive_lists=primitive_lists, _recursing=True)
    elif isinstance(target, list) and isinstance(source, list) and source:
        for i in range(max(len(source), len(target))):
            if i < len(target):
                if i < len(source):
                    target[i] = merge_objects(target[i], source[i], expand_lists=expand_lists,
                                              primitive_lists=primitive_lists, _recursing=True)
                elif expand_lists is True:
                    # Target is longer than source; keep merging in the last source element.
                    target[i] = merge_objects(target[i], source[len(source) - 1], expand_lists=expand_lists,
                                              primitive_lists=primitive_lists, _recursing=True)
            else:
                target.append(source[i])
    elif source not in (None, {}, []):
        # Primitive (or mismatched-type) source replaces the target; this includes falsy
        # values such as 0, False, and the empty string, but not None or empty containers.
        target = source
    return target
|
2226
2257
|
|
dcicutils/portal_object_utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from functools import lru_cache
|
3
|
-
import re
|
4
3
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
5
4
|
from dcicutils.data_readers import RowReader
|
6
5
|
from dcicutils.misc_utils import create_readonly_object
|
@@ -14,11 +13,9 @@ class PortalObject:
|
|
14
13
|
|
15
14
|
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
|
16
15
|
|
17
|
-
def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
    """
    Wraps the given Portal object data. Arguments of an unexpected type are replaced with
    neutral values: a non-dict data becomes an empty dictionary, a non-Portal portal becomes
    None, and a non-string type becomes the empty string.
    """
    if not isinstance(data, dict):
        data = {}
    if not isinstance(portal, Portal):
        portal = None
    if not isinstance(type, str):
        type = ""
    self._data = data
    self._portal = portal
    self._type = type
|
23
20
|
|
24
21
|
@property
|
@@ -32,7 +29,7 @@ class PortalObject:
|
|
32
29
|
@property
|
33
30
|
@lru_cache(maxsize=1)
|
34
31
|
def type(self) -> str:
    """
    Returns the type name given at construction if any, otherwise the type which
    Portal.get_schema_type derives from the object data, otherwise the empty string.
    """
    explicit_type = self._type
    if explicit_type:
        return explicit_type
    derived_type = Portal.get_schema_type(self._data)
    return derived_type if derived_type else ""
|
36
33
|
|
37
34
|
@property
|
38
35
|
@lru_cache(maxsize=1)
|
@@ -47,7 +44,7 @@ class PortalObject:
|
|
47
44
|
@property
|
48
45
|
@lru_cache(maxsize=1)
|
49
46
|
def schema(self) -> Optional[dict]:
    """
    Returns the schema which the associated Portal reports for the type of this
    object, or None if this object has no associated Portal.
    """
    if not self._portal:
        return None
    return self._portal.get_schema(self.type)
|
51
48
|
|
52
49
|
def copy(self) -> PortalObject:
|
53
50
|
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
|
@@ -59,39 +56,29 @@ class PortalObject:
|
|
59
56
|
Returns the list of all identifying property names of this Portal object which actually have values.
|
60
57
|
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
|
61
58
|
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
|
59
|
+
Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
|
62
60
|
"""
|
63
|
-
|
64
|
-
|
65
|
-
identifying_properties = []
|
66
|
-
for identifying_property in schema_identifying_properties:
|
67
|
-
if identifying_property not in ["uuid", "identifier", "aliases"]:
|
68
|
-
if self._data.get(identifying_property):
|
69
|
-
identifying_properties.append(identifying_property)
|
70
|
-
if self._data.get("identifier"):
|
71
|
-
identifying_properties.insert(0, "identifier")
|
72
|
-
if self._data.get("uuid"):
|
73
|
-
identifying_properties.insert(0, "uuid")
|
74
|
-
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
|
75
|
-
identifying_properties.append("aliases")
|
76
|
-
return identifying_properties or None
|
61
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_property_names (2024-05-26).
|
62
|
+
return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
|
77
63
|
|
78
64
|
@lru_cache(maxsize=8192)
|
79
65
|
def lookup(self, raw: bool = False,
           ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
    """
    Tries each identifying path of this object against the Portal, in order, until one
    is fetched with an HTTP 200 status. Returns a tuple of: the found object (or None),
    the path by which it was found (or the first path tried if not found, or None if
    there are no identifying paths at all), and the number of paths tried.
    Any exception raised while looking up is swallowed and treated as not found.
    """
    candidate_paths = self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)
    if not candidate_paths:
        return None, None, 0
    first_path = None
    attempts = 0
    try:
        for attempts, path in enumerate(candidate_paths, start=1):
            first_path = first_path or path
            if not self._portal:
                continue
            response = self._portal.get(path, raw=raw)
            if response and (response.status_code == 200):
                found = PortalObject(response.json(), portal=self._portal, type=self.type if raw else None)
                return found, path, attempts
    except Exception:
        # Best effort: a failed lookup is reported as not found rather than raised.
        pass
    return None, first_path, attempts
|
@@ -159,64 +146,12 @@ class PortalObject:
|
|
159
146
|
|
160
147
|
@lru_cache(maxsize=1)
|
161
148
|
def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
    """
    Returns the identifying Portal paths for this object as computed by the associated
    Portal (passing it this object's data, its schema, and the given lookup strategy).
    With no associated Portal, returns just the /{uuid} path if this object has a uuid,
    otherwise None.
    """
    # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
    portal = self._portal
    if portal:
        return portal.get_identifying_paths(self._data,
                                            portal_type=self.schema,
                                            lookup_strategy=ref_lookup_strategy)
    uuid = self.uuid
    return [f"/{uuid}"] if uuid else None
|
220
155
|
|
221
156
|
def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
|
222
157
|
"""
|
dcicutils/portal_utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from collections import deque
|
2
2
|
from functools import lru_cache
|
3
|
+
from dcicutils.function_cache_decorator import function_cache
|
3
4
|
import io
|
4
5
|
import json
|
5
6
|
from pyramid.config import Configurator as PyramidConfigurator
|
@@ -18,6 +19,7 @@ from wsgiref.simple_server import make_server as wsgi_make_server
|
|
18
19
|
from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
|
19
20
|
from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
|
20
21
|
from dcicutils.misc_utils import to_camel_case, VirtualApp
|
22
|
+
from dcicutils.schema_utils import get_identifying_properties
|
21
23
|
from dcicutils.tmpfile_utils import temporary_file
|
22
24
|
|
23
25
|
Portal = Type["Portal"] # Forward type reference for type hints.
|
@@ -48,15 +50,16 @@ class Portal:
|
|
48
50
|
FILE_TYPE_SCHEMA_NAME = "File"
|
49
51
|
|
50
52
|
# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
|
51
|
-
# structured_data.py; controlled by an optional
|
53
|
+
# structured_data.py; controlled by an optional lookup_strategy callable; default is
|
52
54
|
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
|
53
55
|
# can choose to lookup root path first, or not lookup root path at all, or not lookup
|
54
|
-
# subtypes at all; the
|
56
|
+
# subtypes at all; the lookup_strategy callable if specified should take a type_name
|
55
57
|
# and value (string) arguments and return an integer of any of the below ORed together.
|
56
58
|
# The main purpose of this is optimization; to minimize portal lookups; since for example,
|
57
59
|
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
|
58
60
|
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
|
59
61
|
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
|
62
|
+
LOOKUP_UNDEFINED = 0
|
60
63
|
LOOKUP_SPECIFIED_TYPE = 0x0001
|
61
64
|
LOOKUP_ROOT = 0x0002
|
62
65
|
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
|
@@ -205,23 +208,6 @@ class Portal:
|
|
205
208
|
def vapp(self) -> Optional[TestApp]:
|
206
209
|
return self._vapp
|
207
210
|
|
208
|
-
@staticmethod
|
209
|
-
def is_lookup_specified_type(lookup_options: int) -> bool:
|
210
|
-
return (lookup_options &
|
211
|
-
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
212
|
-
|
213
|
-
@staticmethod
|
214
|
-
def is_lookup_root(lookup_options: int) -> bool:
|
215
|
-
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
216
|
-
|
217
|
-
@staticmethod
|
218
|
-
def is_lookup_root_first(lookup_options: int) -> bool:
|
219
|
-
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
220
|
-
|
221
|
-
@staticmethod
|
222
|
-
def is_lookup_subtypes(lookup_options: int) -> bool:
|
223
|
-
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
224
|
-
|
225
211
|
def get(self, url: str, follow: bool = True,
|
226
212
|
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
|
227
213
|
url = self.url(url, raw, database)
|
@@ -305,7 +291,10 @@ class Portal:
|
|
305
291
|
|
306
292
|
@lru_cache(maxsize=100)
|
307
293
|
def get_schema(self, schema_name: str) -> Optional[dict]:
    """
    Returns the schema for the given schema name (normalized via self.schema_name),
    or None if retrieving it raises any exception.
    """
    try:
        normalized_name = self.schema_name(schema_name)
        schema = get_schema(normalized_name, portal_vapp=self.vapp, key=self.key)
    except Exception:
        return None
    return schema
|
309
298
|
|
310
299
|
@lru_cache(maxsize=1)
|
311
300
|
def get_schemas(self) -> dict:
|
@@ -416,6 +405,218 @@ class Portal:
|
|
416
405
|
return []
|
417
406
|
return schemas_super_type_map.get(type_name, [])
|
418
407
|
|
408
|
+
@function_cache(maxsize=100, serialize_key=True)
|
409
|
+
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
                          first_only: bool = False,
                          lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
    """
    Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
    and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
    no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
    Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_strategy
    module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
    this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.

    The given portal_type may be a type name or a schema dictionary (in which case its title is used);
    if it is neither then the type is derived from the given Portal object via self.get_schema_type.

    If first_only is True then returns as soon as one identifying property has yielded any paths.

    The given lookup_strategy, if callable, is called as lookup_strategy(portal, type_name, schema, value)
    and must return a tuple of lookup options (the Portal.LOOKUP_xxx values ORed together) and an optional
    validator callable; if it is False then no lookup strategy is used; if it is any other non-callable
    value (e.g. None) then Portal._lookup_strategy is used.
    """
    def has_lookup_option(lookup_options: int, option: int) -> bool:
        # True iff ALL of the bits of the given option are set in the given lookup options.
        return (lookup_options & option) == option

    results = []
    if not isinstance(portal_object, dict):
        return results
    if not (isinstance(portal_type, str) and portal_type):
        if isinstance(portal_type, dict):
            # It appears that the given portal_type is an actual schema dictionary.
            portal_type = self.schema_name(portal_type.get("title"))
        if not (isinstance(portal_type, str) and portal_type):
            if not (portal_type := self.get_schema_type(portal_object)):
                return results
    if not callable(lookup_strategy):
        lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
    for identifying_property in self.get_identifying_property_names(portal_type):
        if not (identifying_value := portal_object.get(identifying_property)):
            continue
        # The get_identifying_property_names call above ensures uuid is first if it is in the object.
        # And also note that ALL schemas do in fact have identifyingProperties which do in fact have
        # uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
        # might have a special case to check the Portal object explicitly for uuid, but no need.
        if identifying_property == "uuid":
            #
            # Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
            # is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
            #
            # - /d13d06c1-218e-4f61-aaf0-91f226248b3c
            # - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
            # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
            # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
            # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
            #
            # Will result in a (HTTP 301) redirect to:
            #
            # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
            #
            # Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
            # based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
            # information is contained, for this example, in the snovault.collection decorator for the
            # endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because
            # behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
            # And note the distinction of just using /{uuid} here rather than /{type}/{uuid} as in the else
            # statement below is not really necessary; just here for emphasis that this is all that's needed.
            #
            # TODO
            # Consider (from PR-308) writing a portal API for retrieving possible path formats.
            #
            if first_only is True:
                results.append(f"/{portal_type}/{identifying_value}")
            else:
                results.append(f"/{identifying_value}")
        elif isinstance(identifying_value, list):
            for identifying_value_item in identifying_value:
                if identifying_value_item:
                    results.append(f"/{portal_type}/{identifying_value_item}")
        else:
            lookup_options = Portal.LOOKUP_UNDEFINED
            if schema := self.get_schema(portal_type):
                if callable(lookup_strategy):
                    lookup_options, validator = lookup_strategy(self, portal_type, schema, identifying_value)
                    if callable(validator):
                        if validator(schema, identifying_property, identifying_value) is False:
                            continue
                if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
                    # A pattern can only be checked against a string; re.match raises TypeError for
                    # anything else, so non-string values are not subjected to the pattern check.
                    if isinstance(identifying_value, str) and not re.match(pattern, identifying_value):
                        # If this identifying value is for a (identifying) property which has a
                        # pattern, and the value does NOT match the pattern, then do NOT include
                        # this value as an identifying path, since it cannot possibly be found.
                        continue
            if lookup_options == Portal.LOOKUP_UNDEFINED:
                lookup_options = Portal.LOOKUP_DEFAULT
            root_first = has_lookup_option(lookup_options, Portal.LOOKUP_ROOT_FIRST)
            if root_first:
                results.append(f"/{identifying_value}")
            if has_lookup_option(lookup_options, Portal.LOOKUP_SPECIFIED_TYPE) and portal_type:
                results.append(f"/{portal_type}/{identifying_value}")
            if has_lookup_option(lookup_options, Portal.LOOKUP_ROOT) and not root_first:
                results.append(f"/{identifying_value}")
            if has_lookup_option(lookup_options, Portal.LOOKUP_SUBTYPES):
                for subtype_name in self.get_schema_subtype_names(portal_type):
                    results.append(f"/{subtype_name}/{identifying_value}")
        if (first_only is True) and results:
            return results
    return results
|
510
|
+
|
511
|
+
@function_cache(maxsize=100, serialize_key=True)
|
512
|
+
def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
                         lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]:
    """
    Returns the first of the identifying paths which self.get_identifying_paths (called
    with first_only=True) yields for the given Portal object, or None if it yields none.
    """
    identifying_paths = self.get_identifying_paths(portal_object, portal_type, first_only=True,
                                                   lookup_strategy=lookup_strategy)
    return identifying_paths[0] if identifying_paths else None
|
518
|
+
|
519
|
+
@function_cache(maxsize=100, serialize_key=True)
|
520
|
+
def get_identifying_property_names(self, schema: Union[str, dict],
                                   portal_object: Optional[dict] = None) -> List[str]:
    """
    Returns the list of identifying property names for the given Portal schema, which may be
    either a schema name or a schema object. If a Portal object is also given then restricts this
    set of identifying properties to those which actually have values within this Portal object.
    Favors the uuid and identifier property names and defavors the aliases property name; the
    remaining names keep the order in which the schema lists them. Returns empty list if no
    identifying properties or otherwise not found.
    """
    results = []
    if isinstance(schema, str):
        if not (schema := self.get_schema(schema)):
            return results
    elif not isinstance(schema, dict):
        return results
    if not (identifying_properties := get_identifying_properties(schema)):
        return results
    # Paranoid dedup; done via dict.fromkeys (rather than set) so the resulting order is
    # deterministic from run to run; this also yields a new list, leaving the schema untouched.
    identifying_properties = list(dict.fromkeys(identifying_properties))
    favored_identifying_properties = ["uuid", "identifier"]
    defavored_identifying_properties = ["aliases"]
    favored = [name for name in favored_identifying_properties if name in identifying_properties]
    defavored = [name for name in defavored_identifying_properties if name in identifying_properties]
    others = [name for name in identifying_properties
              if (name not in favored_identifying_properties) and (name not in defavored_identifying_properties)]
    identifying_properties = [*favored, *others, *defavored]
    if isinstance(portal_object, dict):
        # Keep only those identifying properties for which the given object has a (non-None) value.
        identifying_properties = [name for name in identifying_properties
                                  if portal_object.get(name) is not None]
    return identifying_properties
|
554
|
+
|
555
|
+
@staticmethod
|
556
|
+
def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
557
|
+
#
|
558
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
559
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
560
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
561
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
562
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
563
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
564
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
565
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
566
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
567
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
568
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
569
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
570
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
571
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
572
|
+
#
|
573
|
+
def ref_validator(schema: Optional[dict],
|
574
|
+
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
575
|
+
"""
|
576
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
577
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
578
|
+
|
579
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
580
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
581
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
582
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
583
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
584
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
585
|
+
|
586
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
587
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
588
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
589
|
+
identifying property for the given type.
|
590
|
+
"""
|
591
|
+
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
592
|
+
if (property_format == "accession") and (property_name == "accession"):
|
593
|
+
if not Portal._is_accession_id(property_value):
|
594
|
+
return False
|
595
|
+
return None
|
596
|
+
|
597
|
+
DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
|
598
|
+
if not value:
|
599
|
+
return DEFAULT_RESULT
|
600
|
+
if not schema:
|
601
|
+
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
602
|
+
return DEFAULT_RESULT
|
603
|
+
if schema_properties := schema.get("properties"):
|
604
|
+
if schema_properties.get("accession") and Portal._is_accession_id(value):
|
605
|
+
# Case: lookup by accession (only by root).
|
606
|
+
return (Portal.LOOKUP_ROOT, ref_validator)
|
607
|
+
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
608
|
+
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
609
|
+
if re.match(schema_property_pattern_submitted_id, value):
|
610
|
+
# Case: lookup by submitted_id (only by specified type).
|
611
|
+
return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
|
612
|
+
return DEFAULT_RESULT
|
613
|
+
|
614
|
+
@staticmethod
|
615
|
+
def _is_accession_id(value: str) -> bool:
|
616
|
+
# This is here for now because of problems with circular dependencies.
|
617
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
618
|
+
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
619
|
+
|
419
620
|
def url(self, url: str, raw: bool = False, database: bool = False) -> str:
|
420
621
|
if not isinstance(url, str) or not url:
|
421
622
|
return "/"
|
@@ -516,6 +717,22 @@ class Portal:
|
|
516
717
|
response = TestResponseWrapper(response)
|
517
718
|
return response
|
518
719
|
|
720
|
+
@staticmethod
|
721
|
+
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
722
|
+
if isinstance(arg, TestApp):
|
723
|
+
return arg
|
724
|
+
elif isinstance(arg, VirtualApp):
|
725
|
+
if not isinstance(arg.wrapped_app, TestApp):
|
726
|
+
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
727
|
+
return arg.wrapped_app
|
728
|
+
if isinstance(arg, PyramidRouter):
|
729
|
+
router = arg
|
730
|
+
elif isinstance(arg, str) or not arg:
|
731
|
+
router = pyramid_get_app(arg or "development.ini", "app")
|
732
|
+
else:
|
733
|
+
raise Exception("Portal._create_vapp argument error.")
|
734
|
+
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
735
|
+
|
519
736
|
@staticmethod
|
520
737
|
def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
|
521
738
|
if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
|
@@ -547,22 +764,6 @@ class Portal:
|
|
547
764
|
with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
|
548
765
|
return Portal(ini_file)
|
549
766
|
|
550
|
-
@staticmethod
|
551
|
-
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
552
|
-
if isinstance(arg, TestApp):
|
553
|
-
return arg
|
554
|
-
elif isinstance(arg, VirtualApp):
|
555
|
-
if not isinstance(arg.wrapped_app, TestApp):
|
556
|
-
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
557
|
-
return arg.wrapped_app
|
558
|
-
if isinstance(arg, PyramidRouter):
|
559
|
-
router = arg
|
560
|
-
elif isinstance(arg, str) or not arg:
|
561
|
-
router = pyramid_get_app(arg or "development.ini", "app")
|
562
|
-
else:
|
563
|
-
raise Exception("Portal._create_vapp argument error.")
|
564
|
-
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
565
|
-
|
566
767
|
@staticmethod
|
567
768
|
def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
|
568
769
|
if isinstance(endpoints, dict):
|
dcicutils/schema_utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import os
|
2
2
|
from typing import Any, Dict, List, Optional, Tuple
|
3
|
-
|
4
3
|
from dcicutils.misc_utils import to_camel_case
|
5
4
|
|
6
5
|
|
@@ -9,7 +8,6 @@ class JsonSchemaConstants:
|
|
9
8
|
ARRAY = "array"
|
10
9
|
BOOLEAN = "boolean"
|
11
10
|
DEFAULT = "default"
|
12
|
-
DEPENDENT_REQUIRED = "dependentRequired"
|
13
11
|
ENUM = "enum"
|
14
12
|
FORMAT = "format"
|
15
13
|
INTEGER = "integer"
|
@@ -31,10 +29,6 @@ class EncodedSchemaConstants:
|
|
31
29
|
LINK_TO = "linkTo"
|
32
30
|
MERGE_REF = "$merge"
|
33
31
|
MIXIN_PROPERTIES = "mixinProperties"
|
34
|
-
SUBMISSION_COMMENT = "submissionComment"
|
35
|
-
SUBMISSION_EXAMPLES = "submissionExamples"
|
36
|
-
SUBMITTER_REQUIRED = "submitterRequired"
|
37
|
-
SUGGESTED_ENUM = "suggested_enum"
|
38
32
|
UNIQUE_KEY = "uniqueKey"
|
39
33
|
|
40
34
|
|
@@ -209,50 +203,6 @@ def get_description(schema: Dict[str, Any]) -> str:
|
|
209
203
|
return schema.get(SchemaConstants.DESCRIPTION, "")
|
210
204
|
|
211
205
|
|
212
|
-
def is_submitter_required(schema: Dict[str, Any]) -> bool:
|
213
|
-
"""Return True if the schema is marked as required for submitters.
|
214
|
-
|
215
|
-
Specifically, required for external (i.e. non-admin) submitters.
|
216
|
-
|
217
|
-
This is typically validated within the context of a oneOf, anyOf,
|
218
|
-
or allOf schema on an item type which is used within the team and
|
219
|
-
by external submitters, and is tricky to pick up on automatically.
|
220
|
-
"""
|
221
|
-
return schema.get(SchemaConstants.SUBMITTER_REQUIRED, False)
|
222
|
-
|
223
|
-
|
224
|
-
def get_submission_comment(schema: Dict[str, Any]) -> str:
|
225
|
-
"""Return the submission comment for a property.
|
226
|
-
|
227
|
-
Custom property that can be manually added to a schema to provide
|
228
|
-
additional context for submitters.
|
229
|
-
"""
|
230
|
-
return schema.get(SchemaConstants.SUBMISSION_COMMENT, "")
|
231
|
-
|
232
|
-
|
233
|
-
def get_submission_examples(schema: Dict[str, Any]) -> List[str]:
|
234
|
-
"""Return the submission example for a property.
|
235
|
-
|
236
|
-
Custom property that can be manually added to a schema to provide
|
237
|
-
an example for submitters.
|
238
|
-
"""
|
239
|
-
return schema.get(SchemaConstants.SUBMISSION_EXAMPLES, [])
|
240
|
-
|
241
|
-
|
242
|
-
def get_suggested_enum(schema: Dict[str, Any]) -> List[str]:
|
243
|
-
"""Return the suggested enum for a property.
|
244
|
-
|
245
|
-
Custom property that can be manually added to a schema to provide
|
246
|
-
a suggested list of values for submitters.
|
247
|
-
"""
|
248
|
-
return schema.get(SchemaConstants.SUGGESTED_ENUM, [])
|
249
|
-
|
250
|
-
|
251
|
-
def get_dependent_required(schema: Dict[str, Any]) -> Dict[str, List[str]]:
|
252
|
-
"""Return the dependent required properties of a schema."""
|
253
|
-
return schema.get(SchemaConstants.DEPENDENT_REQUIRED, {})
|
254
|
-
|
255
|
-
|
256
206
|
class Schema:
|
257
207
|
|
258
208
|
def __init__(self, schema: dict, type: Optional[str] = None) -> None:
|
dcicutils/structured_data.py
CHANGED
@@ -11,7 +11,6 @@ from webtest.app import TestApp
|
|
11
11
|
from dcicutils.common import OrchestratedApp
|
12
12
|
from dcicutils.data_readers import CsvReader, Excel, RowReader
|
13
13
|
from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
|
14
|
-
from dcicutils.file_utils import search_for_file
|
15
14
|
from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
|
16
15
|
merge_objects, remove_empty_properties, right_trim, split_string,
|
17
16
|
to_boolean, to_enum, to_float, to_integer, VirtualApp)
|
@@ -56,7 +55,7 @@ class StructuredDataSet:
|
|
56
55
|
remove_empty_objects_from_lists: bool = True,
|
57
56
|
ref_lookup_strategy: Optional[Callable] = None,
|
58
57
|
ref_lookup_nocache: bool = False,
|
59
|
-
norefs: bool = False,
|
58
|
+
norefs: bool = False, merge: bool = False,
|
60
59
|
progress: Optional[Callable] = None,
|
61
60
|
debug_sleep: Optional[str] = None) -> None:
|
62
61
|
self._progress = progress if callable(progress) else None
|
@@ -75,6 +74,7 @@ class StructuredDataSet:
|
|
75
74
|
self._nrows = 0
|
76
75
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
77
76
|
self._norefs = True if norefs is True else False
|
77
|
+
self._merge = True if merge is True else False # New merge functionality (2024-05-25)
|
78
78
|
self._debug_sleep = None
|
79
79
|
if debug_sleep:
|
80
80
|
try:
|
@@ -98,13 +98,13 @@ class StructuredDataSet:
|
|
98
98
|
remove_empty_objects_from_lists: bool = True,
|
99
99
|
ref_lookup_strategy: Optional[Callable] = None,
|
100
100
|
ref_lookup_nocache: bool = False,
|
101
|
-
norefs: bool = False,
|
101
|
+
norefs: bool = False, merge: bool = False,
|
102
102
|
progress: Optional[Callable] = None,
|
103
103
|
debug_sleep: Optional[str] = None) -> StructuredDataSet:
|
104
104
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
|
105
105
|
remove_empty_objects_from_lists=remove_empty_objects_from_lists,
|
106
106
|
ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
|
107
|
-
norefs=norefs, progress=progress, debug_sleep=debug_sleep)
|
107
|
+
norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
|
108
108
|
|
109
109
|
def validate(self, force: bool = False) -> None:
|
110
110
|
def data_without_deleted_properties(data: dict) -> dict:
|
@@ -208,14 +208,6 @@ class StructuredDataSet:
|
|
208
208
|
result.append({"type": type_name, "file": file_name})
|
209
209
|
return result
|
210
210
|
|
211
|
-
def upload_files_located(self,
|
212
|
-
location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
|
213
|
-
upload_files = copy.deepcopy(self.upload_files)
|
214
|
-
for upload_file in upload_files:
|
215
|
-
if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
|
216
|
-
upload_file["path"] = file_path
|
217
|
-
return upload_files
|
218
|
-
|
219
211
|
@property
|
220
212
|
def nrows(self) -> int:
|
221
213
|
return self._nrows
|
@@ -350,18 +342,23 @@ class StructuredDataSet:
|
|
350
342
|
|
351
343
|
def _load_json_file(self, file: str) -> None:
|
352
344
|
with open(file) as f:
|
353
|
-
|
354
|
-
|
355
|
-
|
345
|
+
data = json.load(f)
|
346
|
+
if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
|
347
|
+
(self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
|
356
348
|
# If the JSON file name looks like a schema name then assume it
|
357
349
|
# contains an object or an array of objects of that schema type.
|
358
|
-
self.
|
359
|
-
|
350
|
+
if self._merge: # New merge functionality (2024-05-25)
|
351
|
+
data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
|
352
|
+
self._add(Schema.type_name(file), data)
|
353
|
+
elif isinstance(data, dict):
|
360
354
|
# Otherwise if the JSON file name does not look like a schema name then
|
361
355
|
# assume it is a dictionary where each property is the name of a schema, and
|
362
356
|
# which (each property) contains a list of objects of that schema type.
|
363
|
-
for schema_name in
|
364
|
-
|
357
|
+
for schema_name in data:
|
358
|
+
item = data[schema_name]
|
359
|
+
if self._merge: # New merge functionality (2024-05-25)
|
360
|
+
item = self._merge_with_existing_portal_object(item, schema_name)
|
361
|
+
self._add(schema_name, item)
|
365
362
|
|
366
363
|
def _load_reader(self, reader: RowReader, type_name: str) -> None:
|
367
364
|
schema = None
|
@@ -383,11 +380,13 @@ class StructuredDataSet:
|
|
383
380
|
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
384
381
|
if self._autoadd_properties:
|
385
382
|
self._add_properties(structured_row, self._autoadd_properties, schema)
|
383
|
+
if self._merge: # New merge functionality (2024-05-25)
|
384
|
+
structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
|
386
385
|
if (prune_error := self._prune_structured_row(structured_row)) is not None:
|
387
386
|
self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
|
388
387
|
"error": prune_error}, "validation")
|
389
388
|
else:
|
390
|
-
self._add(type_name, structured_row)
|
389
|
+
self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
|
391
390
|
if self._progress:
|
392
391
|
self._progress({
|
393
392
|
PROGRESS.LOAD_ITEM: self._nrows,
|
@@ -428,6 +427,18 @@ class StructuredDataSet:
|
|
428
427
|
if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
|
429
428
|
structured_row[name] = properties[name]
|
430
429
|
|
430
|
+
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
|
431
|
+
"""
|
432
|
+
Given a Portal object (presumably/in-practice from the given metadata), if there is
|
433
|
+
an existing Portal item, identified by the identifying properties for the given object,
|
434
|
+
then merges the given object into the existing one and returns the result; otherwise
|
435
|
+
just returns the given object. Note that the given object may be CHANGED in place.
|
436
|
+
"""
|
437
|
+
for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
|
438
|
+
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
|
439
|
+
return merge_objects(existing_portal_object, portal_object, primitive_lists=True)
|
440
|
+
return portal_object
|
441
|
+
|
431
442
|
def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
|
432
443
|
return (ref_lookup_flags &
|
433
444
|
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
@@ -2,39 +2,45 @@ import re
|
|
2
2
|
from typing import Optional
|
3
3
|
from dcicutils.structured_data import Portal
|
4
4
|
|
5
|
+
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
+
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
+
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
+
|
5
9
|
|
6
10
|
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
7
11
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# /
|
13
|
-
# /
|
14
|
-
#
|
15
|
-
# /
|
16
|
-
#
|
17
|
-
# /
|
18
|
-
# /
|
12
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
13
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
14
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
15
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
16
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
17
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
18
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
19
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
20
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
21
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
22
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
23
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
19
26
|
#
|
20
27
|
def ref_validator(schema: Optional[dict],
|
21
28
|
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
22
29
|
"""
|
23
|
-
Returns False iff
|
24
|
-
the given property name
|
30
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
31
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
25
32
|
|
26
|
-
For example, if the schema is for
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
33
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
34
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
35
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
36
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
37
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
38
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
33
39
|
|
34
|
-
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
-
|
36
|
-
|
37
|
-
|
40
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
41
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
42
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
43
|
+
identifying property for the given type.
|
38
44
|
"""
|
39
45
|
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
40
46
|
if (property_format == "accession") and (property_name == "accession"):
|
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
|
|
62
68
|
|
63
69
|
|
64
70
|
# This is here for now because of problems with circular dependencies.
|
65
|
-
# See: smaht-portal/.../schema_formats.py
|
71
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
66
72
|
def _is_accession_id(value: str) -> bool:
|
67
73
|
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
@@ -5,7 +5,7 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
|
|
5
5
|
dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
|
6
6
|
dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
|
7
7
|
dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
|
8
|
-
dcicutils/command_utils.py,sha256=
|
8
|
+
dcicutils/command_utils.py,sha256=1_h18LGX86sLAkRkH33HNmBkwMb7v2wAh3jL01hzceU,18487
|
9
9
|
dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
|
10
10
|
dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
|
11
11
|
dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
|
@@ -39,16 +39,16 @@ dcicutils/lang_utils.py,sha256=MI3K6bPHLUqlkx3s_9jYZfbGbahiQFlpq4rBE3OYMbg,28151
|
|
39
39
|
dcicutils/license_policies/c4-infrastructure.jsonc,sha256=xEQbIN08Y2xh3gSLRtSz9EhAZox1p3kHC4r678hCpss,278
|
40
40
|
dcicutils/license_policies/c4-python-infrastructure.jsonc,sha256=Tkq8P1mKGYlix68I82IFNmasrT4wtSdokOIM-g2B8DQ,296
|
41
41
|
dcicutils/license_policies/park-lab-common-server.jsonc,sha256=aaK-NdFDT8f8z_gBXihZnQJ6g3CAZdGSlHOwUP8HvUQ,5790
|
42
|
-
dcicutils/license_policies/park-lab-common.jsonc,sha256=
|
42
|
+
dcicutils/license_policies/park-lab-common.jsonc,sha256=QyzpPveVr87RMpjrLLhnxLSp4VuEWta1gehMAqgKKig,18995
|
43
43
|
dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug3PizRGDLVnDox4CnvDKu5d2oQ,3260
|
44
44
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
45
45
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
46
46
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
47
|
-
dcicutils/misc_utils.py,sha256
|
47
|
+
dcicutils/misc_utils.py,sha256=-syqTAj8DESiiP_KHoyBv9VvfboFYB03QbBlmXnBZXw,109423
|
48
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
49
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
50
|
-
dcicutils/portal_object_utils.py,sha256=
|
51
|
-
dcicutils/portal_utils.py,sha256=
|
50
|
+
dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
|
51
|
+
dcicutils/portal_utils.py,sha256=R7v4uQUll34mn-NxyU3qoTouAwWrVDzW6W1zBGSU-M4,44762
|
52
52
|
dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
|
53
53
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
54
54
|
dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
|
@@ -56,7 +56,7 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
|
|
56
56
|
dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
|
57
57
|
dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
|
58
58
|
dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
|
59
|
-
dcicutils/schema_utils.py,sha256=
|
59
|
+
dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
|
60
60
|
dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
|
61
61
|
dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
|
62
62
|
dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
|
@@ -64,17 +64,17 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
64
64
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
65
65
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
66
66
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
67
|
-
dcicutils/structured_data.py,sha256=
|
67
|
+
dcicutils/structured_data.py,sha256=HVe1ruXz0vH4nRBOwq0cNrfR4KtqocC4U940KRXM5zY,64160
|
68
68
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
69
|
-
dcicutils/submitr/ref_lookup_strategy.py,sha256=
|
69
|
+
dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
|
70
70
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
71
71
|
dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
|
72
72
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
73
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
75
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
-
dcicutils-8.10.0.
|
77
|
-
dcicutils-8.10.0.
|
78
|
-
dcicutils-8.10.0.
|
79
|
-
dcicutils-8.10.0.
|
80
|
-
dcicutils-8.10.0.
|
76
|
+
dcicutils-8.10.0.1b1.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.10.0.1b1.dist-info/METADATA,sha256=mVkdVaQtLvCiBIwKoT3JL9HYr8fmTgy-TiFlpGyCcZs,3440
|
78
|
+
dcicutils-8.10.0.1b1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.10.0.1b1.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.10.0.1b1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|