dcicutils 8.9.0__tar.gz → 8.9.0.1b1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/PKG-INFO +1 -1
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/command_utils.py +69 -1
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/portal_object_utils.py +24 -89
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/portal_utils.py +234 -36
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/structured_data.py +32 -20
- dcicutils-8.9.0.1b1/dcicutils/submitr/ref_lookup_strategy.py +73 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/pyproject.toml +1 -1
- dcicutils-8.9.0/dcicutils/submitr/ref_lookup_strategy.py +0 -67
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/LICENSE.txt +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/README.rst +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/__init__.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/base.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/beanstalk_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/bundle_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/captured_output.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/cloudformation_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/codebuild_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/common.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/contribution_scripts.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/contribution_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/creds_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/data_readers.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/data_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/datetime_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/deployment_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/diff_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/docker_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/ecr_scripts.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/ecr_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/ecs_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/env_base.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/env_manager.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/env_scripts.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/env_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/env_utils_legacy.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/es_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/exceptions.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/ff_mocks.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/ff_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/file_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/function_cache_decorator.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/glacier_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/http_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/jh_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/kibana/dashboards.json +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/kibana/readme.md +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/lang_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/log_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/misc_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/obfuscation_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/opensearch_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/progress_bar.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/project_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/qa_checkers.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/qa_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/redis_tools.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/redis_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/s3_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/schema_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/scripts/publish_to_pypi.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/scripts/run_license_checker.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/scripts/view_portal_object.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/secrets_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/sheet_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/snapshot_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/ssl_certificate_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/submitr/progress_constants.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/task_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/tmpfile_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/trace_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/validation_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/variant_utils.py +0 -0
- {dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/zip_utils.py +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
from __future__ import annotations
|
1
2
|
import contextlib
|
2
3
|
import functools
|
3
4
|
import glob
|
@@ -7,7 +8,7 @@ import re
|
|
7
8
|
import requests
|
8
9
|
import subprocess
|
9
10
|
|
10
|
-
from typing import Optional
|
11
|
+
from typing import Callable, Optional
|
11
12
|
from .exceptions import InvalidParameterError
|
12
13
|
from .lang_utils import there_are
|
13
14
|
from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
|
@@ -384,3 +385,70 @@ def script_catch_errors():
|
|
384
385
|
message = str(e) # Note: We ignore the type, which isn't intended to be shown.
|
385
386
|
PRINT(message)
|
386
387
|
exit(1)
|
388
|
+
|
389
|
+
|
390
|
+
class Question:
    """
    Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after
    some maximum number of times of the same answer in a row (consecutively), then asks them
    if they want to automatically give that same answer to any/all subsequent questions.
    Supports static/global list of such Question instances, hashed (only) by the question text.
    """
    # Registry of shared instances, keyed by question text (see Question.instance).
    _static_instances = {}

    @staticmethod
    def instance(question: Optional[str] = None,
                 max: Optional[int] = None, printf: Optional[Callable] = None) -> "Question":
        """
        Returns the shared Question instance for the given question text, creating and
        registering it on first use. A non-string question is treated as the empty string.
        The max and printf arguments only take effect when the instance is first created.
        """
        question = question if isinstance(question, str) else ""
        if not (instance := Question._static_instances.get(question)):
            Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
        return instance

    @staticmethod
    def yes(question: Optional[str] = None,
            max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
        """
        Convenience wrapper: asks the question via its shared instance (see Question.instance)
        and returns True for a yes answer, otherwise False.
        """
        return Question.instance(question, max=max, printf=printf).ask()

    def __init__(self, question: Optional[str] = None,
                 max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
        """
        :param question: Default question text; a non-string is treated as the empty string.
        :param max: Number of consecutive identical answers after which the user is offered
            to give that answer automatically from then on; ignored unless a positive integer.
        :param printf: Output function; falls back to the builtin print if not callable.
        """
        self._question = question if isinstance(question, str) else ""
        self._max = max if isinstance(max, int) and max > 0 else None
        # Stored for completeness; nothing within this class currently calls it.
        self._print = printf if callable(printf) else print
        self._yes_consecutive_count = 0
        self._no_consecutive_count = 0
        self._yes_automatic = False
        self._no_automatic = False

    def ask(self, question: Optional[str] = None) -> bool:
        """
        Asks the given question (or, if not given, the one this instance was created with)
        and returns True for yes, otherwise False. Once the user has opted into an automatic
        answer, returns that answer immediately without prompting.
        """

        def question_automatic(value: str) -> bool:
            # Offers to make the given answer ("YES" or "NO") automatic; returns True if accepted.
            rarrows = "▶" * 3
            larrows = "◀" * 3
            if yes_or_no(f"{rarrows} Do you want to answer {value} to all such questions? {larrows}"):
                return True
            # Declined: restart both streaks so the offer is only repeated after another full run.
            self._yes_consecutive_count = 0
            self._no_consecutive_count = 0
            return False

        if self._yes_automatic:
            return True
        if self._no_automatic:
            return False

        prompt = (question if isinstance(question, str) else "") or self._question or "Undefined question"
        if yes_or_no(prompt):
            self._yes_consecutive_count += 1
            self._no_consecutive_count = 0
            if self._max and (self._yes_consecutive_count >= self._max):
                # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
                if question_automatic("YES"):
                    self._yes_automatic = True
            return True

        self._no_consecutive_count += 1
        self._yes_consecutive_count = 0
        if self._max and (self._no_consecutive_count >= self._max):
            # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
            if question_automatic("NO"):
                self._no_automatic = True
        return False
|
@@ -1,6 +1,5 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from functools import lru_cache
|
3
|
-
import re
|
4
3
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
5
4
|
from dcicutils.data_readers import RowReader
|
6
5
|
from dcicutils.misc_utils import create_readonly_object
|
@@ -14,11 +13,9 @@ class PortalObject:
|
|
14
13
|
|
15
14
|
_PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
|
16
15
|
|
17
|
-
def __init__(self, data: dict, portal: Portal = None,
|
18
|
-
schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
|
16
|
+
def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
|
19
17
|
self._data = data if isinstance(data, dict) else {}
|
20
18
|
self._portal = portal if isinstance(portal, Portal) else None
|
21
|
-
self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
|
22
19
|
self._type = type if isinstance(type, str) else ""
|
23
20
|
|
24
21
|
@property
|
@@ -32,7 +29,7 @@ class PortalObject:
|
|
32
29
|
@property
|
33
30
|
@lru_cache(maxsize=1)
|
34
31
|
def type(self) -> str:
|
35
|
-
return self._type or Portal.get_schema_type(self._data) or
|
32
|
+
return self._type or Portal.get_schema_type(self._data) or ""
|
36
33
|
|
37
34
|
@property
|
38
35
|
@lru_cache(maxsize=1)
|
@@ -47,7 +44,7 @@ class PortalObject:
|
|
47
44
|
@property
|
48
45
|
@lru_cache(maxsize=1)
|
49
46
|
def schema(self) -> Optional[dict]:
|
50
|
-
return self.
|
47
|
+
return self._portal.get_schema(self.type) if self._portal else None
|
51
48
|
|
52
49
|
def copy(self) -> PortalObject:
|
53
50
|
return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
|
@@ -59,39 +56,29 @@ class PortalObject:
|
|
59
56
|
Returns the list of all identifying property names of this Portal object which actually have values.
|
60
57
|
Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
|
61
58
|
properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
|
59
|
+
Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
|
62
60
|
"""
|
63
|
-
|
64
|
-
|
65
|
-
identifying_properties = []
|
66
|
-
for identifying_property in schema_identifying_properties:
|
67
|
-
if identifying_property not in ["uuid", "identifier", "aliases"]:
|
68
|
-
if self._data.get(identifying_property):
|
69
|
-
identifying_properties.append(identifying_property)
|
70
|
-
if self._data.get("identifier"):
|
71
|
-
identifying_properties.insert(0, "identifier")
|
72
|
-
if self._data.get("uuid"):
|
73
|
-
identifying_properties.insert(0, "uuid")
|
74
|
-
if "aliases" in schema_identifying_properties and self._data.get("aliases"):
|
75
|
-
identifying_properties.append("aliases")
|
76
|
-
return identifying_properties or None
|
61
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
|
62
|
+
return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
|
77
63
|
|
78
64
|
@lru_cache(maxsize=8192)
|
79
65
|
def lookup(self, raw: bool = False,
|
80
66
|
ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
|
67
|
+
if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
|
68
|
+
return None, None, 0
|
81
69
|
nlookups = 0
|
82
70
|
first_identifying_path = None
|
83
71
|
try:
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
)
|
72
|
+
for identifying_path in identifying_paths:
|
73
|
+
if not first_identifying_path:
|
74
|
+
first_identifying_path = identifying_path
|
75
|
+
nlookups += 1
|
76
|
+
if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200):
|
77
|
+
return (
|
78
|
+
PortalObject(item.json(), portal=self._portal, type=self.type if raw else None),
|
79
|
+
identifying_path,
|
80
|
+
nlookups
|
81
|
+
)
|
95
82
|
except Exception:
|
96
83
|
pass
|
97
84
|
return None, first_identifying_path, nlookups
|
@@ -159,64 +146,12 @@ class PortalObject:
|
|
159
146
|
|
160
147
|
@lru_cache(maxsize=1)
|
161
148
|
def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
if self.type:
|
169
|
-
identifying_paths.append(f"/{self.type}/{self.uuid}")
|
170
|
-
identifying_paths.append(f"/{self.uuid}")
|
171
|
-
return identifying_paths
|
172
|
-
for identifying_property in identifying_properties:
|
173
|
-
if identifying_value := self._data.get(identifying_property):
|
174
|
-
if identifying_property == "uuid":
|
175
|
-
if self.type:
|
176
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
177
|
-
identifying_paths.append(f"/{identifying_value}")
|
178
|
-
# For now at least we include the path both with and without the schema type component,
|
179
|
-
# as for some identifying values, it works (only) with, and some, it works (only) without.
|
180
|
-
# For example: If we have FileSet with "accession", an identifying property, with value
|
181
|
-
# SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
|
182
|
-
# conversely using "submitted_id", also an identifying property, with value
|
183
|
-
# UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
|
184
|
-
# not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
|
185
|
-
elif isinstance(identifying_value, list):
|
186
|
-
for identifying_value_item in identifying_value:
|
187
|
-
if self.type:
|
188
|
-
identifying_paths.append(f"/{self.type}/{identifying_value_item}")
|
189
|
-
identifying_paths.append(f"/{identifying_value_item}")
|
190
|
-
else:
|
191
|
-
# TODO: Import from somewhere ...
|
192
|
-
lookup_options = 0
|
193
|
-
if schema := self.schema:
|
194
|
-
# TODO: Hook into the ref_lookup_strategy thing in structured_data to make
|
195
|
-
# sure we check accession format (since it does not have a pattern).
|
196
|
-
if callable(ref_lookup_strategy):
|
197
|
-
lookup_options, ref_validator = ref_lookup_strategy(
|
198
|
-
self._portal, self.type, schema, identifying_value)
|
199
|
-
if callable(ref_validator):
|
200
|
-
if ref_validator(schema, identifying_property, identifying_value) is False:
|
201
|
-
continue
|
202
|
-
if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
|
203
|
-
if not re.match(pattern, identifying_value):
|
204
|
-
# If this identifying value is for a (identifying) property which has a
|
205
|
-
# pattern, and the value does NOT match the pattern, then do NOT include
|
206
|
-
# this value as an identifying path, since it cannot possibly be found.
|
207
|
-
continue
|
208
|
-
if not lookup_options:
|
209
|
-
lookup_options = Portal.LOOKUP_DEFAULT
|
210
|
-
if Portal.is_lookup_root_first(lookup_options):
|
211
|
-
identifying_paths.append(f"/{identifying_value}")
|
212
|
-
if Portal.is_lookup_specified_type(lookup_options) and self.type:
|
213
|
-
identifying_paths.append(f"/{self.type}/{identifying_value}")
|
214
|
-
if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
|
215
|
-
identifying_paths.append(f"/{identifying_value}")
|
216
|
-
if Portal.is_lookup_subtypes(lookup_options):
|
217
|
-
for subtype_name in self._portal.get_schema_subtype_names(self.type):
|
218
|
-
identifying_paths.append(f"/{subtype_name}/{identifying_value}")
|
219
|
-
return identifying_paths or None
|
149
|
+
if not self._portal and (uuid := self.uuid):
|
150
|
+
return [f"/{uuid}"]
|
151
|
+
# Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
|
152
|
+
return self._portal.get_identifying_paths(self._data,
|
153
|
+
portal_type=self.schema,
|
154
|
+
lookup_strategy=ref_lookup_strategy) if self._portal else None
|
220
155
|
|
221
156
|
def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
|
222
157
|
"""
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from collections import deque
|
2
2
|
from functools import lru_cache
|
3
|
+
from dcicutils.function_cache_decorator import function_cache
|
3
4
|
import io
|
4
5
|
import json
|
5
6
|
from pyramid.config import Configurator as PyramidConfigurator
|
@@ -18,6 +19,7 @@ from wsgiref.simple_server import make_server as wsgi_make_server
|
|
18
19
|
from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
|
19
20
|
from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
|
20
21
|
from dcicutils.misc_utils import to_camel_case, VirtualApp
|
22
|
+
from dcicutils.schema_utils import get_identifying_properties
|
21
23
|
from dcicutils.tmpfile_utils import temporary_file
|
22
24
|
|
23
25
|
Portal = Type["Portal"] # Forward type reference for type hints.
|
@@ -48,15 +50,16 @@ class Portal:
|
|
48
50
|
FILE_TYPE_SCHEMA_NAME = "File"
|
49
51
|
|
50
52
|
# Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
|
51
|
-
# structured_data.py; controlled by an optional
|
53
|
+
# structured_data.py; controlled by an optional lookup_strategy callable; default is
|
52
54
|
# lookup at root path but after the specified type path lookup, and then lookup all subtypes;
|
53
55
|
# can choose to lookup root path first, or not lookup root path at all, or not lookup
|
54
|
-
# subtypes at all; the
|
56
|
+
# subtypes at all; the lookup_strategy callable if specified should take a type_name
|
55
57
|
# and value (string) arguements and return an integer of any of the below ORed together.
|
56
58
|
# The main purpose of this is optimization; to minimize portal lookups; since for example,
|
57
59
|
# currently at least, /{type}/{accession} does not work but /{accession} does; so we
|
58
60
|
# currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
|
59
61
|
# And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
|
62
|
+
LOOKUP_UNDEFINED = 0
|
60
63
|
LOOKUP_SPECIFIED_TYPE = 0x0001
|
61
64
|
LOOKUP_ROOT = 0x0002
|
62
65
|
LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
|
@@ -205,23 +208,6 @@ class Portal:
|
|
205
208
|
def vapp(self) -> Optional[TestApp]:
|
206
209
|
return self._vapp
|
207
210
|
|
208
|
-
@staticmethod
|
209
|
-
def is_lookup_specified_type(lookup_options: int) -> bool:
|
210
|
-
return (lookup_options &
|
211
|
-
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
212
|
-
|
213
|
-
@staticmethod
|
214
|
-
def is_lookup_root(lookup_options: int) -> bool:
|
215
|
-
return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
|
216
|
-
|
217
|
-
@staticmethod
|
218
|
-
def is_lookup_root_first(lookup_options: int) -> bool:
|
219
|
-
return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
|
220
|
-
|
221
|
-
@staticmethod
|
222
|
-
def is_lookup_subtypes(lookup_options: int) -> bool:
|
223
|
-
return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
|
224
|
-
|
225
211
|
def get(self, url: str, follow: bool = True,
|
226
212
|
raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
|
227
213
|
url = self.url(url, raw, database)
|
@@ -305,7 +291,10 @@ class Portal:
|
|
305
291
|
|
306
292
|
@lru_cache(maxsize=100)
|
307
293
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
308
|
-
|
294
|
+
try:
|
295
|
+
return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
|
296
|
+
except Exception:
|
297
|
+
return None
|
309
298
|
|
310
299
|
@lru_cache(maxsize=1)
|
311
300
|
def get_schemas(self) -> dict:
|
@@ -416,6 +405,215 @@ class Portal:
|
|
416
405
|
return []
|
417
406
|
return schemas_super_type_map.get(type_name, [])
|
418
407
|
|
408
|
+
@function_cache(maxsize=100, serialize_key=True)
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
                          first_only: bool = False,
                          lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
    """
    Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
    and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
    no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
    Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_strategy
    module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
    this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.

    The given portal_type may be a type name or a schema dictionary (its "title" is then used); if it is
    neither, the type is derived from the given object. If first_only is True then returns as soon as any
    identifying property has produced at least one path. The lookup_strategy may be a callable, or False
    to disable it; any other value selects the default Portal._lookup_strategy.
    """
    def has_flags(options: int, flags: int) -> bool:
        # True iff every bit in flags is set in options.
        return (options & flags) == flags

    paths = []
    if not isinstance(portal_object, dict):
        return paths

    # Normalize portal_type to a non-empty type name.
    if isinstance(portal_type, dict):
        # It appears that the given portal_type is an actual schema dictionary.
        portal_type = self.schema_name(portal_type.get("title"))
    if not (isinstance(portal_type, str) and portal_type):
        portal_type = self.get_schema_type(portal_object)
        if not portal_type:
            return paths

    if not callable(lookup_strategy):
        lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy

    for property_name in self.get_identifying_property_names(portal_type):
        value = portal_object.get(property_name)
        if not value:
            continue
        # The get_identifying_property_names call above ensures uuid is first if it is in the object.
        # And also note that ALL schemas do in fact have identifyingProperties which do in fact have
        # uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
        # might have a special case to check the Portal object explicitly for uuid, but no need.
        if property_name == "uuid":
            # Note this idiosyncrasy with Portal paths: any of /{uuid}, /{uuid}/, /{type}/{uuid},
            # /{type}/{uuid}/ or /{collection}/{uuid} results in a (HTTP 301) redirect to the
            # lower-case-dashed-plural collection based path with a trailing slash. This code has no
            # reasonable way of getting that collection name from the schema/portal type name, so an
            # extra round-trip HTTP request will occur behind-the-scenes, but it happens automatically.
            # The distinction of using just /{uuid} here rather than /{type}/{uuid} is not really
            # necessary; it is just here for emphasis that this is all that is needed.
            if first_only is True:
                paths.append(f"/{portal_type}/{value}")
            else:
                paths.append(f"/{value}")
        elif isinstance(value, list):
            paths.extend(f"/{portal_type}/{item}" for item in value if item)
        else:
            options = Portal.LOOKUP_UNDEFINED
            schema = self.get_schema(portal_type)
            if schema:
                if callable(lookup_strategy):
                    options, validator = lookup_strategy(self, portal_type, schema, value)
                    if callable(validator) and validator(schema, property_name, value) is False:
                        continue
                pattern = schema.get("properties", {}).get(property_name, {}).get("pattern")
                if pattern and not re.match(pattern, value):
                    # If this identifying value is for a (identifying) property which has a
                    # pattern, and the value does NOT match the pattern, then do NOT include
                    # this value as an identifying path, since it cannot possibly be found.
                    continue
            if options == Portal.LOOKUP_UNDEFINED:
                options = Portal.LOOKUP_DEFAULT
            root_first = has_flags(options, Portal.LOOKUP_ROOT_FIRST)
            if root_first:
                paths.append(f"/{value}")
            if has_flags(options, Portal.LOOKUP_SPECIFIED_TYPE) and portal_type:
                paths.append(f"/{portal_type}/{value}")
            if has_flags(options, Portal.LOOKUP_ROOT) and not root_first:
                paths.append(f"/{value}")
            if has_flags(options, Portal.LOOKUP_SUBTYPES):
                paths.extend(f"/{subtype_name}/{value}"
                             for subtype_name in self.get_schema_subtype_names(portal_type))
        if (first_only is True) and paths:
            return paths
    return paths
|
507
|
+
|
508
|
+
@function_cache(maxsize=100, serialize_key=True)
def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
                         lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]:
    """
    Returns the first identifying Portal (URL) path for the given Portal object, as determined
    by self.get_identifying_paths called with first_only=True; or None if there is none.
    """
    paths = self.get_identifying_paths(portal_object, portal_type, first_only=True,
                                       lookup_strategy=lookup_strategy)
    return paths[0] if paths else None
|
515
|
+
|
516
|
+
@function_cache(maxsize=100, serialize_key=True)
def get_identifying_property_names(self, schema: Union[str, dict],
                                   portal_object: Optional[dict] = None) -> List[str]:
    """
    Returns the list of identifying property names for the given Portal schema, which may be
    either a schema name or a schema object. If a Portal object is also given then restricts this
    set of identifying properties to those which actually have values within this Portal object.
    Favors the uuid and identifier property names and defavors the aliases property name; all
    other names keep the order in which the schema lists them. Returns empty list if no
    identifying properties or otherwise not found.
    """
    if isinstance(schema, str):
        if not (schema := self.get_schema(schema)):
            return []
    elif not isinstance(schema, dict):
        return []
    if not (identifying_properties := get_identifying_properties(schema)):
        return []
    # Paranoid dedup which, unlike going through a set, preserves the schema-defined order,
    # so the result (and hence the order of any lookups driven by it) is the same on every run.
    # This also yields a fresh list so as not to change the schema if given.
    identifying_properties = list(dict.fromkeys(identifying_properties))
    favored = [name for name in ("uuid", "identifier") if name in identifying_properties]
    defavored = [name for name in ("aliases",) if name in identifying_properties]
    others = [name for name in identifying_properties if name not in favored and name not in defavored]
    identifying_properties = favored + others + defavored
    if isinstance(portal_object, dict):
        # Keep only those identifying properties for which the object has a (non-None) value.
        identifying_properties = [name for name in identifying_properties
                                  if portal_object.get(name) is not None]
    return identifying_properties
|
551
|
+
|
552
|
+
@staticmethod
|
553
|
+
def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
554
|
+
#
|
555
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
556
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
557
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
558
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
559
|
+
# /submitted_id | /UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
560
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
561
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
562
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
563
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
564
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
565
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
566
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
567
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
568
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
569
|
+
#
|
570
|
+
def ref_validator(schema: Optional[dict],
|
571
|
+
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
572
|
+
"""
|
573
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
574
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
575
|
+
|
576
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
577
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
578
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
579
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
580
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
581
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
582
|
+
|
583
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
584
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
585
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
586
|
+
identifying property for the given type.
|
587
|
+
"""
|
588
|
+
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
589
|
+
if (property_format == "accession") and (property_name == "accession"):
|
590
|
+
if not Portal._is_accession_id(property_value):
|
591
|
+
return False
|
592
|
+
return None
|
593
|
+
|
594
|
+
DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
|
595
|
+
if not value:
|
596
|
+
return DEFAULT_RESULT
|
597
|
+
if not schema:
|
598
|
+
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
599
|
+
return DEFAULT_RESULT
|
600
|
+
if schema_properties := schema.get("properties"):
|
601
|
+
if schema_properties.get("accession") and Portal._is_accession_id(value):
|
602
|
+
# Case: lookup by accession (only by root).
|
603
|
+
return (Portal.LOOKUP_ROOT, ref_validator)
|
604
|
+
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
605
|
+
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
606
|
+
if re.match(schema_property_pattern_submitted_id, value):
|
607
|
+
# Case: lookup by submitted_id (only by specified type).
|
608
|
+
return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
|
609
|
+
return DEFAULT_RESULT
|
610
|
+
|
611
|
+
@staticmethod
|
612
|
+
def _is_accession_id(value: str) -> bool:
|
613
|
+
# This is here for now because of problems with circular dependencies.
|
614
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
615
|
+
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
616
|
+
|
419
617
|
def url(self, url: str, raw: bool = False, database: bool = False) -> str:
|
420
618
|
if not isinstance(url, str) or not url:
|
421
619
|
return "/"
|
@@ -516,6 +714,22 @@ class Portal:
|
|
516
714
|
response = TestResponseWrapper(response)
|
517
715
|
return response
|
518
716
|
|
717
|
+
@staticmethod
|
718
|
+
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
719
|
+
if isinstance(arg, TestApp):
|
720
|
+
return arg
|
721
|
+
elif isinstance(arg, VirtualApp):
|
722
|
+
if not isinstance(arg.wrapped_app, TestApp):
|
723
|
+
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
724
|
+
return arg.wrapped_app
|
725
|
+
if isinstance(arg, PyramidRouter):
|
726
|
+
router = arg
|
727
|
+
elif isinstance(arg, str) or not arg:
|
728
|
+
router = pyramid_get_app(arg or "development.ini", "app")
|
729
|
+
else:
|
730
|
+
raise Exception("Portal._create_vapp argument error.")
|
731
|
+
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
732
|
+
|
519
733
|
@staticmethod
|
520
734
|
def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
|
521
735
|
if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
|
@@ -547,22 +761,6 @@ class Portal:
|
|
547
761
|
with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
|
548
762
|
return Portal(ini_file)
|
549
763
|
|
550
|
-
@staticmethod
|
551
|
-
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
|
552
|
-
if isinstance(arg, TestApp):
|
553
|
-
return arg
|
554
|
-
elif isinstance(arg, VirtualApp):
|
555
|
-
if not isinstance(arg.wrapped_app, TestApp):
|
556
|
-
raise Exception("Portal._create_vapp VirtualApp argument error.")
|
557
|
-
return arg.wrapped_app
|
558
|
-
if isinstance(arg, PyramidRouter):
|
559
|
-
router = arg
|
560
|
-
elif isinstance(arg, str) or not arg:
|
561
|
-
router = pyramid_get_app(arg or "development.ini", "app")
|
562
|
-
else:
|
563
|
-
raise Exception("Portal._create_vapp argument error.")
|
564
|
-
return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
|
565
|
-
|
566
764
|
@staticmethod
|
567
765
|
def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
|
568
766
|
if isinstance(endpoints, dict):
|
@@ -11,7 +11,6 @@ from webtest.app import TestApp
|
|
11
11
|
from dcicutils.common import OrchestratedApp
|
12
12
|
from dcicutils.data_readers import CsvReader, Excel, RowReader
|
13
13
|
from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
|
14
|
-
from dcicutils.file_utils import search_for_file
|
15
14
|
from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
|
16
15
|
merge_objects, remove_empty_properties, right_trim, split_string,
|
17
16
|
to_boolean, to_enum, to_float, to_integer, VirtualApp)
|
@@ -56,7 +55,7 @@ class StructuredDataSet:
|
|
56
55
|
remove_empty_objects_from_lists: bool = True,
|
57
56
|
ref_lookup_strategy: Optional[Callable] = None,
|
58
57
|
ref_lookup_nocache: bool = False,
|
59
|
-
norefs: bool = False,
|
58
|
+
norefs: bool = False, merge: bool = False,
|
60
59
|
progress: Optional[Callable] = None,
|
61
60
|
debug_sleep: Optional[str] = None) -> None:
|
62
61
|
self._progress = progress if callable(progress) else None
|
@@ -75,6 +74,7 @@ class StructuredDataSet:
|
|
75
74
|
self._nrows = 0
|
76
75
|
self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
|
77
76
|
self._norefs = True if norefs is True else False
|
77
|
+
self._merge = True if merge is True else False
|
78
78
|
self._debug_sleep = None
|
79
79
|
if debug_sleep:
|
80
80
|
try:
|
@@ -98,13 +98,13 @@ class StructuredDataSet:
|
|
98
98
|
remove_empty_objects_from_lists: bool = True,
|
99
99
|
ref_lookup_strategy: Optional[Callable] = None,
|
100
100
|
ref_lookup_nocache: bool = False,
|
101
|
-
norefs: bool = False,
|
101
|
+
norefs: bool = False, merge: bool = False,
|
102
102
|
progress: Optional[Callable] = None,
|
103
103
|
debug_sleep: Optional[str] = None) -> StructuredDataSet:
|
104
104
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
|
105
105
|
remove_empty_objects_from_lists=remove_empty_objects_from_lists,
|
106
106
|
ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
|
107
|
-
norefs=norefs, progress=progress, debug_sleep=debug_sleep)
|
107
|
+
norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
|
108
108
|
|
109
109
|
def validate(self, force: bool = False) -> None:
|
110
110
|
def data_without_deleted_properties(data: dict) -> dict:
|
@@ -208,14 +208,6 @@ class StructuredDataSet:
|
|
208
208
|
result.append({"type": type_name, "file": file_name})
|
209
209
|
return result
|
210
210
|
|
211
|
-
def upload_files_located(self,
|
212
|
-
location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
|
213
|
-
upload_files = copy.deepcopy(self.upload_files)
|
214
|
-
for upload_file in upload_files:
|
215
|
-
if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
|
216
|
-
upload_file["path"] = file_path
|
217
|
-
return upload_files
|
218
|
-
|
219
211
|
@property
|
220
212
|
def nrows(self) -> int:
|
221
213
|
return self._nrows
|
@@ -350,18 +342,23 @@ class StructuredDataSet:
|
|
350
342
|
|
351
343
|
def _load_json_file(self, file: str) -> None:
|
352
344
|
with open(file) as f:
|
353
|
-
|
354
|
-
|
355
|
-
|
345
|
+
data = json.load(f)
|
346
|
+
if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
|
347
|
+
(self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
|
356
348
|
# If the JSON file name looks like a schema name then assume it
|
357
349
|
# contains an object or an array of objects of that schema type.
|
358
|
-
self.
|
359
|
-
|
350
|
+
if self._merge:
|
351
|
+
data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
|
352
|
+
self._add(Schema.type_name(file), data)
|
353
|
+
elif isinstance(data, dict):
|
360
354
|
# Otherwise if the JSON file name does not look like a schema name then
|
361
355
|
# assume it is a dictionary where each property is the name of a schema, and
|
362
356
|
# which (each property) contains a list of objects of that schema type.
|
363
|
-
for schema_name in
|
364
|
-
|
357
|
+
for schema_name in data:
|
358
|
+
item = data[schema_name]
|
359
|
+
if self._merge:
|
360
|
+
item = self._merge_with_existing_portal_object(item, schema_name)
|
361
|
+
self._add(schema_name, item)
|
365
362
|
|
366
363
|
def _load_reader(self, reader: RowReader, type_name: str) -> None:
|
367
364
|
schema = None
|
@@ -383,11 +380,14 @@ class StructuredDataSet:
|
|
383
380
|
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
384
381
|
if self._autoadd_properties:
|
385
382
|
self._add_properties(structured_row, self._autoadd_properties, schema)
|
383
|
+
# New merge functionality (2024-05-25).
|
384
|
+
if self._merge:
|
385
|
+
structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
|
386
386
|
if (prune_error := self._prune_structured_row(structured_row)) is not None:
|
387
387
|
self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
|
388
388
|
"error": prune_error}, "validation")
|
389
389
|
else:
|
390
|
-
self._add(type_name, structured_row)
|
390
|
+
self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
|
391
391
|
if self._progress:
|
392
392
|
self._progress({
|
393
393
|
PROGRESS.LOAD_ITEM: self._nrows,
|
@@ -428,6 +428,18 @@ class StructuredDataSet:
|
|
428
428
|
if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
|
429
429
|
structured_row[name] = properties[name]
|
430
430
|
|
431
|
+
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
|
432
|
+
"""
|
433
|
+
Given a Portal object (presumably/in-practice from the given metadata), if there is
|
434
|
+
an existing Portal item, identified by the identifying properties for the given object,
|
435
|
+
then merges the given object into the existing one and returns the result; otherwise
|
436
|
+
just returns the given object. Note that the given object may be CHANGED in place.
|
437
|
+
"""
|
438
|
+
for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
|
439
|
+
if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
|
440
|
+
return merge_objects(existing_portal_object, portal_object)
|
441
|
+
return portal_object
|
442
|
+
|
431
443
|
def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
|
432
444
|
return (ref_lookup_flags &
|
433
445
|
Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Optional
|
3
|
+
from dcicutils.structured_data import Portal
|
4
|
+
|
5
|
+
# This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
|
6
|
+
# before it was fully developed, we had differing behaviors; but this has been unified; so this
|
7
|
+
# could now be internalized to structured_data, and portal_object_utils (TODO).
|
8
|
+
|
9
|
+
|
10
|
+
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
11
|
+
#
|
12
|
+
# Note this slightly odd situation WRT object lookups by submitted_id and accession:
|
13
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
14
|
+
# PATH | EXAMPLE | LOOKUP RESULT |
|
15
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
16
|
+
# /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
17
|
+
# /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
18
|
+
# /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
|
19
|
+
# /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
|
20
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
21
|
+
# /accession | /SMAFSFXF1RO4 | FOUND |
|
22
|
+
# /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
|
23
|
+
# /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
|
24
|
+
# /File/accession | /File/SMAFSFXF1RO4 | FOUND |
|
25
|
+
# -----------------------------+-----------------------------------------------+---------------+
|
26
|
+
#
|
27
|
+
def ref_validator(schema: Optional[dict],
|
28
|
+
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
29
|
+
"""
|
30
|
+
Returns False iff objects of type represented by the given schema, CANNOT be referenced with
|
31
|
+
a Portal path using the given property name and its given property value, otherwise returns None.
|
32
|
+
|
33
|
+
For example, if the schema is for UnalignedReads and the property name is accession, then we will
|
34
|
+
return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
|
35
|
+
will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
|
36
|
+
will continue executing its default behavior, which is to check other ways in which the given type
|
37
|
+
CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
|
38
|
+
and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
|
39
|
+
|
40
|
+
The goal (in structured_data) being to detect if a type is being referenced in such a way that
|
41
|
+
CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
|
42
|
+
if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
|
43
|
+
identifying property for the given type.
|
44
|
+
"""
|
45
|
+
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
46
|
+
if (property_format == "accession") and (property_name == "accession"):
|
47
|
+
if not _is_accession_id(property_value):
|
48
|
+
return False
|
49
|
+
return None
|
50
|
+
|
51
|
+
DEFAULT_RESPONSE = (Portal.LOOKUP_DEFAULT, ref_validator)
|
52
|
+
|
53
|
+
if not value:
|
54
|
+
return DEFAULT_RESPONSE
|
55
|
+
if not schema:
|
56
|
+
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
57
|
+
return DEFAULT_RESPONSE
|
58
|
+
if schema_properties := schema.get("properties"):
|
59
|
+
if schema_properties.get("accession") and _is_accession_id(value):
|
60
|
+
# Case: lookup by accession (only by root).
|
61
|
+
return Portal.LOOKUP_ROOT, ref_validator
|
62
|
+
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
63
|
+
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
64
|
+
if re.match(schema_property_pattern_submitted_id, value):
|
65
|
+
# Case: lookup by submitted_id (only by specified type).
|
66
|
+
return Portal.LOOKUP_SPECIFIED_TYPE, ref_validator
|
67
|
+
return DEFAULT_RESPONSE
|
68
|
+
|
69
|
+
|
70
|
+
# This is here for now because of problems with circular dependencies.
|
71
|
+
# See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
|
72
|
+
def _is_accession_id(value: str) -> bool:
|
73
|
+
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "dcicutils"
|
3
|
-
version = "8.9.0"
|
3
|
+
version = "8.9.0.1b1" # TODO: To become 8.10.0
|
4
4
|
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
|
5
5
|
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
|
6
6
|
license = "MIT"
|
@@ -1,67 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from typing import Optional
|
3
|
-
from dcicutils.structured_data import Portal
|
4
|
-
|
5
|
-
|
6
|
-
def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
|
7
|
-
#
|
8
|
-
# FYI: Note this situation WRT object lookups ...
|
9
|
-
#
|
10
|
-
# /{submitted_id} # NOT FOUND
|
11
|
-
# /UnalignedReads/{submitted_id} # OK
|
12
|
-
# /SubmittedFile/{submitted_id} # OK
|
13
|
-
# /File/{submitted_id} # NOT FOUND
|
14
|
-
#
|
15
|
-
# /{accession} # OK
|
16
|
-
# /UnalignedReads/{accession} # NOT FOUND
|
17
|
-
# /SubmittedFile/{accession} # NOT FOUND
|
18
|
-
# /File/{accession} # OK
|
19
|
-
#
|
20
|
-
def ref_validator(schema: Optional[dict],
|
21
|
-
property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
|
22
|
-
"""
|
23
|
-
Returns False iff the type represented by the given schema, can NOT be referenced by
|
24
|
-
the given property name with the given property value, otherwise returns None.
|
25
|
-
|
26
|
-
For example, if the schema is for the UnalignedReads type and the property name
|
27
|
-
is accession, then we will return False iff the given property value is NOT a properly
|
28
|
-
formatted accession ID. Otherwise, we will return None, which indicates that the
|
29
|
-
caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
|
30
|
-
its default behavior, which is to check other ways in which the given type can NOT
|
31
|
-
be referenced by the given value, i.e. it checks other identifying properties for
|
32
|
-
the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
|
33
|
-
|
34
|
-
The goal (in structured_data) being to detect if a type is being referenced in such
|
35
|
-
a way that cannot possibly be allowed, i.e. because none of its identifying types
|
36
|
-
are in the required form (if indeed there any requirements). Note that it is guaranteed
|
37
|
-
that the given property name is indeed an identifying property for the given type.
|
38
|
-
"""
|
39
|
-
if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
|
40
|
-
if (property_format == "accession") and (property_name == "accession"):
|
41
|
-
if not _is_accession_id(property_value):
|
42
|
-
return False
|
43
|
-
return None
|
44
|
-
|
45
|
-
DEFAULT_RESPONSE = (Portal.LOOKUP_DEFAULT, ref_validator)
|
46
|
-
|
47
|
-
if not value:
|
48
|
-
return DEFAULT_RESPONSE
|
49
|
-
if not schema:
|
50
|
-
if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
|
51
|
-
return DEFAULT_RESPONSE
|
52
|
-
if schema_properties := schema.get("properties"):
|
53
|
-
if schema_properties.get("accession") and _is_accession_id(value):
|
54
|
-
# Case: lookup by accession (only by root).
|
55
|
-
return Portal.LOOKUP_ROOT, ref_validator
|
56
|
-
elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
|
57
|
-
if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
|
58
|
-
if re.match(schema_property_pattern_submitted_id, value):
|
59
|
-
# Case: lookup by submitted_id (only by specified type).
|
60
|
-
return Portal.LOOKUP_SPECIFIED_TYPE, ref_validator
|
61
|
-
return DEFAULT_RESPONSE
|
62
|
-
|
63
|
-
|
64
|
-
# This is here for now because of problems with circular dependencies.
|
65
|
-
# See: smaht-portal/.../schema_formats.py
|
66
|
-
def _is_accession_id(value: str) -> bool:
|
67
|
-
return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/c4-python-infrastructure.jsonc
RENAMED
File without changes
|
{dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/park-lab-common-server.jsonc
RENAMED
File without changes
|
File without changes
|
{dcicutils-8.9.0 → dcicutils-8.9.0.1b1}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|