dcicutils 8.4.0.1b3__py3-none-any.whl → 8.4.0.1b5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of dcicutils might be problematic. Click here for more details.
- dcicutils/data_readers.py +24 -12
- dcicutils/misc_utils.py +8 -0
- dcicutils/structured_data.py +216 -68
- {dcicutils-8.4.0.1b3.dist-info → dcicutils-8.4.0.1b5.dist-info}/METADATA +1 -1
- {dcicutils-8.4.0.1b3.dist-info → dcicutils-8.4.0.1b5.dist-info}/RECORD +8 -8
- {dcicutils-8.4.0.1b3.dist-info → dcicutils-8.4.0.1b5.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.4.0.1b3.dist-info → dcicutils-8.4.0.1b5.dist-info}/WHEEL +0 -0
- {dcicutils-8.4.0.1b3.dist-info → dcicutils-8.4.0.1b5.dist-info}/entry_points.txt +0 -0
dcicutils/data_readers.py
CHANGED
@@ -2,27 +2,30 @@ import abc
|
|
2
2
|
import csv
|
3
3
|
import openpyxl
|
4
4
|
from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union
|
5
|
-
from dcicutils.misc_utils import right_trim
|
5
|
+
from dcicutils.misc_utils import create_object, right_trim
|
6
|
+
|
7
|
+
# Forward type references for type hints.
|
8
|
+
Excel = Type["Excel"]
|
6
9
|
|
7
10
|
|
8
11
|
class RowReader(abc.ABC):
|
9
12
|
|
10
13
|
def __init__(self):
|
11
14
|
self.header = None
|
12
|
-
self.
|
15
|
+
self.row_number = 0
|
13
16
|
self._warning_empty_headers = False
|
14
17
|
self._warning_extra_values = [] # Line numbers.
|
15
18
|
self.open()
|
16
19
|
|
17
20
|
def __iter__(self) -> Iterator:
|
18
21
|
for row in self.rows:
|
19
|
-
self.
|
22
|
+
self.row_number += 1
|
20
23
|
if self.is_comment_row(row):
|
21
24
|
continue
|
22
25
|
if self.is_terminating_row(row):
|
23
26
|
break
|
24
27
|
if len(self.header) < len(row): # Row values beyond what there are headers for are ignored.
|
25
|
-
self._warning_extra_values.append(self.
|
28
|
+
self._warning_extra_values.append(self.row_number)
|
26
29
|
yield {column: self.cell_value(value) for column, value in zip(self.header, row)}
|
27
30
|
|
28
31
|
def _define_header(self, header: List[Optional[Any]]) -> None:
|
@@ -49,13 +52,20 @@ class RowReader(abc.ABC):
|
|
49
52
|
pass
|
50
53
|
|
51
54
|
@property
|
52
|
-
def
|
53
|
-
|
55
|
+
def file(self) -> Optional[str]:
|
56
|
+
return self._file if hasattr(self, "_file") else None
|
57
|
+
|
58
|
+
@property
|
59
|
+
def warnings(self) -> List[str]:
|
60
|
+
warnings = []
|
54
61
|
if self._warning_empty_headers:
|
55
|
-
|
62
|
+
warnings.append({"src": create_object(file=self.file),
|
63
|
+
"warning": "Empty header column encountered; ignoring it and all subsequent columns."})
|
56
64
|
if self._warning_extra_values:
|
57
|
-
|
58
|
-
|
65
|
+
for row_number in self._warning_extra_values:
|
66
|
+
warnings.append({"src": create_object(file=self.file, row=row_number),
|
67
|
+
"warning": f"Extra row column values."})
|
68
|
+
return warnings
|
59
69
|
|
60
70
|
|
61
71
|
class ListReader(RowReader):
|
@@ -101,9 +111,10 @@ class CsvReader(RowReader):
|
|
101
111
|
|
102
112
|
class ExcelSheetReader(RowReader):
|
103
113
|
|
104
|
-
def __init__(self, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
|
114
|
+
def __init__(self, excel: Excel, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
|
105
115
|
self.sheet_name = sheet_name or "Sheet1"
|
106
116
|
self._workbook = workbook
|
117
|
+
self._file = excel._file
|
107
118
|
self._rows = None
|
108
119
|
super().__init__()
|
109
120
|
|
@@ -134,12 +145,13 @@ class Excel:
|
|
134
145
|
self.open()
|
135
146
|
|
136
147
|
def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:
|
137
|
-
return self._reader_class(sheet_name=sheet_name, workbook=self._workbook)
|
148
|
+
return self._reader_class(self, sheet_name=sheet_name, workbook=self._workbook)
|
138
149
|
|
139
150
|
def open(self) -> None:
|
140
151
|
if self._workbook is None:
|
141
152
|
self._workbook = openpyxl.load_workbook(self._file, data_only=True)
|
142
|
-
self.sheet_names = [
|
153
|
+
self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames
|
154
|
+
if self._workbook[sheet_name].sheet_state != "hidden"]
|
143
155
|
|
144
156
|
def __del__(self) -> None:
|
145
157
|
if (workbook := self._workbook) is not None:
|
dcicutils/misc_utils.py
CHANGED
@@ -1501,6 +1501,14 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
|
|
1501
1501
|
return list_or_tuple[:i + 1]
|
1502
1502
|
|
1503
1503
|
|
1504
|
+
def create_object(**kwargs) -> dict:
|
1505
|
+
result = {}
|
1506
|
+
for name in kwargs:
|
1507
|
+
if kwargs[name]:
|
1508
|
+
result[name] = kwargs[name]
|
1509
|
+
return result
|
1510
|
+
|
1511
|
+
|
1504
1512
|
def is_c4_arn(arn: str) -> bool:
|
1505
1513
|
"""
|
1506
1514
|
Returns True iff the given (presumed) AWS ARN string value looks like it
|
dcicutils/structured_data.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
from collections import deque
|
1
2
|
import copy
|
2
3
|
from functools import lru_cache
|
3
4
|
import json
|
@@ -15,8 +16,8 @@ from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT
|
|
15
16
|
from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager
|
16
17
|
from dcicutils.data_readers import CsvReader, Excel, RowReader
|
17
18
|
from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
|
18
|
-
from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim,
|
19
|
-
to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
|
19
|
+
from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim,
|
20
|
+
split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
|
20
21
|
from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files
|
21
22
|
|
22
23
|
|
@@ -52,7 +53,10 @@ class StructuredDataSet:
|
|
52
53
|
self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None
|
53
54
|
self._order = order
|
54
55
|
self._prune = prune
|
55
|
-
self.
|
56
|
+
self._warnings = {}
|
57
|
+
self._errors = {}
|
58
|
+
self._resolved_refs = []
|
59
|
+
self._validated = False
|
56
60
|
self._load_file(file) if file else None
|
57
61
|
|
58
62
|
@staticmethod
|
@@ -61,16 +65,85 @@ class StructuredDataSet:
|
|
61
65
|
order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet:
|
62
66
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune)
|
63
67
|
|
64
|
-
def validate(self) ->
|
65
|
-
|
68
|
+
def validate(self, force: bool = False) -> None:
|
69
|
+
if self._validated and not force:
|
70
|
+
return
|
71
|
+
self._validated = True
|
66
72
|
for type_name in self.data:
|
67
73
|
if (schema := Schema.load_by_name(type_name, portal=self._portal)):
|
68
|
-
|
74
|
+
row_number = 0
|
69
75
|
for data in self.data[type_name]:
|
70
|
-
|
71
|
-
if (
|
72
|
-
|
73
|
-
|
76
|
+
row_number += 1
|
77
|
+
if (validation_errors := schema.validate(data)) is not None:
|
78
|
+
for validation_error in validation_errors:
|
79
|
+
self._note_error({"src": create_object(type=schema.name, row=row_number),
|
80
|
+
"error": validation_error}, "validation")
|
81
|
+
|
82
|
+
@property
|
83
|
+
def warnings(self) -> dict:
|
84
|
+
return self._warnings
|
85
|
+
|
86
|
+
@property
|
87
|
+
def reader_warnings(self) -> List[dict]:
|
88
|
+
return self._warnings.get("reader") or []
|
89
|
+
|
90
|
+
@property
|
91
|
+
def errors(self) -> dict:
|
92
|
+
return self._errors
|
93
|
+
|
94
|
+
@property
|
95
|
+
def ref_errors(self) -> List[dict]:
|
96
|
+
return self._errors.get("ref") or []
|
97
|
+
|
98
|
+
@property
|
99
|
+
def validation_errors(self) -> List[dict]:
|
100
|
+
return self._errors.get("validation") or []
|
101
|
+
|
102
|
+
@property
|
103
|
+
def resolved_refs(self) -> List[str]:
|
104
|
+
return self._resolved_refs
|
105
|
+
|
106
|
+
@staticmethod
|
107
|
+
def format_issue(issue: dict, original_file: Optional[str] = None) -> str:
|
108
|
+
def src_string(issue: dict) -> str:
|
109
|
+
if not isinstance(issue, dict) or not isinstance(issue_src := issue.get("src"), dict):
|
110
|
+
return ""
|
111
|
+
show_file = original_file and (original_file.endswith(".zip") or
|
112
|
+
original_file.endswith(".tgz") or original_file.endswith(".gz"))
|
113
|
+
src_file = issue_src.get("file") if show_file else ""
|
114
|
+
src_type = issue_src.get("type")
|
115
|
+
src_column = issue_src.get("column")
|
116
|
+
src_row = issue_src.get("row", 0)
|
117
|
+
if src_file:
|
118
|
+
src = f"{os.path.basename(src_file)}"
|
119
|
+
sep = ":"
|
120
|
+
else:
|
121
|
+
src = ""
|
122
|
+
sep = "."
|
123
|
+
if src_type:
|
124
|
+
src += (sep if src else "") + src_type
|
125
|
+
sep = "."
|
126
|
+
if src_column:
|
127
|
+
src += (sep if src else "") + src_column
|
128
|
+
if src_row > 0:
|
129
|
+
src += (" " if src else "") + f"[{src_row}]"
|
130
|
+
if not src:
|
131
|
+
if issue.get("warning"):
|
132
|
+
src = "Warning"
|
133
|
+
elif issue.get("error"):
|
134
|
+
src = "Error"
|
135
|
+
else:
|
136
|
+
src = "Issue"
|
137
|
+
return src
|
138
|
+
issue_message = None
|
139
|
+
if issue:
|
140
|
+
if error := issue.get("error"):
|
141
|
+
issue_message = error
|
142
|
+
elif warning := issue.get("warning"):
|
143
|
+
issue_message = warning
|
144
|
+
elif issue.get("truncated"):
|
145
|
+
return f"Truncated result set | More: {issue.get('more')} | See: {issue.get('details')}"
|
146
|
+
return f"{src_string(issue)}: {issue_message}" if issue_message else ""
|
74
147
|
|
75
148
|
def _load_file(self, file: str) -> None:
|
76
149
|
# Returns a dictionary where each property is the name (i.e. the type) of the data,
|
@@ -100,15 +173,13 @@ class StructuredDataSet:
|
|
100
173
|
self._load_file(file)
|
101
174
|
|
102
175
|
def _load_csv_file(self, file: str) -> None:
|
103
|
-
self._load_reader(
|
104
|
-
self._note_issues(reader.issues, os.path.basename(file))
|
176
|
+
self._load_reader(CsvReader(file), type_name=Schema.type_name(file))
|
105
177
|
|
106
178
|
def _load_excel_file(self, file: str) -> None:
|
107
179
|
excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
|
108
180
|
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
|
109
181
|
for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
|
110
|
-
self._load_reader(
|
111
|
-
self._note_issues(reader.issues, f"{file}:{sheet_name}")
|
182
|
+
self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
|
112
183
|
|
113
184
|
def _load_json_file(self, file: str) -> None:
|
114
185
|
with open(file) as f:
|
@@ -119,16 +190,20 @@ class StructuredDataSet:
|
|
119
190
|
noschema = False
|
120
191
|
structured_row_template = None
|
121
192
|
for row in reader:
|
122
|
-
if not structured_row_template: # Delay creation
|
193
|
+
if not structured_row_template: # Delay schema creation so we don't reference it if there are no rows.
|
123
194
|
if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)):
|
124
195
|
noschema = True
|
196
|
+
elif schema and (schema_name := schema.name):
|
197
|
+
type_name = schema_name
|
125
198
|
structured_row_template = _StructuredRowTemplate(reader.header, schema)
|
126
199
|
structured_row = structured_row_template.create_row()
|
127
200
|
for column_name, value in row.items():
|
128
|
-
structured_row_template.set_value(structured_row, column_name, value, reader.
|
129
|
-
if schema and (schema_name := schema.name):
|
130
|
-
type_name = schema_name
|
201
|
+
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
131
202
|
self._add(type_name, structured_row)
|
203
|
+
self._note_warning(reader.warnings, "reader")
|
204
|
+
if schema:
|
205
|
+
self._note_error(schema._unresolved_refs, "ref")
|
206
|
+
self._resolved_refs = schema._resolved_refs
|
132
207
|
|
133
208
|
def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
|
134
209
|
if self._prune:
|
@@ -138,11 +213,19 @@ class StructuredDataSet:
|
|
138
213
|
else:
|
139
214
|
self.data[type_name] = [data] if isinstance(data, dict) else data
|
140
215
|
|
141
|
-
def
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
216
|
+
def _note_warning(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
|
217
|
+
self._note_issue(self._warnings, item, group)
|
218
|
+
|
219
|
+
def _note_error(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
|
220
|
+
self._note_issue(self._errors, item, group)
|
221
|
+
|
222
|
+
def _note_issue(self, issues: dict, item: Optional[Union[dict, List[dict]]], group: str) -> None:
|
223
|
+
if isinstance(item, dict) and item:
|
224
|
+
item = [item]
|
225
|
+
if isinstance(item, list) and item:
|
226
|
+
if not issues.get(group):
|
227
|
+
issues[group] = []
|
228
|
+
issues[group].extend(item)
|
146
229
|
|
147
230
|
|
148
231
|
class _StructuredRowTemplate:
|
@@ -155,10 +238,10 @@ class _StructuredRowTemplate:
|
|
155
238
|
def create_row(self) -> dict:
|
156
239
|
return copy.deepcopy(self._template)
|
157
240
|
|
158
|
-
def set_value(self, data: dict, column_name: str, value: str,
|
241
|
+
def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None:
|
159
242
|
if (set_value_function := self._set_value_functions.get(column_name)):
|
160
|
-
src = (
|
161
|
-
|
243
|
+
src = create_object(type=self._schema.name if self._schema else None,
|
244
|
+
column=column_name, file=file, row=row_number)
|
162
245
|
set_value_function(data, value, src)
|
163
246
|
|
164
247
|
def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here.
|
@@ -262,17 +345,27 @@ class Schema:
|
|
262
345
|
"number": self._map_function_number,
|
263
346
|
"string": self._map_function_string
|
264
347
|
}
|
348
|
+
self._resolved_refs = set()
|
349
|
+
self._unresolved_refs = []
|
265
350
|
self._typeinfo = self._create_typeinfo(schema_json)
|
266
351
|
|
267
352
|
@staticmethod
|
268
353
|
def load_by_name(name: str, portal: Portal) -> Optional[dict]:
|
269
354
|
return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None
|
270
355
|
|
271
|
-
def validate(self, data: dict) ->
|
272
|
-
|
273
|
-
for
|
274
|
-
|
275
|
-
return
|
356
|
+
def validate(self, data: dict) -> List[str]:
|
357
|
+
errors = []
|
358
|
+
for error in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
|
359
|
+
errors.append(error.message)
|
360
|
+
return errors
|
361
|
+
|
362
|
+
@property
|
363
|
+
def unresolved_refs(self) -> List[dict]:
|
364
|
+
return self._unresolved_refs
|
365
|
+
|
366
|
+
@property
|
367
|
+
def resolved_refs(self) -> List[str]:
|
368
|
+
return list(self._resolved_refs)
|
276
369
|
|
277
370
|
def get_map_value_function(self, column_name: str) -> Optional[Any]:
|
278
371
|
return (self._get_typeinfo(column_name) or {}).get("map")
|
@@ -333,14 +426,16 @@ class Schema:
|
|
333
426
|
def _map_function_ref(self, typeinfo: dict) -> Callable:
|
334
427
|
def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any:
|
335
428
|
nonlocal self, typeinfo
|
336
|
-
exception = None
|
337
429
|
if not value:
|
338
430
|
if (column := typeinfo.get("column")) and column in self.data.get("required", []):
|
339
|
-
|
340
|
-
elif portal
|
341
|
-
|
342
|
-
|
343
|
-
|
431
|
+
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/<null>"})
|
432
|
+
elif portal:
|
433
|
+
if not (resolved := portal.ref_exists(link_to, value)):
|
434
|
+
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
|
435
|
+
elif len(resolved) > 1:
|
436
|
+
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved})
|
437
|
+
else:
|
438
|
+
self._resolved_refs.add(f"/{link_to}/{value}")
|
344
439
|
return value
|
345
440
|
return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
|
346
441
|
|
@@ -481,15 +576,8 @@ class PortalBase:
|
|
481
576
|
env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None,
|
482
577
|
key: Optional[Union[dict, tuple]] = None,
|
483
578
|
portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase:
|
484
|
-
if isinstance(arg, VirtualApp
|
485
|
-
|
486
|
-
elif isinstance(arg, TestApp) and not portal:
|
487
|
-
portal = arg
|
488
|
-
elif isinstance(arg, Router) and not portal:
|
489
|
-
portal = arg
|
490
|
-
elif isinstance(arg, Portal) and not portal:
|
491
|
-
portal = arg
|
492
|
-
elif isinstance(arg, str) and arg.endswith(".ini") and not portal:
|
579
|
+
if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or
|
580
|
+
isinstance(arg, str) and arg.endswith(".ini")) and not portal):
|
493
581
|
portal = arg
|
494
582
|
elif isinstance(arg, str) and not env:
|
495
583
|
env = arg
|
@@ -539,15 +627,12 @@ class PortalBase:
|
|
539
627
|
return post_metadata(schema_name=object_type, post_item=data, key=self._key)
|
540
628
|
return self.post(f"/{object_type}", data)
|
541
629
|
|
542
|
-
def get_schema(self, schema_name: str) -> Optional[dict]:
|
543
|
-
return get_schema(schema_name, portal_vapp=self._vapp, key=self._key)
|
544
|
-
|
545
630
|
def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]:
|
546
631
|
if isinstance(self._vapp, (VirtualApp, TestApp)):
|
547
632
|
response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs))
|
548
633
|
if response and response.status_code in [301, 302, 303, 307, 308] and follow:
|
549
634
|
response = response.follow()
|
550
|
-
return response
|
635
|
+
return self._response(response)
|
551
636
|
return requests.get(self._uri(uri), allow_redirects=follow, **self._kwargs(**kwargs))
|
552
637
|
|
553
638
|
def patch(self, uri: str, data: Optional[dict] = None,
|
@@ -565,6 +650,12 @@ class PortalBase:
|
|
565
650
|
return self._vapp.post_json(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs))
|
566
651
|
return requests.post(self._uri(uri), json=json or data, files=files, **self._kwargs(**kwargs))
|
567
652
|
|
653
|
+
def get_schema(self, schema_name: str) -> Optional[dict]:
|
654
|
+
return get_schema(schema_name, portal_vapp=self._vapp, key=self._key)
|
655
|
+
|
656
|
+
def get_schemas(self) -> dict:
|
657
|
+
return self.get("/profiles/").json()
|
658
|
+
|
568
659
|
def _uri(self, uri: str) -> str:
|
569
660
|
if not isinstance(uri, str) or not uri:
|
570
661
|
return "/"
|
@@ -582,6 +673,19 @@ class PortalBase:
|
|
582
673
|
result_kwargs["timeout"] = timeout
|
583
674
|
return result_kwargs
|
584
675
|
|
676
|
+
def _response(self, response) -> Optional[RequestResponse]:
|
677
|
+
if response and isinstance(getattr(response.__class__, "json"), property):
|
678
|
+
class RequestResponseWrapper: # For consistency change json property to method.
|
679
|
+
def __init__(self, respnose, **kwargs):
|
680
|
+
super().__init__(**kwargs)
|
681
|
+
self._response = response
|
682
|
+
def __getattr__(self, attr): # noqa
|
683
|
+
return getattr(self._response, attr)
|
684
|
+
def json(self): # noqa
|
685
|
+
return self._response.json
|
686
|
+
response = RequestResponseWrapper(response)
|
687
|
+
return response
|
688
|
+
|
585
689
|
@staticmethod
|
586
690
|
def create_for_testing(ini_file: Optional[str] = None) -> PortalBase:
|
587
691
|
if isinstance(ini_file, str):
|
@@ -592,7 +696,7 @@ class PortalBase:
|
|
592
696
|
|
593
697
|
@staticmethod
|
594
698
|
def create_for_testing_local(ini_file: Optional[str] = None) -> Portal:
|
595
|
-
if isinstance(ini_file, str):
|
699
|
+
if isinstance(ini_file, str) and ini_file:
|
596
700
|
return Portal(Portal._create_testapp(ini_file))
|
597
701
|
minimal_ini_for_testing_local = "\n".join([
|
598
702
|
"[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy",
|
@@ -655,23 +759,67 @@ class Portal(PortalBase):
|
|
655
759
|
|
656
760
|
@lru_cache(maxsize=256)
|
657
761
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
658
|
-
|
659
|
-
return
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
762
|
+
if (schemas := self.get_schemas()) and (schema := schemas.get(schema_name := Schema.type_name(schema_name))):
|
763
|
+
return schema
|
764
|
+
if schema_name == schema_name.upper() and (schema := schemas.get(schema_name.lower().title())):
|
765
|
+
return schema
|
766
|
+
if schema_name == schema_name.lower() and (schema := schemas.get(schema_name.title())):
|
767
|
+
return schema
|
768
|
+
|
769
|
+
@lru_cache(maxsize=1)
|
770
|
+
def get_schemas(self) -> dict:
|
771
|
+
schemas = super(Portal, self).get_schemas()
|
772
|
+
if self._schemas:
|
773
|
+
schemas = copy.deepcopy(schemas)
|
774
|
+
for user_specified_schema in self._schemas:
|
775
|
+
if user_specified_schema.get("title"):
|
776
|
+
schemas[user_specified_schema["title"]] = user_specified_schema
|
777
|
+
return schemas
|
778
|
+
|
779
|
+
@lru_cache(maxsize=1)
|
780
|
+
def get_schemas_super_type_map(self) -> dict:
|
781
|
+
"""
|
782
|
+
Returns the "super type map" for all of the known schemas (via /profiles).
|
783
|
+
This is a dictionary of all types which have (one or more) sub-types whose value is
|
784
|
+
an array of all of those sub-types (direct and all descendents), in breadth first order.
|
785
|
+
"""
|
786
|
+
def breadth_first(super_type_map: dict, super_type_name: str) -> dict:
|
787
|
+
result = []
|
788
|
+
queue = deque(super_type_map.get(super_type_name, []))
|
789
|
+
while queue:
|
790
|
+
result.append(sub_type_name := queue.popleft())
|
791
|
+
if sub_type_name in super_type_map:
|
792
|
+
queue.extend(super_type_map[sub_type_name])
|
793
|
+
return result
|
794
|
+
if not (schemas := self.get_schemas()):
|
795
|
+
return {}
|
796
|
+
super_type_map = {}
|
797
|
+
for type_name in schemas:
|
798
|
+
if super_type_name := schemas[type_name].get("rdfs:subClassOf"):
|
799
|
+
super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "")
|
800
|
+
if super_type_name != "Item":
|
801
|
+
if not super_type_map.get(super_type_name):
|
802
|
+
super_type_map[super_type_name] = [type_name]
|
803
|
+
elif type_name not in super_type_map[super_type_name]:
|
804
|
+
super_type_map[super_type_name].append(type_name)
|
805
|
+
super_type_map_flattened = {}
|
806
|
+
for super_type_name in super_type_map:
|
807
|
+
super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name)
|
808
|
+
return super_type_map_flattened
|
809
|
+
|
810
|
+
def ref_exists(self, type_name: str, value: str) -> List[str]:
|
811
|
+
resolved = []
|
812
|
+
if self._ref_exists_single(type_name, value):
|
813
|
+
resolved.append(type_name)
|
814
|
+
# Check for the given ref in all sub-types of the given type.
|
815
|
+
if (schemas_super_type_map := self.get_schemas_super_type_map()):
|
816
|
+
if (sub_type_names := schemas_super_type_map.get(type_name)):
|
817
|
+
for sub_type_name in sub_type_names:
|
818
|
+
if self._ref_exists_single(sub_type_name, value):
|
819
|
+
resolved.append(type_name)
|
820
|
+
return resolved
|
821
|
+
|
822
|
+
def _ref_exists_single(self, type_name: str, value: str) -> bool:
|
675
823
|
if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
|
676
824
|
iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
|
677
825
|
for item in items:
|
@@ -9,7 +9,7 @@ dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
|
|
9
9
|
dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
|
10
10
|
dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
|
11
11
|
dcicutils/creds_utils.py,sha256=xrLekD49Ex0GOpL9n7LlJA4gvNcY7txTVFOSYD7LvEU,11113
|
12
|
-
dcicutils/data_readers.py,sha256=
|
12
|
+
dcicutils/data_readers.py,sha256=qoyCnXeQa9hIxlDvZ1JeUyADet7rb6dQcG4ZEZ8r1_k,5674
|
13
13
|
dcicutils/data_utils.py,sha256=k2OxOlsx7AJ6jF-YNlMyGus_JqSUBe4_n1s65Mv1gQQ,3098
|
14
14
|
dcicutils/deployment_utils.py,sha256=rcNUFMe_tsrG4CHEtgBe41cZx4Pk4JqISPsjrJRMoEs,68891
|
15
15
|
dcicutils/diff_utils.py,sha256=sQx-yz56DHAcQWOChYbAG3clXu7TbiZKlw-GggeveO0,8118
|
@@ -40,7 +40,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
40
40
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
41
41
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
42
42
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
43
|
-
dcicutils/misc_utils.py,sha256=
|
43
|
+
dcicutils/misc_utils.py,sha256=jfyWDrHAlx2REun51i3igEApfEMAsmakRDo2VKUr0LQ,99818
|
44
44
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
45
45
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
46
46
|
dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
|
@@ -55,14 +55,14 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
55
55
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
56
56
|
dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
|
57
57
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
58
|
-
dcicutils/structured_data.py,sha256=
|
58
|
+
dcicutils/structured_data.py,sha256=q3IaTkXkVlFXkKbwmCSbDRN9oQp5DwFkmHz0kWvb_W4,44447
|
59
59
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
60
60
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
61
61
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
62
62
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
63
63
|
dcicutils/zip_utils.py,sha256=0OXR0aLNwyLIZOzIFTM_5DOun7dxIv6TIZbFiithkO0,3276
|
64
|
-
dcicutils-8.4.0.
|
65
|
-
dcicutils-8.4.0.
|
66
|
-
dcicutils-8.4.0.
|
67
|
-
dcicutils-8.4.0.
|
68
|
-
dcicutils-8.4.0.
|
64
|
+
dcicutils-8.4.0.1b5.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
|
65
|
+
dcicutils-8.4.0.1b5.dist-info/METADATA,sha256=CpxQgbbWpPqaWG4bW9slXIOLl82aO2F3KzBf2PuaaP8,3314
|
66
|
+
dcicutils-8.4.0.1b5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
67
|
+
dcicutils-8.4.0.1b5.dist-info/entry_points.txt,sha256=8wbw5csMIgBXhkwfgsgJeuFcoUc0WsucUxmOyml2aoA,209
|
68
|
+
dcicutils-8.4.0.1b5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|