dcicutils 8.4.0.1b4__tar.gz → 8.4.0.1b5__tar.gz
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/PKG-INFO +1 -1
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/data_readers.py +22 -11
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/misc_utils.py +8 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/structured_data.py +135 -52
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/pyproject.toml +1 -1
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/LICENSE.txt +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/README.rst +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/__init__.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/base.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/beanstalk_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/bundle_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/cloudformation_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/codebuild_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/command_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/common.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/contribution_scripts.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/contribution_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/creds_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/data_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/deployment_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/diff_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/docker_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ecr_scripts.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ecr_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ecs_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_base.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_manager.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_scripts.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_utils_legacy.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/es_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/exceptions.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ff_mocks.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ff_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/function_cache_decorator.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/glacier_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/jh_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/kibana/dashboards.json +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/kibana/readme.md +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/lang_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/log_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/obfuscation_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/opensearch_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/project_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/qa_checkers.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/qa_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/redis_tools.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/redis_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/s3_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/scripts/publish_to_pypi.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/scripts/run_license_checker.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/secrets_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/sheet_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/snapshot_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ssl_certificate_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/task_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/trace_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/validation_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/variant_utils.py +0 -0
- {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/zip_utils.py +0 -0
{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/data_readers.py

@@ -2,27 +2,30 @@ import abc
 import csv
 import openpyxl
 from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union
-from dcicutils.misc_utils import right_trim
+from dcicutils.misc_utils import create_object, right_trim
+
+# Forward type references for type hints.
+Excel = Type["Excel"]


 class RowReader(abc.ABC):

     def __init__(self):
         self.header = None
-        self.
+        self.row_number = 0
         self._warning_empty_headers = False
         self._warning_extra_values = [] # Line numbers.
         self.open()

     def __iter__(self) -> Iterator:
         for row in self.rows:
-            self.
+            self.row_number += 1
             if self.is_comment_row(row):
                 continue
             if self.is_terminating_row(row):
                 break
             if len(self.header) < len(row): # Row values beyond what there are headers for are ignored.
-                self._warning_extra_values.append(self.
+                self._warning_extra_values.append(self.row_number)
             yield {column: self.cell_value(value) for column, value in zip(self.header, row)}

     def _define_header(self, header: List[Optional[Any]]) -> None:
@@ -49,13 +52,20 @@ class RowReader(abc.ABC):
         pass

     @property
-    def
-
+    def file(self) -> Optional[str]:
+        return self._file if hasattr(self, "_file") else None
+
+    @property
+    def warnings(self) -> List[str]:
+        warnings = []
         if self._warning_empty_headers:
-
+            warnings.append({"src": create_object(file=self.file),
+                             "warning": "Empty header column encountered; ignoring it and all subsequent columns."})
         if self._warning_extra_values:
-
-
+            for row_number in self._warning_extra_values:
+                warnings.append({"src": create_object(file=self.file, row=row_number),
+                                 "warning": f"Extra row column values."})
+        return warnings


 class ListReader(RowReader):
@@ -101,9 +111,10 @@ class CsvReader(RowReader):

 class ExcelSheetReader(RowReader):

-    def __init__(self, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
+    def __init__(self, excel: Excel, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
         self.sheet_name = sheet_name or "Sheet1"
         self._workbook = workbook
+        self._file = excel._file
         self._rows = None
         super().__init__()

@@ -134,7 +145,7 @@ class Excel:
         self.open()

     def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:
-        return self._reader_class(sheet_name=sheet_name, workbook=self._workbook)
+        return self._reader_class(self, sheet_name=sheet_name, workbook=self._workbook)

     def open(self) -> None:
         if self._workbook is None:
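With these changes a reader keeps a running `row_number`, remembers its source `file`, and reports problems through a `warnings` property (a list of dicts carrying a `src` descriptor) rather than a separate issues mechanism. A minimal sketch of how the new warnings might be consumed; the workbook and sheet names here are hypothetical:

```python
from dcicutils.data_readers import Excel

excel = Excel("submission.xlsx")       # Hypothetical workbook name.
reader = excel.sheet_reader("Sample")  # ExcelSheetReader now also records the source file.
rows = list(reader)                    # Iterating advances reader.row_number for each data row.
for warning in reader.warnings:        # e.g. {"src": {"file": "submission.xlsx", "row": 7},
    print(warning["src"], warning["warning"])  # "warning": "Extra row column values."}
```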
{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/misc_utils.py

@@ -1501,6 +1501,14 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
     return list_or_tuple[:i + 1]


+def create_object(**kwargs) -> dict:
+    result = {}
+    for name in kwargs:
+        if kwargs[name]:
+            result[name] = kwargs[name]
+    return result
+
+
 def is_c4_arn(arn: str) -> bool:
     """
     Returns True iff the given (presumed) AWS ARN string value looks like it
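The new `create_object` helper simply keeps the keyword arguments whose values are truthy, which is what lets the `src` descriptors above omit empty files, columns, and zero row numbers. For example (illustrative values only):

```python
from dcicutils.misc_utils import create_object

src = create_object(type="Sample", column="donor", file=None, row=0)
# {'type': 'Sample', 'column': 'donor'}  -- falsy values (None, "", 0) are dropped.
```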
{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/structured_data.py

@@ -16,8 +16,8 @@ from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT
 from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager
 from dcicutils.data_readers import CsvReader, Excel, RowReader
 from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
-from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim,
-                                  to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
+from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim,
+                                  split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
 from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files


@@ -53,9 +53,10 @@ class StructuredDataSet:
         self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None
         self._order = order
         self._prune = prune
-        self.
-        self.
-        self.
+        self._warnings = {}
+        self._errors = {}
+        self._resolved_refs = []
+        self._validated = False
         self._load_file(file) if file else None

     @staticmethod
@@ -64,19 +65,85 @@ class StructuredDataSet:
              order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet:
         return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune)

-    def validate(self) ->
-
+    def validate(self, force: bool = False) -> None:
+        if self._validated and not force:
+            return
+        self._validated = True
         for type_name in self.data:
             if (schema := Schema.load_by_name(type_name, portal=self._portal)):
-
+                row_number = 0
                 for data in self.data[type_name]:
-
-                    if (
-
-
+                    row_number += 1
+                    if (validation_errors := schema.validate(data)) is not None:
+                        for validation_error in validation_errors:
+                            self._note_error({"src": create_object(type=schema.name, row=row_number),
+                                              "error": validation_error}, "validation")

-
-
+    @property
+    def warnings(self) -> dict:
+        return self._warnings
+
+    @property
+    def reader_warnings(self) -> List[dict]:
+        return self._warnings.get("reader") or []
+
+    @property
+    def errors(self) -> dict:
+        return self._errors
+
+    @property
+    def ref_errors(self) -> List[dict]:
+        return self._errors.get("ref") or []
+
+    @property
+    def validation_errors(self) -> List[dict]:
+        return self._errors.get("validation") or []
+
+    @property
+    def resolved_refs(self) -> List[str]:
+        return self._resolved_refs
+
+    @staticmethod
+    def format_issue(issue: dict, original_file: Optional[str] = None) -> str:
+        def src_string(issue: dict) -> str:
+            if not isinstance(issue, dict) or not isinstance(issue_src := issue.get("src"), dict):
+                return ""
+            show_file = original_file and (original_file.endswith(".zip") or
+                                           original_file.endswith(".tgz") or original_file.endswith(".gz"))
+            src_file = issue_src.get("file") if show_file else ""
+            src_type = issue_src.get("type")
+            src_column = issue_src.get("column")
+            src_row = issue_src.get("row", 0)
+            if src_file:
+                src = f"{os.path.basename(src_file)}"
+                sep = ":"
+            else:
+                src = ""
+                sep = "."
+            if src_type:
+                src += (sep if src else "") + src_type
+                sep = "."
+            if src_column:
+                src += (sep if src else "") + src_column
+            if src_row > 0:
+                src += (" " if src else "") + f"[{src_row}]"
+            if not src:
+                if issue.get("warning"):
+                    src = "Warning"
+                elif issue.get("error"):
+                    src = "Error"
+                else:
+                    src = "Issue"
+            return src
+        issue_message = None
+        if issue:
+            if error := issue.get("error"):
+                issue_message = error
+            elif warning := issue.get("warning"):
+                issue_message = warning
+            elif issue.get("truncated"):
+                return f"Truncated result set | More: {issue.get('more')} | See: {issue.get('details')}"
+        return f"{src_string(issue)}: {issue_message}" if issue_message else ""

     def _load_file(self, file: str) -> None:
         # Returns a dictionary where each property is the name (i.e. the type) of the data,
@@ -106,15 +173,13 @@ class StructuredDataSet:
         self._load_file(file)

     def _load_csv_file(self, file: str) -> None:
-        self._load_reader(
-        self._note_issues(reader.issues, os.path.basename(file))
+        self._load_reader(CsvReader(file), type_name=Schema.type_name(file))

     def _load_excel_file(self, file: str) -> None:
         excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
         order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
         for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
-            self._load_reader(
-            self._note_issues(reader.issues, f"{file}:{sheet_name}")
+            self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))

     def _load_json_file(self, file: str) -> None:
         with open(file) as f:
@@ -125,18 +190,20 @@ class StructuredDataSet:
         noschema = False
         structured_row_template = None
         for row in reader:
-            if not structured_row_template: # Delay creation
+            if not structured_row_template: # Delay schema creation so we don't reference it if there are no rows.
                 if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)):
                     noschema = True
+                elif schema and (schema_name := schema.name):
+                    type_name = schema_name
                 structured_row_template = _StructuredRowTemplate(reader.header, schema)
             structured_row = structured_row_template.create_row()
             for column_name, value in row.items():
-                structured_row_template.set_value(structured_row, column_name, value, reader.
-            if schema and (schema_name := schema.name):
-                type_name = schema_name
-                self._refs_resolved = self._refs_resolved | schema._refs_resolved
-                self._refs_unresolved = self._refs_unresolved | schema._refs_unresolved
+                structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
             self._add(type_name, structured_row)
+        self._note_warning(reader.warnings, "reader")
+        if schema:
+            self._note_error(schema._unresolved_refs, "ref")
+            self._resolved_refs = schema._resolved_refs

     def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
         if self._prune:
@@ -146,11 +213,19 @@ class StructuredDataSet:
         else:
             self.data[type_name] = [data] if isinstance(data, dict) else data

-    def
-
-
-
-
+    def _note_warning(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
+        self._note_issue(self._warnings, item, group)
+
+    def _note_error(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
+        self._note_issue(self._errors, item, group)
+
+    def _note_issue(self, issues: dict, item: Optional[Union[dict, List[dict]]], group: str) -> None:
+        if isinstance(item, dict) and item:
+            item = [item]
+        if isinstance(item, list) and item:
+            if not issues.get(group):
+                issues[group] = []
+            issues[group].extend(item)


 class _StructuredRowTemplate:
@@ -163,10 +238,10 @@ class _StructuredRowTemplate:
     def create_row(self) -> dict:
         return copy.deepcopy(self._template)

-    def set_value(self, data: dict, column_name: str, value: str,
+    def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None:
         if (set_value_function := self._set_value_functions.get(column_name)):
-            src = (
-
+            src = create_object(type=self._schema.name if self._schema else None,
+                                column=column_name, file=file, row=row_number)
             set_value_function(data, value, src)

     def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here.
@@ -270,19 +345,27 @@ class Schema:
             "number": self._map_function_number,
             "string": self._map_function_string
         }
-        self.
-        self.
+        self._resolved_refs = set()
+        self._unresolved_refs = []
         self._typeinfo = self._create_typeinfo(schema_json)

     @staticmethod
     def load_by_name(name: str, portal: Portal) -> Optional[dict]:
         return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None

-    def validate(self, data: dict) ->
-
-        for
-
-        return
+    def validate(self, data: dict) -> List[str]:
+        errors = []
+        for error in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
+            errors.append(error.message)
+        return errors
+
+    @property
+    def unresolved_refs(self) -> List[dict]:
+        return self._unresolved_refs
+
+    @property
+    def resolved_refs(self) -> List[str]:
+        return list(self._resolved_refs)

     def get_map_value_function(self, column_name: str) -> Optional[Any]:
         return (self._get_typeinfo(column_name) or {}).get("map")
@@ -343,17 +426,16 @@ class Schema:
     def _map_function_ref(self, typeinfo: dict) -> Callable:
         def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any:
             nonlocal self, typeinfo
-            exception = None
             if not value:
                 if (column := typeinfo.get("column")) and column in self.data.get("required", []):
-                    self.
-
-
-
-
-
-
+                    self._unresolved_refs.append({"src": src, "error": f"/{link_to}/<null>"})
+            elif portal:
+                if not (resolved := portal.ref_exists(link_to, value)):
+                    self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
+                elif len(resolved) > 1:
+                    self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved})
+                else:
+                    self._resolved_refs.add(f"/{link_to}/{value}")
             return value
         return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)

@@ -725,16 +807,17 @@ class Portal(PortalBase):
             super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name)
         return super_type_map_flattened

-    def ref_exists(self, type_name: str, value: str) ->
+    def ref_exists(self, type_name: str, value: str) -> List[str]:
+        resolved = []
         if self._ref_exists_single(type_name, value):
-
+            resolved.append(type_name)
         # Check for the given ref in all sub-types of the given type.
         if (schemas_super_type_map := self.get_schemas_super_type_map()):
             if (sub_type_names := schemas_super_type_map.get(type_name)):
                 for sub_type_name in sub_type_names:
                     if self._ref_exists_single(sub_type_name, value):
-
-        return
+                        resolved.append(type_name)
+        return resolved

     def _ref_exists_single(self, type_name: str, value: str) -> bool:
         if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
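Taken together, `StructuredDataSet` now groups reader warnings under `warnings["reader"]` and reference/validation problems under `errors["ref"]` and `errors["validation"]`, and `format_issue` renders any of these with its source (file, type, column, row). A hedged usage sketch, assuming a hypothetical `metadata.xlsx` input; normally a portal (Portal, VirtualApp, or key name) would be supplied so schemas and refs can actually be resolved:

```python
from dcicutils.structured_data import StructuredDataSet

data = StructuredDataSet.load("metadata.xlsx", portal=None)  # portal=None: no schema/ref checks.
data.validate()  # Validates once; subsequent calls are no-ops unless force=True.

for issue in data.reader_warnings + data.ref_errors + data.validation_errors:
    print(StructuredDataSet.format_issue(issue, original_file="metadata.xlsx"))
print("Resolved refs:", data.resolved_refs)
```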
{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "8.4.0.
+version = "8.4.0.1b5" # TODO: To become 8.4.1
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"