dcicutils 8.4.0.1b3__tar.gz → 8.4.0.1b5__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/PKG-INFO +1 -1
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/data_readers.py +24 -12
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/misc_utils.py +8 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/structured_data.py +216 -68
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/pyproject.toml +1 -1
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/LICENSE.txt +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/README.rst +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/__init__.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/base.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/beanstalk_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/bundle_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/cloudformation_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/codebuild_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/command_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/common.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/contribution_scripts.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/contribution_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/creds_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/data_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/deployment_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/diff_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/docker_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ecr_scripts.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ecr_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ecs_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_base.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_manager.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_scripts.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_utils_legacy.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/es_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/exceptions.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ff_mocks.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ff_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/function_cache_decorator.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/glacier_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/jh_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/kibana/dashboards.json +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/kibana/readme.md +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/lang_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/log_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/obfuscation_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/opensearch_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/project_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/qa_checkers.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/qa_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/redis_tools.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/redis_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/s3_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/scripts/publish_to_pypi.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/scripts/run_license_checker.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/secrets_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/sheet_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/snapshot_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ssl_certificate_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/task_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/trace_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/validation_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/variant_utils.py +0 -0
- {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/zip_utils.py +0 -0
@@ -2,27 +2,30 @@ import abc
|
|
2
2
|
import csv
|
3
3
|
import openpyxl
|
4
4
|
from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union
|
5
|
-
from dcicutils.misc_utils import right_trim
|
5
|
+
from dcicutils.misc_utils import create_object, right_trim
|
6
|
+
|
7
|
+
# Forward type references for type hints.
|
8
|
+
Excel = Type["Excel"]
|
6
9
|
|
7
10
|
|
8
11
|
class RowReader(abc.ABC):
|
9
12
|
|
10
13
|
def __init__(self):
|
11
14
|
self.header = None
|
12
|
-
self.
|
15
|
+
self.row_number = 0
|
13
16
|
self._warning_empty_headers = False
|
14
17
|
self._warning_extra_values = [] # Line numbers.
|
15
18
|
self.open()
|
16
19
|
|
17
20
|
def __iter__(self) -> Iterator:
|
18
21
|
for row in self.rows:
|
19
|
-
self.
|
22
|
+
self.row_number += 1
|
20
23
|
if self.is_comment_row(row):
|
21
24
|
continue
|
22
25
|
if self.is_terminating_row(row):
|
23
26
|
break
|
24
27
|
if len(self.header) < len(row): # Row values beyond what there are headers for are ignored.
|
25
|
-
self._warning_extra_values.append(self.
|
28
|
+
self._warning_extra_values.append(self.row_number)
|
26
29
|
yield {column: self.cell_value(value) for column, value in zip(self.header, row)}
|
27
30
|
|
28
31
|
def _define_header(self, header: List[Optional[Any]]) -> None:
|
@@ -49,13 +52,20 @@ class RowReader(abc.ABC):
|
|
49
52
|
pass
|
50
53
|
|
51
54
|
@property
|
52
|
-
def
|
53
|
-
|
55
|
+
def file(self) -> Optional[str]:
    """
    Returns the value of the _file attribute of this reader (the file it was
    created for), or None if no such attribute has been set on this reader.
    """
    return getattr(self, "_file", None)
|
57
|
+
|
58
|
+
@property
|
59
|
+
def warnings(self) -> List[str]:
|
60
|
+
warnings = []
|
54
61
|
if self._warning_empty_headers:
|
55
|
-
|
62
|
+
warnings.append({"src": create_object(file=self.file),
|
63
|
+
"warning": "Empty header column encountered; ignoring it and all subsequent columns."})
|
56
64
|
if self._warning_extra_values:
|
57
|
-
|
58
|
-
|
65
|
+
for row_number in self._warning_extra_values:
|
66
|
+
warnings.append({"src": create_object(file=self.file, row=row_number),
|
67
|
+
"warning": f"Extra row column values."})
|
68
|
+
return warnings
|
59
69
|
|
60
70
|
|
61
71
|
class ListReader(RowReader):
|
@@ -101,9 +111,10 @@ class CsvReader(RowReader):
|
|
101
111
|
|
102
112
|
class ExcelSheetReader(RowReader):
|
103
113
|
|
104
|
-
def __init__(self, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
|
114
|
+
def __init__(self, excel: Excel, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
|
105
115
|
self.sheet_name = sheet_name or "Sheet1"
|
106
116
|
self._workbook = workbook
|
117
|
+
self._file = excel._file
|
107
118
|
self._rows = None
|
108
119
|
super().__init__()
|
109
120
|
|
@@ -134,12 +145,13 @@ class Excel:
|
|
134
145
|
self.open()
|
135
146
|
|
136
147
|
def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:
|
137
|
-
return self._reader_class(sheet_name=sheet_name, workbook=self._workbook)
|
148
|
+
return self._reader_class(self, sheet_name=sheet_name, workbook=self._workbook)
|
138
149
|
|
139
150
|
def open(self) -> None:
|
140
151
|
if self._workbook is None:
|
141
152
|
self._workbook = openpyxl.load_workbook(self._file, data_only=True)
|
142
|
-
self.sheet_names = [
|
153
|
+
self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames
|
154
|
+
if self._workbook[sheet_name].sheet_state != "hidden"]
|
143
155
|
|
144
156
|
def __del__(self) -> None:
|
145
157
|
if (workbook := self._workbook) is not None:
|
@@ -1501,6 +1501,14 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
|
|
1501
1501
|
return list_or_tuple[:i + 1]
|
1502
1502
|
|
1503
1503
|
|
1504
|
+
def create_object(**kwargs) -> dict:
    """
    Returns a dictionary of the given keyword arguments, in the order given,
    leaving out every argument whose value is falsy (e.g. None, 0, False, "",
    or an empty list or dictionary).
    """
    return {name: value for name, value in kwargs.items() if value}
|
1510
|
+
|
1511
|
+
|
1504
1512
|
def is_c4_arn(arn: str) -> bool:
|
1505
1513
|
"""
|
1506
1514
|
Returns True iff the given (presumed) AWS ARN string value looks like it
|
@@ -1,3 +1,4 @@
|
|
1
|
+
from collections import deque
|
1
2
|
import copy
|
2
3
|
from functools import lru_cache
|
3
4
|
import json
|
@@ -15,8 +16,8 @@ from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT
|
|
15
16
|
from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager
|
16
17
|
from dcicutils.data_readers import CsvReader, Excel, RowReader
|
17
18
|
from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
|
18
|
-
from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim,
|
19
|
-
to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
|
19
|
+
from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim,
|
20
|
+
split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
|
20
21
|
from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files
|
21
22
|
|
22
23
|
|
@@ -52,7 +53,10 @@ class StructuredDataSet:
|
|
52
53
|
self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None
|
53
54
|
self._order = order
|
54
55
|
self._prune = prune
|
55
|
-
self.
|
56
|
+
self._warnings = {}
|
57
|
+
self._errors = {}
|
58
|
+
self._resolved_refs = []
|
59
|
+
self._validated = False
|
56
60
|
self._load_file(file) if file else None
|
57
61
|
|
58
62
|
@staticmethod
|
@@ -61,16 +65,85 @@ class StructuredDataSet:
|
|
61
65
|
order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet:
|
62
66
|
return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune)
|
63
67
|
|
64
|
-
def validate(self) ->
|
65
|
-
|
68
|
+
def validate(self, force: bool = False) -> None:
|
69
|
+
if self._validated and not force:
|
70
|
+
return
|
71
|
+
self._validated = True
|
66
72
|
for type_name in self.data:
|
67
73
|
if (schema := Schema.load_by_name(type_name, portal=self._portal)):
|
68
|
-
|
74
|
+
row_number = 0
|
69
75
|
for data in self.data[type_name]:
|
70
|
-
|
71
|
-
if (
|
72
|
-
|
73
|
-
|
76
|
+
row_number += 1
|
77
|
+
if (validation_errors := schema.validate(data)) is not None:
|
78
|
+
for validation_error in validation_errors:
|
79
|
+
self._note_error({"src": create_object(type=schema.name, row=row_number),
|
80
|
+
"error": validation_error}, "validation")
|
81
|
+
|
82
|
+
@property
|
83
|
+
def warnings(self) -> dict:
|
84
|
+
return self._warnings
|
85
|
+
|
86
|
+
@property
|
87
|
+
def reader_warnings(self) -> List[dict]:
|
88
|
+
return self._warnings.get("reader") or []
|
89
|
+
|
90
|
+
@property
|
91
|
+
def errors(self) -> dict:
|
92
|
+
return self._errors
|
93
|
+
|
94
|
+
@property
|
95
|
+
def ref_errors(self) -> List[dict]:
|
96
|
+
return self._errors.get("ref") or []
|
97
|
+
|
98
|
+
@property
|
99
|
+
def validation_errors(self) -> List[dict]:
|
100
|
+
return self._errors.get("validation") or []
|
101
|
+
|
102
|
+
@property
|
103
|
+
def resolved_refs(self) -> List[str]:
|
104
|
+
return self._resolved_refs
|
105
|
+
|
106
|
+
@staticmethod
|
107
|
+
def format_issue(issue: dict, original_file: Optional[str] = None) -> str:
    """
    Returns a one-line string describing the given issue (a warning or error dictionary),
    or an empty string if there is nothing to show.

    The result has the form "<source>: <message>" where the message is the "error" value of
    the issue or, failing that, its "warning" value. The source is built from the "src"
    dictionary of the issue as "<file>:<type>.<column> [<row>]", with any missing parts left
    out; the file (its base name only) is included only if the given original_file name ends
    with .zip, .tgz or .gz. If "src" is a dictionary but yields nothing, then the source is
    "Warning", "Error" or "Issue"; if "src" is not a dictionary, then the source is empty.
    An issue with no message but with a truthy "truncated" value yields a "Truncated result
    set" line containing its "more" and "details" values. Any issue which is not a dictionary
    yields an empty string.
    """
    def src_string(issue: dict) -> str:
        issue_src = issue.get("src")
        if not isinstance(issue_src, dict):
            return ""
        show_file = original_file and original_file.endswith((".zip", ".tgz", ".gz"))
        src_file = issue_src.get("file") if show_file else ""
        src_type = issue_src.get("type")
        src_column = issue_src.get("column")
        src_row = issue_src.get("row", 0)
        if src_file:
            src = os.path.basename(src_file)
            sep = ":"
        else:
            src = ""
            sep = "."
        if src_type:
            src += (sep if src else "") + src_type
            sep = "."
        if src_column:
            src += (sep if src else "") + src_column
        # A row which is not a positive integer is left out (rather than raising an exception).
        if isinstance(src_row, int) and src_row > 0:
            src += (" " if src else "") + f"[{src_row}]"
        if not src:
            if issue.get("warning"):
                src = "Warning"
            elif issue.get("error"):
                src = "Error"
            else:
                src = "Issue"
        return src
    if not isinstance(issue, dict):
        return ""
    # An error message takes precedence over a warning message.
    if issue_message := (issue.get("error") or issue.get("warning")):
        return f"{src_string(issue)}: {issue_message}"
    if issue.get("truncated"):
        return f"Truncated result set | More: {issue.get('more')} | See: {issue.get('details')}"
    return ""
|
74
147
|
|
75
148
|
def _load_file(self, file: str) -> None:
|
76
149
|
# Returns a dictionary where each property is the name (i.e. the type) of the data,
|
@@ -100,15 +173,13 @@ class StructuredDataSet:
|
|
100
173
|
self._load_file(file)
|
101
174
|
|
102
175
|
def _load_csv_file(self, file: str) -> None:
|
103
|
-
self._load_reader(
|
104
|
-
self._note_issues(reader.issues, os.path.basename(file))
|
176
|
+
self._load_reader(CsvReader(file), type_name=Schema.type_name(file))
|
105
177
|
|
106
178
|
def _load_excel_file(self, file: str) -> None:
|
107
179
|
excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
|
108
180
|
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
|
109
181
|
for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
|
110
|
-
self._load_reader(
|
111
|
-
self._note_issues(reader.issues, f"{file}:{sheet_name}")
|
182
|
+
self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
|
112
183
|
|
113
184
|
def _load_json_file(self, file: str) -> None:
|
114
185
|
with open(file) as f:
|
@@ -119,16 +190,20 @@ class StructuredDataSet:
|
|
119
190
|
noschema = False
|
120
191
|
structured_row_template = None
|
121
192
|
for row in reader:
|
122
|
-
if not structured_row_template: # Delay creation
|
193
|
+
if not structured_row_template: # Delay schema creation so we don't reference it if there are no rows.
|
123
194
|
if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)):
|
124
195
|
noschema = True
|
196
|
+
elif schema and (schema_name := schema.name):
|
197
|
+
type_name = schema_name
|
125
198
|
structured_row_template = _StructuredRowTemplate(reader.header, schema)
|
126
199
|
structured_row = structured_row_template.create_row()
|
127
200
|
for column_name, value in row.items():
|
128
|
-
structured_row_template.set_value(structured_row, column_name, value, reader.
|
129
|
-
if schema and (schema_name := schema.name):
|
130
|
-
type_name = schema_name
|
201
|
+
structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
|
131
202
|
self._add(type_name, structured_row)
|
203
|
+
self._note_warning(reader.warnings, "reader")
|
204
|
+
if schema:
|
205
|
+
self._note_error(schema._unresolved_refs, "ref")
|
206
|
+
self._resolved_refs = schema._resolved_refs
|
132
207
|
|
133
208
|
def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
|
134
209
|
if self._prune:
|
@@ -138,11 +213,19 @@ class StructuredDataSet:
|
|
138
213
|
else:
|
139
214
|
self.data[type_name] = [data] if isinstance(data, dict) else data
|
140
215
|
|
141
|
-
def
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
216
|
+
def _note_warning(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
|
217
|
+
self._note_issue(self._warnings, item, group)
|
218
|
+
|
219
|
+
def _note_error(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
|
220
|
+
self._note_issue(self._errors, item, group)
|
221
|
+
|
222
|
+
def _note_issue(self, issues: dict, item: Optional[Union[dict, List[dict]]], group: str) -> None:
|
223
|
+
if isinstance(item, dict) and item:
|
224
|
+
item = [item]
|
225
|
+
if isinstance(item, list) and item:
|
226
|
+
if not issues.get(group):
|
227
|
+
issues[group] = []
|
228
|
+
issues[group].extend(item)
|
146
229
|
|
147
230
|
|
148
231
|
class _StructuredRowTemplate:
|
@@ -155,10 +238,10 @@ class _StructuredRowTemplate:
|
|
155
238
|
def create_row(self) -> dict:
|
156
239
|
return copy.deepcopy(self._template)
|
157
240
|
|
158
|
-
def set_value(self, data: dict, column_name: str, value: str,
|
241
|
+
def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None:
|
159
242
|
if (set_value_function := self._set_value_functions.get(column_name)):
|
160
|
-
src = (
|
161
|
-
|
243
|
+
src = create_object(type=self._schema.name if self._schema else None,
|
244
|
+
column=column_name, file=file, row=row_number)
|
162
245
|
set_value_function(data, value, src)
|
163
246
|
|
164
247
|
def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here.
|
@@ -262,17 +345,27 @@ class Schema:
|
|
262
345
|
"number": self._map_function_number,
|
263
346
|
"string": self._map_function_string
|
264
347
|
}
|
348
|
+
self._resolved_refs = set()
|
349
|
+
self._unresolved_refs = []
|
265
350
|
self._typeinfo = self._create_typeinfo(schema_json)
|
266
351
|
|
267
352
|
@staticmethod
|
268
353
|
def load_by_name(name: str, portal: Portal) -> Optional[dict]:
|
269
354
|
return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None
|
270
355
|
|
271
|
-
def validate(self, data: dict) ->
|
272
|
-
|
273
|
-
for
|
274
|
-
|
275
|
-
return
|
356
|
+
def validate(self, data: dict) -> List[str]:
|
357
|
+
errors = []
|
358
|
+
for error in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
|
359
|
+
errors.append(error.message)
|
360
|
+
return errors
|
361
|
+
|
362
|
+
@property
|
363
|
+
def unresolved_refs(self) -> List[dict]:
|
364
|
+
return self._unresolved_refs
|
365
|
+
|
366
|
+
@property
|
367
|
+
def resolved_refs(self) -> List[str]:
|
368
|
+
return list(self._resolved_refs)
|
276
369
|
|
277
370
|
def get_map_value_function(self, column_name: str) -> Optional[Any]:
|
278
371
|
return (self._get_typeinfo(column_name) or {}).get("map")
|
@@ -333,14 +426,16 @@ class Schema:
|
|
333
426
|
def _map_function_ref(self, typeinfo: dict) -> Callable:
|
334
427
|
def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any:
|
335
428
|
nonlocal self, typeinfo
|
336
|
-
exception = None
|
337
429
|
if not value:
|
338
430
|
if (column := typeinfo.get("column")) and column in self.data.get("required", []):
|
339
|
-
|
340
|
-
elif portal
|
341
|
-
|
342
|
-
|
343
|
-
|
431
|
+
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/<null>"})
|
432
|
+
elif portal:
|
433
|
+
if not (resolved := portal.ref_exists(link_to, value)):
|
434
|
+
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
|
435
|
+
elif len(resolved) > 1:
|
436
|
+
self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved})
|
437
|
+
else:
|
438
|
+
self._resolved_refs.add(f"/{link_to}/{value}")
|
344
439
|
return value
|
345
440
|
return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
|
346
441
|
|
@@ -481,15 +576,8 @@ class PortalBase:
|
|
481
576
|
env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None,
|
482
577
|
key: Optional[Union[dict, tuple]] = None,
|
483
578
|
portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase:
|
484
|
-
if isinstance(arg, VirtualApp
|
485
|
-
|
486
|
-
elif isinstance(arg, TestApp) and not portal:
|
487
|
-
portal = arg
|
488
|
-
elif isinstance(arg, Router) and not portal:
|
489
|
-
portal = arg
|
490
|
-
elif isinstance(arg, Portal) and not portal:
|
491
|
-
portal = arg
|
492
|
-
elif isinstance(arg, str) and arg.endswith(".ini") and not portal:
|
579
|
+
if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or
|
580
|
+
isinstance(arg, str) and arg.endswith(".ini")) and not portal):
|
493
581
|
portal = arg
|
494
582
|
elif isinstance(arg, str) and not env:
|
495
583
|
env = arg
|
@@ -539,15 +627,12 @@ class PortalBase:
|
|
539
627
|
return post_metadata(schema_name=object_type, post_item=data, key=self._key)
|
540
628
|
return self.post(f"/{object_type}", data)
|
541
629
|
|
542
|
-
def get_schema(self, schema_name: str) -> Optional[dict]:
|
543
|
-
return get_schema(schema_name, portal_vapp=self._vapp, key=self._key)
|
544
|
-
|
545
630
|
def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]:
|
546
631
|
if isinstance(self._vapp, (VirtualApp, TestApp)):
|
547
632
|
response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs))
|
548
633
|
if response and response.status_code in [301, 302, 303, 307, 308] and follow:
|
549
634
|
response = response.follow()
|
550
|
-
return response
|
635
|
+
return self._response(response)
|
551
636
|
return requests.get(self._uri(uri), allow_redirects=follow, **self._kwargs(**kwargs))
|
552
637
|
|
553
638
|
def patch(self, uri: str, data: Optional[dict] = None,
|
@@ -565,6 +650,12 @@ class PortalBase:
|
|
565
650
|
return self._vapp.post_json(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs))
|
566
651
|
return requests.post(self._uri(uri), json=json or data, files=files, **self._kwargs(**kwargs))
|
567
652
|
|
653
|
+
def get_schema(self, schema_name: str) -> Optional[dict]:
|
654
|
+
return get_schema(schema_name, portal_vapp=self._vapp, key=self._key)
|
655
|
+
|
656
|
+
def get_schemas(self) -> dict:
|
657
|
+
return self.get("/profiles/").json()
|
658
|
+
|
568
659
|
def _uri(self, uri: str) -> str:
|
569
660
|
if not isinstance(uri, str) or not uri:
|
570
661
|
return "/"
|
@@ -582,6 +673,19 @@ class PortalBase:
|
|
582
673
|
result_kwargs["timeout"] = timeout
|
583
674
|
return result_kwargs
|
584
675
|
|
676
|
+
def _response(self, response) -> Optional[RequestResponse]:
    """
    Returns the given response in a form where json is always called as a method.
    If the class of the given response defines json as a property, then the response is
    wrapped in an object whose json() method returns the value of that property and which
    passes every other attribute access through to the wrapped response; otherwise (or if
    the given response is falsy) the given response is returned as-is.
    """
    if response and isinstance(getattr(response.__class__, "json", None), property):
        class RequestResponseWrapper:  # For consistency change json property to method.
            def __init__(self, response, **kwargs):
                super().__init__(**kwargs)
                self._response = response
            def __getattr__(self, attr):  # noqa
                return getattr(self._response, attr)
            def json(self):  # noqa
                return self._response.json
        response = RequestResponseWrapper(response)
    return response
|
688
|
+
|
585
689
|
@staticmethod
|
586
690
|
def create_for_testing(ini_file: Optional[str] = None) -> PortalBase:
|
587
691
|
if isinstance(ini_file, str):
|
@@ -592,7 +696,7 @@ class PortalBase:
|
|
592
696
|
|
593
697
|
@staticmethod
|
594
698
|
def create_for_testing_local(ini_file: Optional[str] = None) -> Portal:
|
595
|
-
if isinstance(ini_file, str):
|
699
|
+
if isinstance(ini_file, str) and ini_file:
|
596
700
|
return Portal(Portal._create_testapp(ini_file))
|
597
701
|
minimal_ini_for_testing_local = "\n".join([
|
598
702
|
"[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy",
|
@@ -655,23 +759,67 @@ class Portal(PortalBase):
|
|
655
759
|
|
656
760
|
@lru_cache(maxsize=256)
|
657
761
|
def get_schema(self, schema_name: str) -> Optional[dict]:
|
658
|
-
|
659
|
-
return
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
762
|
+
if (schemas := self.get_schemas()) and (schema := schemas.get(schema_name := Schema.type_name(schema_name))):
|
763
|
+
return schema
|
764
|
+
if schema_name == schema_name.upper() and (schema := schemas.get(schema_name.lower().title())):
|
765
|
+
return schema
|
766
|
+
if schema_name == schema_name.lower() and (schema := schemas.get(schema_name.title())):
|
767
|
+
return schema
|
768
|
+
|
769
|
+
@lru_cache(maxsize=1)
|
770
|
+
def get_schemas(self) -> dict:
|
771
|
+
schemas = super(Portal, self).get_schemas()
|
772
|
+
if self._schemas:
|
773
|
+
schemas = copy.deepcopy(schemas)
|
774
|
+
for user_specified_schema in self._schemas:
|
775
|
+
if user_specified_schema.get("title"):
|
776
|
+
schemas[user_specified_schema["title"]] = user_specified_schema
|
777
|
+
return schemas
|
778
|
+
|
779
|
+
@lru_cache(maxsize=1)
|
780
|
+
def get_schemas_super_type_map(self) -> dict:
    """
    Returns the "super type map" for all of the known schemas (via /profiles).
    This is a dictionary of all types which have (one or more) sub-types whose value is
    an array of all of those sub-types (direct and all descendents), in breadth first order.
    The "Item" type is not included as a super type. A type is never listed as its own
    sub-type, and no sub-type is listed more than once, even if the schemas are circular.
    """
    def breadth_first(super_type_map: dict, super_type_name: str) -> List[str]:
        result = []
        seen = {super_type_name}  # Guards against looping forever on a circular sub-class chain.
        queue = deque(super_type_map.get(super_type_name, []))
        while queue:
            if (sub_type_name := queue.popleft()) in seen:
                continue
            seen.add(sub_type_name)
            result.append(sub_type_name)
            queue.extend(super_type_map.get(sub_type_name, []))
        return result
    if not (schemas := self.get_schemas()):
        return {}
    super_type_map = {}
    for type_name, schema in schemas.items():
        if super_type_name := schema.get("rdfs:subClassOf"):
            # The super type is given as a profile path, e.g. /profiles/SomeType.json.
            super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "")
            if super_type_name != "Item":
                sub_type_names = super_type_map.setdefault(super_type_name, [])
                if type_name not in sub_type_names:
                    sub_type_names.append(type_name)
    super_type_map_flattened = {}
    for super_type_name in super_type_map:
        super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name)
    return super_type_map_flattened
|
809
|
+
|
810
|
+
def ref_exists(self, type_name: str, value: str) -> List[str]:
    """
    Returns the list of the type names under which the given reference value exists,
    checking the given type itself first, and then each of its sub-types (direct and
    all descendents) in the order given by the super type map. An empty list means the
    reference was not found; more than one name means it was found under multiple types.
    """
    resolved = []
    if self._ref_exists_single(type_name, value):
        resolved.append(type_name)
    # Check for the given ref in all sub-types of the given type.
    sub_type_names = (self.get_schemas_super_type_map() or {}).get(type_name) or []
    for sub_type_name in sub_type_names:
        if self._ref_exists_single(sub_type_name, value):
            # Note the sub-type in which the ref was actually found, not the type asked for.
            resolved.append(sub_type_name)
    return resolved
|
821
|
+
|
822
|
+
def _ref_exists_single(self, type_name: str, value: str) -> bool:
|
675
823
|
if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
|
676
824
|
iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
|
677
825
|
for item in items:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "dcicutils"
|
3
|
-
version = "8.4.0.
|
3
|
+
version = "8.4.0.1b5" # TODO: To become 8.4.1
|
4
4
|
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
|
5
5
|
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
|
6
6
|
license = "MIT"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-infrastructure.jsonc
RENAMED
File without changes
|
File without changes
|
{dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common-server.jsonc
RENAMED
File without changes
|
{dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common.jsonc
RENAMED
File without changes
|
{dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc
RENAMED
File without changes
|
{dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-pipeline.jsonc
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|