dcicutils 8.4.0.1b4.tar.gz → 8.4.0.1b5.tar.gz

Files changed (67)
  1. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/PKG-INFO +1 -1
  2. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/data_readers.py +22 -11
  3. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/misc_utils.py +8 -0
  4. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/structured_data.py +135 -52
  5. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/pyproject.toml +1 -1
  6. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/LICENSE.txt +0 -0
  7. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/README.rst +0 -0
  8. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/__init__.py +0 -0
  9. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/base.py +0 -0
  10. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/beanstalk_utils.py +0 -0
  11. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/bundle_utils.py +0 -0
  12. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/cloudformation_utils.py +0 -0
  13. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/codebuild_utils.py +0 -0
  14. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/command_utils.py +0 -0
  15. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/common.py +0 -0
  16. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/contribution_scripts.py +0 -0
  17. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/contribution_utils.py +0 -0
  18. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/creds_utils.py +0 -0
  19. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/data_utils.py +0 -0
  20. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/deployment_utils.py +0 -0
  21. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/diff_utils.py +0 -0
  22. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/docker_utils.py +0 -0
  23. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ecr_scripts.py +0 -0
  24. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ecr_utils.py +0 -0
  25. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ecs_utils.py +0 -0
  26. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_base.py +0 -0
  27. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_manager.py +0 -0
  28. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_scripts.py +0 -0
  29. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_utils.py +0 -0
  30. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/env_utils_legacy.py +0 -0
  31. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/es_utils.py +0 -0
  32. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/exceptions.py +0 -0
  33. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ff_mocks.py +0 -0
  34. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ff_utils.py +0 -0
  35. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/function_cache_decorator.py +0 -0
  36. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/glacier_utils.py +0 -0
  37. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/jh_utils.py +0 -0
  38. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/kibana/dashboards.json +0 -0
  39. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/kibana/readme.md +0 -0
  40. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/lang_utils.py +0 -0
  41. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  42. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  43. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  44. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  45. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  46. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  47. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/license_utils.py +0 -0
  48. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/log_utils.py +0 -0
  49. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/obfuscation_utils.py +0 -0
  50. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/opensearch_utils.py +0 -0
  51. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/project_utils.py +0 -0
  52. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/qa_checkers.py +0 -0
  53. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/qa_utils.py +0 -0
  54. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/redis_tools.py +0 -0
  55. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/redis_utils.py +0 -0
  56. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/s3_utils.py +0 -0
  57. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/scripts/publish_to_pypi.py +0 -0
  58. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/scripts/run_license_checker.py +0 -0
  59. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/secrets_utils.py +0 -0
  60. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/sheet_utils.py +0 -0
  61. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/snapshot_utils.py +0 -0
  62. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/ssl_certificate_utils.py +0 -0
  63. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/task_utils.py +0 -0
  64. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/trace_utils.py +0 -0
  65. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/validation_utils.py +0 -0
  66. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/variant_utils.py +0 -0
  67. {dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/zip_utils.py +0 -0

{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dcicutils
-Version: 8.4.0.1b4
+Version: 8.4.0.1b5
 Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
 Home-page: https://github.com/4dn-dcic/utils
 License: MIT

{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/data_readers.py
@@ -2,27 +2,30 @@ import abc
 import csv
 import openpyxl
 from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union
-from dcicutils.misc_utils import right_trim
+from dcicutils.misc_utils import create_object, right_trim
+
+# Forward type references for type hints.
+Excel = Type["Excel"]
 
 
 class RowReader(abc.ABC):
 
     def __init__(self):
         self.header = None
-        self.location = 0
+        self.row_number = 0
         self._warning_empty_headers = False
         self._warning_extra_values = []  # Line numbers.
         self.open()
 
     def __iter__(self) -> Iterator:
         for row in self.rows:
-            self.location += 1
+            self.row_number += 1
             if self.is_comment_row(row):
                 continue
             if self.is_terminating_row(row):
                 break
             if len(self.header) < len(row):  # Row values beyond what there are headers for are ignored.
-                self._warning_extra_values.append(self.location)
+                self._warning_extra_values.append(self.row_number)
             yield {column: self.cell_value(value) for column, value in zip(self.header, row)}
 
     def _define_header(self, header: List[Optional[Any]]) -> None:
@@ -49,13 +52,20 @@ class RowReader(abc.ABC):
         pass
 
     @property
-    def issues(self) -> Optional[List[str]]:
-        issues = []
+    def file(self) -> Optional[str]:
+        return self._file if hasattr(self, "_file") else None
+
+    @property
+    def warnings(self) -> List[str]:
+        warnings = []
         if self._warning_empty_headers:
-            issues.append("Empty header column encountered; ignoring it and all subsequent columns.")
+            warnings.append({"src": create_object(file=self.file),
+                             "warning": "Empty header column encountered; ignoring it and all subsequent columns."})
         if self._warning_extra_values:
-            issues.extend([f"Extra column values on row [{row_number}]" for row_number in self._warning_extra_values])
-        return issues if issues else None
+            for row_number in self._warning_extra_values:
+                warnings.append({"src": create_object(file=self.file, row=row_number),
+                                 "warning": f"Extra row column values."})
+        return warnings
 
 
 class ListReader(RowReader):
@@ -101,9 +111,10 @@ class CsvReader(RowReader):
 
 class ExcelSheetReader(RowReader):
 
-    def __init__(self, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
+    def __init__(self, excel: Excel, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
         self.sheet_name = sheet_name or "Sheet1"
         self._workbook = workbook
+        self._file = excel._file
         self._rows = None
         super().__init__()
 
@@ -134,7 +145,7 @@ class Excel:
         self.open()
 
     def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:
-        return self._reader_class(sheet_name=sheet_name, workbook=self._workbook)
+        return self._reader_class(self, sheet_name=sheet_name, workbook=self._workbook)
 
     def open(self) -> None:
         if self._workbook is None:
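
Taken together, the data_readers.py changes above replace the old string-based issues property with structured warnings, each carrying a "src" object built by create_object. A minimal sketch of how a caller might consume them (the workbook path here is hypothetical):

    from dcicutils.data_readers import Excel

    excel = Excel("metadata.xlsx")  # hypothetical input file
    for sheet_name in excel.sheet_names:
        reader = excel.sheet_reader(sheet_name)
        rows = list(reader)  # iterating advances row_number and records any warnings
        for warning in reader.warnings:
            # Each warning is a dict, e.g. {"src": {"file": ..., "row": ...}, "warning": "..."}
            print(warning["src"], "-", warning["warning"])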

{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/misc_utils.py
@@ -1501,6 +1501,14 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
     return list_or_tuple[:i + 1]
 
 
+def create_object(**kwargs) -> dict:
+    result = {}
+    for name in kwargs:
+        if kwargs[name]:
+            result[name] = kwargs[name]
+    return result
+
+
 def is_c4_arn(arn: str) -> bool:
     """
     Returns True iff the given (presumed) AWS ARN string value looks like it
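
The create_object helper added above simply copies keyword arguments into a dict while skipping falsy values, so optional "src" fields (file, row, column, type) are omitted rather than emitted as None or 0. A small illustration (the values are made up):

    from dcicutils.misc_utils import create_object

    src = create_object(type="FileFormat", file=None, column="", row=0)
    assert src == {"type": "FileFormat"}  # falsy keyword arguments are dropped
    src = create_object(type="FileFormat", row=3)
    assert src == {"type": "FileFormat", "row": 3}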

{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/dcicutils/structured_data.py
@@ -16,8 +16,8 @@ from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT
 from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager
 from dcicutils.data_readers import CsvReader, Excel, RowReader
 from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
-from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim, split_string,
-                                  to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
+from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim,
+                                  split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
 from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files
 
 
@@ -53,9 +53,10 @@ class StructuredDataSet:
         self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None
         self._order = order
         self._prune = prune
-        self._issues = None
-        self._refs_resolved = set()
-        self._refs_unresolved = set()
+        self._warnings = {}
+        self._errors = {}
+        self._resolved_refs = []
+        self._validated = False
         self._load_file(file) if file else None
 
     @staticmethod
@@ -64,19 +65,85 @@ class StructuredDataSet:
              order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet:
         return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune)
 
-    def validate(self) -> Optional[List[str]]:
-        issues = []
+    def validate(self, force: bool = False) -> None:
+        if self._validated and not force:
+            return
+        self._validated = True
         for type_name in self.data:
             if (schema := Schema.load_by_name(type_name, portal=self._portal)):
-                item_number = 0
+                row_number = 0
                 for data in self.data[type_name]:
-                    item_number += 1
-                    if (validation_issues := schema.validate(data)) is not None:
-                        issues.extend([f"{schema.name} [{item_number}]: {issue}" for issue in validation_issues])
-        return issues + (self._issues or [])
+                    row_number += 1
+                    if (validation_errors := schema.validate(data)) is not None:
+                        for validation_error in validation_errors:
+                            self._note_error({"src": create_object(type=schema.name, row=row_number),
+                                              "error": validation_error}, "validation")
 
-    def refs(self) -> Tuple[List[str], List[str]]:
-        return (sorted(self._refs_resolved), sorted(self._refs_unresolved))
+    @property
+    def warnings(self) -> dict:
+        return self._warnings
+
+    @property
+    def reader_warnings(self) -> List[dict]:
+        return self._warnings.get("reader") or []
+
+    @property
+    def errors(self) -> dict:
+        return self._errors
+
+    @property
+    def ref_errors(self) -> List[dict]:
+        return self._errors.get("ref") or []
+
+    @property
+    def validation_errors(self) -> List[dict]:
+        return self._errors.get("validation") or []
+
+    @property
+    def resolved_refs(self) -> List[str]:
+        return self._resolved_refs
+
+    @staticmethod
+    def format_issue(issue: dict, original_file: Optional[str] = None) -> str:
+        def src_string(issue: dict) -> str:
+            if not isinstance(issue, dict) or not isinstance(issue_src := issue.get("src"), dict):
+                return ""
+            show_file = original_file and (original_file.endswith(".zip") or
+                                           original_file.endswith(".tgz") or original_file.endswith(".gz"))
+            src_file = issue_src.get("file") if show_file else ""
+            src_type = issue_src.get("type")
+            src_column = issue_src.get("column")
+            src_row = issue_src.get("row", 0)
+            if src_file:
+                src = f"{os.path.basename(src_file)}"
+                sep = ":"
+            else:
+                src = ""
+                sep = "."
+            if src_type:
+                src += (sep if src else "") + src_type
+                sep = "."
+            if src_column:
+                src += (sep if src else "") + src_column
+            if src_row > 0:
+                src += (" " if src else "") + f"[{src_row}]"
+            if not src:
+                if issue.get("warning"):
+                    src = "Warning"
+                elif issue.get("error"):
+                    src = "Error"
+                else:
+                    src = "Issue"
+            return src
+        issue_message = None
+        if issue:
+            if error := issue.get("error"):
+                issue_message = error
+            elif warning := issue.get("warning"):
+                issue_message = warning
+            elif issue.get("truncated"):
+                return f"Truncated result set | More: {issue.get('more')} | See: {issue.get('details')}"
+        return f"{src_string(issue)}: {issue_message}" if issue_message else ""
 
     def _load_file(self, file: str) -> None:
         # Returns a dictionary where each property is the name (i.e. the type) of the data,
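
The new format_issue static method above renders one of these warning/error records as a single line. A rough illustration with a made-up issue record (no zip/tgz/gz original_file is given, so the file part is suppressed):

    issue = {"src": {"type": "FileFormat", "column": "identifier", "row": 3},
             "error": "'fastq' is not of type 'string'"}
    print(StructuredDataSet.format_issue(issue))
    # Expected to print something like: FileFormat.identifier [3]: 'fastq' is not of type 'string'
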
@@ -106,15 +173,13 @@ class StructuredDataSet:
             self._load_file(file)
 
     def _load_csv_file(self, file: str) -> None:
-        self._load_reader(reader := CsvReader(file), type_name=Schema.type_name(file))
-        self._note_issues(reader.issues, os.path.basename(file))
+        self._load_reader(CsvReader(file), type_name=Schema.type_name(file))
 
     def _load_excel_file(self, file: str) -> None:
         excel = Excel(file)  # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
         order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
         for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
-            self._load_reader(reader := excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
-            self._note_issues(reader.issues, f"{file}:{sheet_name}")
+            self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
 
     def _load_json_file(self, file: str) -> None:
         with open(file) as f:
@@ -125,18 +190,20 @@ class StructuredDataSet:
         noschema = False
         structured_row_template = None
         for row in reader:
-            if not structured_row_template:  # Delay creation just so we don't create it if there are no rows.
+            if not structured_row_template:  # Delay schema creation so we don't reference it if there are no rows.
                 if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)):
                     noschema = True
+                elif schema and (schema_name := schema.name):
+                    type_name = schema_name
                 structured_row_template = _StructuredRowTemplate(reader.header, schema)
             structured_row = structured_row_template.create_row()
             for column_name, value in row.items():
-                structured_row_template.set_value(structured_row, column_name, value, reader.location)
-            if schema and (schema_name := schema.name):
-                type_name = schema_name
-                self._refs_resolved = self._refs_resolved | schema._refs_resolved
-                self._refs_unresolved = self._refs_unresolved | schema._refs_unresolved
+                structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
             self._add(type_name, structured_row)
+        self._note_warning(reader.warnings, "reader")
+        if schema:
+            self._note_error(schema._unresolved_refs, "ref")
+            self._resolved_refs = schema._resolved_refs
 
     def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
         if self._prune:
@@ -146,11 +213,19 @@ class StructuredDataSet:
         else:
             self.data[type_name] = [data] if isinstance(data, dict) else data
 
-    def _note_issues(self, issues: Optional[List[str]], source: str) -> None:
-        if issues:
-            if not self._issues:
-                self._issues = []
-            self._issues.append({source: issues})
+    def _note_warning(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
+        self._note_issue(self._warnings, item, group)
+
+    def _note_error(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
+        self._note_issue(self._errors, item, group)
+
+    def _note_issue(self, issues: dict, item: Optional[Union[dict, List[dict]]], group: str) -> None:
+        if isinstance(item, dict) and item:
+            item = [item]
+        if isinstance(item, list) and item:
+            if not issues.get(group):
+                issues[group] = []
+            issues[group].extend(item)
 
 
 class _StructuredRowTemplate:
@@ -163,10 +238,10 @@ class _StructuredRowTemplate:
     def create_row(self) -> dict:
         return copy.deepcopy(self._template)
 
-    def set_value(self, data: dict, column_name: str, value: str, loc: int = -1) -> None:
+    def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None:
         if (set_value_function := self._set_value_functions.get(column_name)):
-            src = (f"{f'{self._schema.name}.' if self._schema else ''}" +
-                   f"{f'{column_name}' if column_name else ''}{f' [{loc}]' if loc else ''}")
+            src = create_object(type=self._schema.name if self._schema else None,
+                                column=column_name, file=file, row=row_number)
             set_value_function(data, value, src)
 
     def _create_row_template(self, column_names: List[str]) -> dict:  # Surprisingly tricky code here.
@@ -270,19 +345,27 @@ class Schema:
             "number": self._map_function_number,
             "string": self._map_function_string
         }
-        self._refs_resolved = set()
-        self._refs_unresolved = set()
+        self._resolved_refs = set()
+        self._unresolved_refs = []
         self._typeinfo = self._create_typeinfo(schema_json)
 
     @staticmethod
     def load_by_name(name: str, portal: Portal) -> Optional[dict]:
         return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None
 
-    def validate(self, data: dict) -> Optional[List[str]]:
-        issues = []
-        for issue in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
-            issues.append(issue.message)
-        return issues if issues else None
+    def validate(self, data: dict) -> List[str]:
+        errors = []
+        for error in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
+            errors.append(error.message)
+        return errors
+
+    @property
+    def unresolved_refs(self) -> List[dict]:
+        return self._unresolved_refs
+
+    @property
+    def resolved_refs(self) -> List[str]:
+        return list(self._resolved_refs)
 
     def get_map_value_function(self, column_name: str) -> Optional[Any]:
         return (self._get_typeinfo(column_name) or {}).get("map")
@@ -343,17 +426,16 @@ class Schema:
     def _map_function_ref(self, typeinfo: dict) -> Callable:
        def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any:
            nonlocal self, typeinfo
-           exception = None
            if not value:
                if (column := typeinfo.get("column")) and column in self.data.get("required", []):
-                   self._refs_unresolved.add(f"/{link_to}/<null>")
-                   exception = f"No required reference (linkTo) value for: /{link_to}"
-           elif portal and not portal.ref_exists(link_to, value):
-               self._refs_unresolved.add(f"/{link_to}/{value}")
-               exception = f"Cannot resolve reference (linkTo) for: /{link_to}"
-           if exception:
-               raise Exception(exception + f"{f'/{value}' if value else ''}{f' from {src}' if src else ''}")
-           self._refs_resolved.add(f"/{link_to}/{value}")
+                   self._unresolved_refs.append({"src": src, "error": f"/{link_to}/<null>"})
+           elif portal:
+               if not (resolved := portal.ref_exists(link_to, value)):
+                   self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
+               elif len(resolved) > 1:
+                   self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved})
+               else:
+                   self._resolved_refs.add(f"/{link_to}/{value}")
            return value
        return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
 
@@ -725,16 +807,17 @@ class Portal(PortalBase):
             super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name)
         return super_type_map_flattened
 
-    def ref_exists(self, type_name: str, value: str) -> bool:
+    def ref_exists(self, type_name: str, value: str) -> List[str]:
+        resolved = []
         if self._ref_exists_single(type_name, value):
-            return True
+            resolved.append(type_name)
         # Check for the given ref in all sub-types of the given type.
         if (schemas_super_type_map := self.get_schemas_super_type_map()):
             if (sub_type_names := schemas_super_type_map.get(type_name)):
                 for sub_type_name in sub_type_names:
                     if self._ref_exists_single(sub_type_name, value):
-                        return True
-        return False
+                        resolved.append(type_name)
+        return resolved
 
     def _ref_exists_single(self, type_name: str, value: str) -> bool:
         if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
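
With these structured_data.py changes, StructuredDataSet no longer raises on the first unresolvable reference and no longer returns flat lists of strings; problems accumulate in grouped warnings and errors, and Portal.ref_exists now returns the list of matching type names so ambiguous references can be reported with their candidate types. A hedged end-to-end sketch, assuming a portal object and an input spreadsheet are available (both names are placeholders):

    structured_data = StructuredDataSet.load("submission.xlsx", portal=portal)  # placeholder inputs
    structured_data.validate()
    for issue in (structured_data.reader_warnings
                  + structured_data.ref_errors
                  + structured_data.validation_errors):
        print(StructuredDataSet.format_issue(issue, original_file="submission.xlsx"))
    print("Resolved references:", structured_data.resolved_refs)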

{dcicutils-8.4.0.1b4 → dcicutils-8.4.0.1b5}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "8.4.0.1b4"  # TODO: To become 8.4.1
+version = "8.4.0.1b5"  # TODO: To become 8.4.1
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"