dcicutils 8.4.0.1b3__tar.gz → 8.4.0.1b5__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/PKG-INFO +1 -1
  2. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/data_readers.py +24 -12
  3. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/misc_utils.py +8 -0
  4. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/structured_data.py +216 -68
  5. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/pyproject.toml +1 -1
  6. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/LICENSE.txt +0 -0
  7. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/README.rst +0 -0
  8. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/__init__.py +0 -0
  9. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/base.py +0 -0
  10. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/beanstalk_utils.py +0 -0
  11. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/bundle_utils.py +0 -0
  12. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/cloudformation_utils.py +0 -0
  13. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/codebuild_utils.py +0 -0
  14. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/command_utils.py +0 -0
  15. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/common.py +0 -0
  16. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/contribution_scripts.py +0 -0
  17. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/contribution_utils.py +0 -0
  18. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/creds_utils.py +0 -0
  19. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/data_utils.py +0 -0
  20. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/deployment_utils.py +0 -0
  21. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/diff_utils.py +0 -0
  22. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/docker_utils.py +0 -0
  23. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ecr_scripts.py +0 -0
  24. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ecr_utils.py +0 -0
  25. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ecs_utils.py +0 -0
  26. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_base.py +0 -0
  27. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_manager.py +0 -0
  28. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_scripts.py +0 -0
  29. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_utils.py +0 -0
  30. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/env_utils_legacy.py +0 -0
  31. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/es_utils.py +0 -0
  32. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/exceptions.py +0 -0
  33. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ff_mocks.py +0 -0
  34. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ff_utils.py +0 -0
  35. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/function_cache_decorator.py +0 -0
  36. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/glacier_utils.py +0 -0
  37. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/jh_utils.py +0 -0
  38. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/kibana/dashboards.json +0 -0
  39. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/kibana/readme.md +0 -0
  40. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/lang_utils.py +0 -0
  41. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  42. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  43. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  44. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  45. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  46. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  47. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/license_utils.py +0 -0
  48. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/log_utils.py +0 -0
  49. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/obfuscation_utils.py +0 -0
  50. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/opensearch_utils.py +0 -0
  51. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/project_utils.py +0 -0
  52. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/qa_checkers.py +0 -0
  53. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/qa_utils.py +0 -0
  54. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/redis_tools.py +0 -0
  55. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/redis_utils.py +0 -0
  56. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/s3_utils.py +0 -0
  57. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/scripts/publish_to_pypi.py +0 -0
  58. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/scripts/run_license_checker.py +0 -0
  59. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/secrets_utils.py +0 -0
  60. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/sheet_utils.py +0 -0
  61. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/snapshot_utils.py +0 -0
  62. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/ssl_certificate_utils.py +0 -0
  63. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/task_utils.py +0 -0
  64. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/trace_utils.py +0 -0
  65. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/validation_utils.py +0 -0
  66. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/variant_utils.py +0 -0
  67. {dcicutils-8.4.0.1b3 → dcicutils-8.4.0.1b5}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.4.0.1b3
3
+ Version: 8.4.0.1b5
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -2,27 +2,30 @@ import abc
2
2
  import csv
3
3
  import openpyxl
4
4
  from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union
5
- from dcicutils.misc_utils import right_trim
5
+ from dcicutils.misc_utils import create_object, right_trim
6
+
7
+ # Forward type references for type hints.
8
+ Excel = Type["Excel"]
6
9
 
7
10
 
8
11
  class RowReader(abc.ABC):
9
12
 
10
13
  def __init__(self):
11
14
  self.header = None
12
- self.location = 0
15
+ self.row_number = 0
13
16
  self._warning_empty_headers = False
14
17
  self._warning_extra_values = [] # Line numbers.
15
18
  self.open()
16
19
 
17
20
  def __iter__(self) -> Iterator:
18
21
  for row in self.rows:
19
- self.location += 1
22
+ self.row_number += 1
20
23
  if self.is_comment_row(row):
21
24
  continue
22
25
  if self.is_terminating_row(row):
23
26
  break
24
27
  if len(self.header) < len(row): # Row values beyond what there are headers for are ignored.
25
- self._warning_extra_values.append(self.location)
28
+ self._warning_extra_values.append(self.row_number)
26
29
  yield {column: self.cell_value(value) for column, value in zip(self.header, row)}
27
30
 
28
31
  def _define_header(self, header: List[Optional[Any]]) -> None:
@@ -49,13 +52,20 @@ class RowReader(abc.ABC):
49
52
  pass
50
53
 
51
54
  @property
52
- def issues(self) -> Optional[List[str]]:
53
- issues = []
55
+ def file(self) -> Optional[str]:
56
+ return self._file if hasattr(self, "_file") else None
57
+
58
+ @property
59
+ def warnings(self) -> List[str]:
60
+ warnings = []
54
61
  if self._warning_empty_headers:
55
- issues.append("Empty header column encountered; ignoring it and all subsequent columns.")
62
+ warnings.append({"src": create_object(file=self.file),
63
+ "warning": "Empty header column encountered; ignoring it and all subsequent columns."})
56
64
  if self._warning_extra_values:
57
- issues.extend([f"Extra column values on row [{row_number}]" for row_number in self._warning_extra_values])
58
- return issues if issues else None
65
+ for row_number in self._warning_extra_values:
66
+ warnings.append({"src": create_object(file=self.file, row=row_number),
67
+ "warning": f"Extra row column values."})
68
+ return warnings
59
69
 
60
70
 
61
71
  class ListReader(RowReader):
@@ -101,9 +111,10 @@ class CsvReader(RowReader):
101
111
 
102
112
  class ExcelSheetReader(RowReader):
103
113
 
104
- def __init__(self, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
114
+ def __init__(self, excel: Excel, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None:
105
115
  self.sheet_name = sheet_name or "Sheet1"
106
116
  self._workbook = workbook
117
+ self._file = excel._file
107
118
  self._rows = None
108
119
  super().__init__()
109
120
 
@@ -134,12 +145,13 @@ class Excel:
134
145
  self.open()
135
146
 
136
147
  def sheet_reader(self, sheet_name: str) -> ExcelSheetReader:
137
- return self._reader_class(sheet_name=sheet_name, workbook=self._workbook)
148
+ return self._reader_class(self, sheet_name=sheet_name, workbook=self._workbook)
138
149
 
139
150
  def open(self) -> None:
140
151
  if self._workbook is None:
141
152
  self._workbook = openpyxl.load_workbook(self._file, data_only=True)
142
- self.sheet_names = [(sheet_name or "").strip() for sheet_name in (self._workbook.sheetnames or [])]
153
+ self.sheet_names = [sheet_name for sheet_name in self._workbook.sheetnames
154
+ if self._workbook[sheet_name].sheet_state != "hidden"]
143
155
 
144
156
  def __del__(self) -> None:
145
157
  if (workbook := self._workbook) is not None:
@@ -1501,6 +1501,14 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
1501
1501
  return list_or_tuple[:i + 1]
1502
1502
 
1503
1503
 
1504
+ def create_object(**kwargs) -> dict:
1505
+ result = {}
1506
+ for name in kwargs:
1507
+ if kwargs[name]:
1508
+ result[name] = kwargs[name]
1509
+ return result
1510
+
1511
+
1504
1512
  def is_c4_arn(arn: str) -> bool:
1505
1513
  """
1506
1514
  Returns True iff the given (presumed) AWS ARN string value looks like it
@@ -1,3 +1,4 @@
1
+ from collections import deque
1
2
  import copy
2
3
  from functools import lru_cache
3
4
  import json
@@ -15,8 +16,8 @@ from dcicutils.common import OrchestratedApp, APP_CGAP, APP_FOURFRONT, APP_SMAHT
15
16
  from dcicutils.creds_utils import CGAPKeyManager, FourfrontKeyManager, SMaHTKeyManager
16
17
  from dcicutils.data_readers import CsvReader, Excel, RowReader
17
18
  from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
18
- from dcicutils.misc_utils import (load_json_if, merge_objects, remove_empty_properties, right_trim, split_string,
19
- to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
19
+ from dcicutils.misc_utils import (create_object, load_json_if, merge_objects, remove_empty_properties, right_trim,
20
+ split_string, to_boolean, to_camel_case, to_enum, to_float, to_integer, VirtualApp)
20
21
  from dcicutils.zip_utils import temporary_file, unpack_gz_file_to_temporary_file, unpack_files
21
22
 
22
23
 
@@ -52,7 +53,10 @@ class StructuredDataSet:
52
53
  self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None
53
54
  self._order = order
54
55
  self._prune = prune
55
- self._issues = None
56
+ self._warnings = {}
57
+ self._errors = {}
58
+ self._resolved_refs = []
59
+ self._validated = False
56
60
  self._load_file(file) if file else None
57
61
 
58
62
  @staticmethod
@@ -61,16 +65,85 @@ class StructuredDataSet:
61
65
  order: Optional[List[str]] = None, prune: bool = True) -> StructuredDataSet:
62
66
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, order=order, prune=prune)
63
67
 
64
- def validate(self) -> Optional[List[str]]:
65
- issues = []
68
+ def validate(self, force: bool = False) -> None:
69
+ if self._validated and not force:
70
+ return
71
+ self._validated = True
66
72
  for type_name in self.data:
67
73
  if (schema := Schema.load_by_name(type_name, portal=self._portal)):
68
- item_number = 0
74
+ row_number = 0
69
75
  for data in self.data[type_name]:
70
- item_number += 1
71
- if (validation_issues := schema.validate(data)) is not None:
72
- issues.extend([f"{schema.name} [{item_number}]: {issue}" for issue in validation_issues])
73
- return issues + (self._issues or [])
76
+ row_number += 1
77
+ if (validation_errors := schema.validate(data)) is not None:
78
+ for validation_error in validation_errors:
79
+ self._note_error({"src": create_object(type=schema.name, row=row_number),
80
+ "error": validation_error}, "validation")
81
+
82
+ @property
83
+ def warnings(self) -> dict:
84
+ return self._warnings
85
+
86
+ @property
87
+ def reader_warnings(self) -> List[dict]:
88
+ return self._warnings.get("reader") or []
89
+
90
+ @property
91
+ def errors(self) -> dict:
92
+ return self._errors
93
+
94
+ @property
95
+ def ref_errors(self) -> List[dict]:
96
+ return self._errors.get("ref") or []
97
+
98
+ @property
99
+ def validation_errors(self) -> List[dict]:
100
+ return self._errors.get("validation") or []
101
+
102
+ @property
103
+ def resolved_refs(self) -> List[str]:
104
+ return self._resolved_refs
105
+
106
+ @staticmethod
107
+ def format_issue(issue: dict, original_file: Optional[str] = None) -> str:
108
+ def src_string(issue: dict) -> str:
109
+ if not isinstance(issue, dict) or not isinstance(issue_src := issue.get("src"), dict):
110
+ return ""
111
+ show_file = original_file and (original_file.endswith(".zip") or
112
+ original_file.endswith(".tgz") or original_file.endswith(".gz"))
113
+ src_file = issue_src.get("file") if show_file else ""
114
+ src_type = issue_src.get("type")
115
+ src_column = issue_src.get("column")
116
+ src_row = issue_src.get("row", 0)
117
+ if src_file:
118
+ src = f"{os.path.basename(src_file)}"
119
+ sep = ":"
120
+ else:
121
+ src = ""
122
+ sep = "."
123
+ if src_type:
124
+ src += (sep if src else "") + src_type
125
+ sep = "."
126
+ if src_column:
127
+ src += (sep if src else "") + src_column
128
+ if src_row > 0:
129
+ src += (" " if src else "") + f"[{src_row}]"
130
+ if not src:
131
+ if issue.get("warning"):
132
+ src = "Warning"
133
+ elif issue.get("error"):
134
+ src = "Error"
135
+ else:
136
+ src = "Issue"
137
+ return src
138
+ issue_message = None
139
+ if issue:
140
+ if error := issue.get("error"):
141
+ issue_message = error
142
+ elif warning := issue.get("warning"):
143
+ issue_message = warning
144
+ elif issue.get("truncated"):
145
+ return f"Truncated result set | More: {issue.get('more')} | See: {issue.get('details')}"
146
+ return f"{src_string(issue)}: {issue_message}" if issue_message else ""
74
147
 
75
148
  def _load_file(self, file: str) -> None:
76
149
  # Returns a dictionary where each property is the name (i.e. the type) of the data,
@@ -100,15 +173,13 @@ class StructuredDataSet:
100
173
  self._load_file(file)
101
174
 
102
175
  def _load_csv_file(self, file: str) -> None:
103
- self._load_reader(reader := CsvReader(file), type_name=Schema.type_name(file))
104
- self._note_issues(reader.issues, os.path.basename(file))
176
+ self._load_reader(CsvReader(file), type_name=Schema.type_name(file))
105
177
 
106
178
  def _load_excel_file(self, file: str) -> None:
107
179
  excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
108
180
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
109
181
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
110
- self._load_reader(reader := excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
111
- self._note_issues(reader.issues, f"{file}:{sheet_name}")
182
+ self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
112
183
 
113
184
  def _load_json_file(self, file: str) -> None:
114
185
  with open(file) as f:
@@ -119,16 +190,20 @@ class StructuredDataSet:
119
190
  noschema = False
120
191
  structured_row_template = None
121
192
  for row in reader:
122
- if not structured_row_template: # Delay creation just so we don't create it if there are no rows.
193
+ if not structured_row_template: # Delay schema creation so we don't reference it if there are no rows.
123
194
  if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)):
124
195
  noschema = True
196
+ elif schema and (schema_name := schema.name):
197
+ type_name = schema_name
125
198
  structured_row_template = _StructuredRowTemplate(reader.header, schema)
126
199
  structured_row = structured_row_template.create_row()
127
200
  for column_name, value in row.items():
128
- structured_row_template.set_value(structured_row, column_name, value, reader.location)
129
- if schema and (schema_name := schema.name):
130
- type_name = schema_name
201
+ structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
131
202
  self._add(type_name, structured_row)
203
+ self._note_warning(reader.warnings, "reader")
204
+ if schema:
205
+ self._note_error(schema._unresolved_refs, "ref")
206
+ self._resolved_refs = schema._resolved_refs
132
207
 
133
208
  def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
134
209
  if self._prune:
@@ -138,11 +213,19 @@ class StructuredDataSet:
138
213
  else:
139
214
  self.data[type_name] = [data] if isinstance(data, dict) else data
140
215
 
141
- def _note_issues(self, issues: Optional[List[str]], source: str) -> None:
142
- if issues:
143
- if not self._issues:
144
- self._issues = []
145
- self._issues.append({source: issues})
216
+ def _note_warning(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
217
+ self._note_issue(self._warnings, item, group)
218
+
219
+ def _note_error(self, item: Optional[Union[dict, List[dict]]], group: str) -> None:
220
+ self._note_issue(self._errors, item, group)
221
+
222
+ def _note_issue(self, issues: dict, item: Optional[Union[dict, List[dict]]], group: str) -> None:
223
+ if isinstance(item, dict) and item:
224
+ item = [item]
225
+ if isinstance(item, list) and item:
226
+ if not issues.get(group):
227
+ issues[group] = []
228
+ issues[group].extend(item)
146
229
 
147
230
 
148
231
  class _StructuredRowTemplate:
@@ -155,10 +238,10 @@ class _StructuredRowTemplate:
155
238
  def create_row(self) -> dict:
156
239
  return copy.deepcopy(self._template)
157
240
 
158
- def set_value(self, data: dict, column_name: str, value: str, loc: int = -1) -> None:
241
+ def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None:
159
242
  if (set_value_function := self._set_value_functions.get(column_name)):
160
- src = (f"{f'{self._schema.name}.' if self._schema else ''}" +
161
- f"{f'{column_name}' if column_name else ''}{f' [{loc}]' if loc else ''}")
243
+ src = create_object(type=self._schema.name if self._schema else None,
244
+ column=column_name, file=file, row=row_number)
162
245
  set_value_function(data, value, src)
163
246
 
164
247
  def _create_row_template(self, column_names: List[str]) -> dict: # Surprisingly tricky code here.
@@ -262,17 +345,27 @@ class Schema:
262
345
  "number": self._map_function_number,
263
346
  "string": self._map_function_string
264
347
  }
348
+ self._resolved_refs = set()
349
+ self._unresolved_refs = []
265
350
  self._typeinfo = self._create_typeinfo(schema_json)
266
351
 
267
352
  @staticmethod
268
353
  def load_by_name(name: str, portal: Portal) -> Optional[dict]:
269
354
  return Schema(portal.get_schema(Schema.type_name(name)), portal) if portal else None
270
355
 
271
- def validate(self, data: dict) -> Optional[List[str]]:
272
- issues = []
273
- for issue in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
274
- issues.append(issue.message)
275
- return issues if issues else None
356
+ def validate(self, data: dict) -> List[str]:
357
+ errors = []
358
+ for error in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
359
+ errors.append(error.message)
360
+ return errors
361
+
362
+ @property
363
+ def unresolved_refs(self) -> List[dict]:
364
+ return self._unresolved_refs
365
+
366
+ @property
367
+ def resolved_refs(self) -> List[str]:
368
+ return list(self._resolved_refs)
276
369
 
277
370
  def get_map_value_function(self, column_name: str) -> Optional[Any]:
278
371
  return (self._get_typeinfo(column_name) or {}).get("map")
@@ -333,14 +426,16 @@ class Schema:
333
426
  def _map_function_ref(self, typeinfo: dict) -> Callable:
334
427
  def map_ref(value: str, link_to: str, portal: Optional[Portal], src: Optional[str]) -> Any:
335
428
  nonlocal self, typeinfo
336
- exception = None
337
429
  if not value:
338
430
  if (column := typeinfo.get("column")) and column in self.data.get("required", []):
339
- exception = f"No required reference (linkTo) value for: {link_to}"
340
- elif portal and not portal.ref_exists(link_to, value):
341
- exception = f"Cannot resolve reference (linkTo) for: {link_to}"
342
- if exception:
343
- raise Exception(exception + f"{f'/{value}' if value else ''}{f' from {src}' if src else ''}")
431
+ self._unresolved_refs.append({"src": src, "error": f"/{link_to}/<null>"})
432
+ elif portal:
433
+ if not (resolved := portal.ref_exists(link_to, value)):
434
+ self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
435
+ elif len(resolved) > 1:
436
+ self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved})
437
+ else:
438
+ self._resolved_refs.add(f"/{link_to}/{value}")
344
439
  return value
345
440
  return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
346
441
 
@@ -481,15 +576,8 @@ class PortalBase:
481
576
  env: Optional[str] = None, app: OrchestratedApp = APP_SMAHT, server: Optional[str] = None,
482
577
  key: Optional[Union[dict, tuple]] = None,
483
578
  portal: Optional[Union[VirtualApp, TestApp, Router, Portal, str]] = None) -> PortalBase:
484
- if isinstance(arg, VirtualApp) and not portal:
485
- portal = arg
486
- elif isinstance(arg, TestApp) and not portal:
487
- portal = arg
488
- elif isinstance(arg, Router) and not portal:
489
- portal = arg
490
- elif isinstance(arg, Portal) and not portal:
491
- portal = arg
492
- elif isinstance(arg, str) and arg.endswith(".ini") and not portal:
579
+ if ((isinstance(arg, (VirtualApp, TestApp, Router, Portal)) or
580
+ isinstance(arg, str) and arg.endswith(".ini")) and not portal):
493
581
  portal = arg
494
582
  elif isinstance(arg, str) and not env:
495
583
  env = arg
@@ -539,15 +627,12 @@ class PortalBase:
539
627
  return post_metadata(schema_name=object_type, post_item=data, key=self._key)
540
628
  return self.post(f"/{object_type}", data)
541
629
 
542
- def get_schema(self, schema_name: str) -> Optional[dict]:
543
- return get_schema(schema_name, portal_vapp=self._vapp, key=self._key)
544
-
545
630
  def get(self, uri: str, follow: bool = True, **kwargs) -> Optional[Union[RequestResponse, TestResponse]]:
546
631
  if isinstance(self._vapp, (VirtualApp, TestApp)):
547
632
  response = self._vapp.get(self._uri(uri), **self._kwargs(**kwargs))
548
633
  if response and response.status_code in [301, 302, 303, 307, 308] and follow:
549
634
  response = response.follow()
550
- return response
635
+ return self._response(response)
551
636
  return requests.get(self._uri(uri), allow_redirects=follow, **self._kwargs(**kwargs))
552
637
 
553
638
  def patch(self, uri: str, data: Optional[dict] = None,
@@ -565,6 +650,12 @@ class PortalBase:
565
650
  return self._vapp.post_json(self._uri(uri), json or data, upload_files=files, **self._kwargs(**kwargs))
566
651
  return requests.post(self._uri(uri), json=json or data, files=files, **self._kwargs(**kwargs))
567
652
 
653
+ def get_schema(self, schema_name: str) -> Optional[dict]:
654
+ return get_schema(schema_name, portal_vapp=self._vapp, key=self._key)
655
+
656
+ def get_schemas(self) -> dict:
657
+ return self.get("/profiles/").json()
658
+
568
659
  def _uri(self, uri: str) -> str:
569
660
  if not isinstance(uri, str) or not uri:
570
661
  return "/"
@@ -582,6 +673,19 @@ class PortalBase:
582
673
  result_kwargs["timeout"] = timeout
583
674
  return result_kwargs
584
675
 
676
+ def _response(self, response) -> Optional[RequestResponse]:
677
+ if response and isinstance(getattr(response.__class__, "json"), property):
678
+ class RequestResponseWrapper: # For consistency change json property to method.
679
+ def __init__(self, respnose, **kwargs):
680
+ super().__init__(**kwargs)
681
+ self._response = response
682
+ def __getattr__(self, attr): # noqa
683
+ return getattr(self._response, attr)
684
+ def json(self): # noqa
685
+ return self._response.json
686
+ response = RequestResponseWrapper(response)
687
+ return response
688
+
585
689
  @staticmethod
586
690
  def create_for_testing(ini_file: Optional[str] = None) -> PortalBase:
587
691
  if isinstance(ini_file, str):
@@ -592,7 +696,7 @@ class PortalBase:
592
696
 
593
697
  @staticmethod
594
698
  def create_for_testing_local(ini_file: Optional[str] = None) -> Portal:
595
- if isinstance(ini_file, str):
699
+ if isinstance(ini_file, str) and ini_file:
596
700
  return Portal(Portal._create_testapp(ini_file))
597
701
  minimal_ini_for_testing_local = "\n".join([
598
702
  "[app:app]\nuse = egg:encoded\nfile_upload_bucket = dummy",
@@ -655,23 +759,67 @@ class Portal(PortalBase):
655
759
 
656
760
  @lru_cache(maxsize=256)
657
761
  def get_schema(self, schema_name: str) -> Optional[dict]:
658
- def get_schema_exact(schema_name: str) -> Optional[dict]: # noqa
659
- return (next((schema for schema in self._schemas or []
660
- if Schema.type_name(schema.get("title")) == Schema.type_name(schema_name)), None) or
661
- super(Portal, self).get_schema(schema_name))
662
- try:
663
- if (schema := get_schema_exact(schema_name)):
664
- return schema
665
- except Exception: # Try/force camel-case if all upper/lower-case.
666
- if schema_name == schema_name.upper():
667
- if (schema := get_schema_exact(schema_name.lower().title())):
668
- return schema
669
- elif schema_name == schema_name.lower():
670
- if (schema := get_schema_exact(schema_name.title())):
671
- return schema
672
- raise
673
-
674
- def ref_exists(self, type_name: str, value: str) -> bool:
762
+ if (schemas := self.get_schemas()) and (schema := schemas.get(schema_name := Schema.type_name(schema_name))):
763
+ return schema
764
+ if schema_name == schema_name.upper() and (schema := schemas.get(schema_name.lower().title())):
765
+ return schema
766
+ if schema_name == schema_name.lower() and (schema := schemas.get(schema_name.title())):
767
+ return schema
768
+
769
+ @lru_cache(maxsize=1)
770
+ def get_schemas(self) -> dict:
771
+ schemas = super(Portal, self).get_schemas()
772
+ if self._schemas:
773
+ schemas = copy.deepcopy(schemas)
774
+ for user_specified_schema in self._schemas:
775
+ if user_specified_schema.get("title"):
776
+ schemas[user_specified_schema["title"]] = user_specified_schema
777
+ return schemas
778
+
779
+ @lru_cache(maxsize=1)
780
+ def get_schemas_super_type_map(self) -> dict:
781
+ """
782
+ Returns the "super type map" for all of the known schemas (via /profiles).
783
+ This is a dictionary of all types which have (one or more) sub-types whose value is
784
+ an array of all of those sub-types (direct and all descendents), in breadth first order.
785
+ """
786
+ def breadth_first(super_type_map: dict, super_type_name: str) -> dict:
787
+ result = []
788
+ queue = deque(super_type_map.get(super_type_name, []))
789
+ while queue:
790
+ result.append(sub_type_name := queue.popleft())
791
+ if sub_type_name in super_type_map:
792
+ queue.extend(super_type_map[sub_type_name])
793
+ return result
794
+ if not (schemas := self.get_schemas()):
795
+ return {}
796
+ super_type_map = {}
797
+ for type_name in schemas:
798
+ if super_type_name := schemas[type_name].get("rdfs:subClassOf"):
799
+ super_type_name = super_type_name.replace("/profiles/", "").replace(".json", "")
800
+ if super_type_name != "Item":
801
+ if not super_type_map.get(super_type_name):
802
+ super_type_map[super_type_name] = [type_name]
803
+ elif type_name not in super_type_map[super_type_name]:
804
+ super_type_map[super_type_name].append(type_name)
805
+ super_type_map_flattened = {}
806
+ for super_type_name in super_type_map:
807
+ super_type_map_flattened[super_type_name] = breadth_first(super_type_map, super_type_name)
808
+ return super_type_map_flattened
809
+
810
+ def ref_exists(self, type_name: str, value: str) -> List[str]:
811
+ resolved = []
812
+ if self._ref_exists_single(type_name, value):
813
+ resolved.append(type_name)
814
+ # Check for the given ref in all sub-types of the given type.
815
+ if (schemas_super_type_map := self.get_schemas_super_type_map()):
816
+ if (sub_type_names := schemas_super_type_map.get(type_name)):
817
+ for sub_type_name in sub_type_names:
818
+ if self._ref_exists_single(sub_type_name, value):
819
+ resolved.append(type_name)
820
+ return resolved
821
+
822
+ def _ref_exists_single(self, type_name: str, value: str) -> bool:
675
823
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
676
824
  iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
677
825
  for item in items:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.4.0.1b3" # TODO: To become 8.4.1
3
+ version = "8.4.0.1b5" # TODO: To become 8.4.1
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
File without changes
File without changes