dcicutils 8.7.1.1b3__tar.gz → 8.7.1.1b5__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73)
  1. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/PKG-INFO +1 -1
  2. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/data_readers.py +14 -3
  3. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/misc_utils.py +10 -4
  4. dcicutils-8.7.1.1b5/dcicutils/portal_object_utils.py +234 -0
  5. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/portal_utils.py +2 -2
  6. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/structured_data.py +44 -23
  7. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/pyproject.toml +1 -1
  8. dcicutils-8.7.1.1b3/dcicutils/portal_object_utils.py +0 -177
  9. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/LICENSE.txt +0 -0
  10. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/README.rst +0 -0
  11. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/__init__.py +0 -0
  12. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/base.py +0 -0
  13. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/beanstalk_utils.py +0 -0
  14. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/bundle_utils.py +0 -0
  15. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/cloudformation_utils.py +0 -0
  16. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/codebuild_utils.py +0 -0
  17. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/command_utils.py +0 -0
  18. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/common.py +0 -0
  19. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/contribution_scripts.py +0 -0
  20. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/contribution_utils.py +0 -0
  21. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/creds_utils.py +0 -0
  22. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/data_utils.py +0 -0
  23. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/deployment_utils.py +0 -0
  24. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/diff_utils.py +0 -0
  25. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/docker_utils.py +0 -0
  26. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/ecr_scripts.py +0 -0
  27. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/ecr_utils.py +0 -0
  28. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/ecs_utils.py +0 -0
  29. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/env_base.py +0 -0
  30. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/env_manager.py +0 -0
  31. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/env_scripts.py +0 -0
  32. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/env_utils.py +0 -0
  33. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/env_utils_legacy.py +0 -0
  34. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/es_utils.py +0 -0
  35. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/exceptions.py +0 -0
  36. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/ff_mocks.py +0 -0
  37. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/ff_utils.py +0 -0
  38. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/file_utils.py +0 -0
  39. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/function_cache_decorator.py +0 -0
  40. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/glacier_utils.py +0 -0
  41. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/jh_utils.py +0 -0
  42. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/kibana/dashboards.json +0 -0
  43. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/kibana/readme.md +0 -0
  44. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/lang_utils.py +0 -0
  45. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  46. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  47. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  48. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  49. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  50. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  51. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/license_utils.py +0 -0
  52. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/log_utils.py +0 -0
  53. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/obfuscation_utils.py +0 -0
  54. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/opensearch_utils.py +0 -0
  55. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/project_utils.py +0 -0
  56. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/qa_checkers.py +0 -0
  57. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/qa_utils.py +0 -0
  58. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/redis_tools.py +0 -0
  59. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/redis_utils.py +0 -0
  60. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/s3_utils.py +0 -0
  61. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/schema_utils.py +0 -0
  62. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/scripts/publish_to_pypi.py +0 -0
  63. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/scripts/run_license_checker.py +0 -0
  64. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/secrets_utils.py +0 -0
  65. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/sheet_utils.py +0 -0
  66. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/snapshot_utils.py +0 -0
  67. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/ssl_certificate_utils.py +0 -0
  68. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/task_utils.py +0 -0
  69. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/tmpfile_utils.py +0 -0
  70. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/trace_utils.py +0 -0
  71. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/validation_utils.py +0 -0
  72. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/variant_utils.py +0 -0
  73. {dcicutils-8.7.1.1b3 → dcicutils-8.7.1.1b5}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.7.1.1b3
3
+ Version: 8.7.1.1b5
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -7,10 +7,21 @@ from dcicutils.misc_utils import create_dict, right_trim
7
7
  # Forward type references for type hints.
8
8
  Excel = Type["Excel"]
9
9
 
10
+ # Cell value(s) indicating property deletion.
11
+ _CELL_DELETION_VALUES = ["*delete*"]
12
+
13
+
14
+ # Special cell deletion sentinel value (note: make sure it remains the same object on deepcopy).
15
+ class _CellDeletionSentinal(str):
16
+ def __new__(cls):
17
+ return super(_CellDeletionSentinal, cls).__new__(cls, _CELL_DELETION_VALUES[0])
18
+ def __deepcopy__(self, memo): # noqa
19
+ return self
20
+
10
21
 
11
22
  class RowReader(abc.ABC):
12
23
 
13
- DELETION_CELL_VALUE = "*delete*"
24
+ CELL_DELETION_SENTINEL = _CellDeletionSentinal()
14
25
 
15
26
  def __init__(self):
16
27
  self.header = None
@@ -50,8 +61,8 @@ class RowReader(abc.ABC):
50
61
  def cell_value(self, value: Optional[Any]) -> str:
51
62
  if value is None:
52
63
  return ""
53
- elif (value := str(value).strip()) == RowReader.DELETION_CELL_VALUE:
54
- return RowReader.DELETION_CELL_VALUE
64
+ elif (value := str(value).strip()) in _CELL_DELETION_VALUES:
65
+ return RowReader.CELL_DELETION_SENTINEL
55
66
  else:
56
67
  return value
57
68
 
@@ -1148,16 +1148,22 @@ def remove_suffix(suffix: str, text: str, required: bool = False):
1148
1148
  return text[:len(text)-len(suffix)]
1149
1149
 
1150
1150
 
1151
- def remove_empty_properties(data: Optional[Union[list, dict]]) -> None:
1151
+ def remove_empty_properties(data: Optional[Union[list, dict]],
1152
+ isempty: Optional[Callable] = None,
1153
+ isempty_array_element: Optional[Callable] = None) -> None:
1154
+ def _isempty(value: Any) -> bool: # noqa
1155
+ return isempty(value) if callable(isempty) else value in [None, "", {}, []]
1152
1156
  if isinstance(data, dict):
1153
1157
  for key in list(data.keys()):
1154
- if (value := data[key]) in [None, "", {}, []]:
1158
+ if _isempty(value := data[key]):
1155
1159
  del data[key]
1156
1160
  else:
1157
- remove_empty_properties(value)
1161
+ remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
1158
1162
  elif isinstance(data, list):
1159
1163
  for item in data:
1160
- remove_empty_properties(item)
1164
+ remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
1165
+ if callable(isempty_array_element):
1166
+ data[:] = [item for item in data if not isempty_array_element(item)]
1161
1167
 
1162
1168
 
1163
1169
  class ObsoleteError(Exception):
@@ -0,0 +1,234 @@
1
+ from copy import deepcopy
2
+ from functools import lru_cache
3
+ import re
4
+ from typing import Any, List, Optional, Tuple, Type, Union
5
+ from dcicutils.data_readers import RowReader
6
+ from dcicutils.portal_utils import Portal
7
+ from dcicutils.schema_utils import Schema
8
+
9
+ PortalObject = Type["PortalObject"] # Forward type reference for type hints.
10
+
11
+
12
+ class PortalObject:
13
+
14
+ _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
15
+
16
+ def __init__(self, portal: Portal, portal_object: dict, portal_object_type: Optional[str] = None) -> None:
17
+ self._portal = portal
18
+ self._data = portal_object
19
+ self._type = portal_object_type if isinstance(portal_object_type, str) and portal_object_type else None
20
+
21
+ @property
22
+ def data(self):
23
+ return self._data
24
+
25
+ @property
26
+ def portal(self):
27
+ return self._portal
28
+
29
+ @property
30
+ @lru_cache(maxsize=1)
31
+ def type(self):
32
+ return self._type or Portal.get_schema_type(self._data)
33
+
34
+ @property
35
+ @lru_cache(maxsize=1)
36
+ def types(self):
37
+ return self._type or Portal.get_schema_types(self._data)
38
+
39
+ @property
40
+ @lru_cache(maxsize=1)
41
+ def uuid(self) -> Optional[str]:
42
+ return self._data.get("uuid") if isinstance(self._data, dict) else None
43
+
44
+ @property
45
+ @lru_cache(maxsize=1)
46
+ def schema(self):
47
+ return self._portal.get_schema(self.type)
48
+
49
+ def copy(self) -> PortalObject:
50
+ return PortalObject(self.portal, deepcopy(self.data), self.type)
51
+
52
+ @property
53
+ @lru_cache(maxsize=1)
54
+ def identifying_properties(self) -> List[str]:
55
+ """
56
+ Returns the list of all identifying property names of this Portal object which actually have values.
57
+ Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
58
+ properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
59
+ """
60
+ if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
61
+ return []
62
+ identifying_properties = []
63
+ for identifying_property in schema_identifying_properties:
64
+ if identifying_property not in ["uuid", "identifier", "aliases"]:
65
+ if self._data.get(identifying_property):
66
+ identifying_properties.append(identifying_property)
67
+ if self._data.get("identifier"):
68
+ identifying_properties.insert(0, "identifier")
69
+ if self._data.get("uuid"):
70
+ identifying_properties.insert(0, "uuid")
71
+ if "aliases" in schema_identifying_properties and self._data.get("aliases"):
72
+ identifying_properties.append("aliases")
73
+ return identifying_properties
74
+
75
+ @property
76
+ @lru_cache(maxsize=1)
77
+ def identifying_paths(self) -> List[str]:
78
+ """
79
+ Returns a list of the possible Portal URL paths identifying this Portal object.
80
+ """
81
+ if not (identifying_properties := self.identifying_properties):
82
+ return []
83
+ identifying_paths = []
84
+ for identifying_property in identifying_properties:
85
+ if (identifying_value := self._data.get(identifying_property)):
86
+ if identifying_property == "uuid":
87
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
88
+ identifying_paths.append(f"/{identifying_value}")
89
+ # For now at least we include the path both with and without the schema type component,
90
+ # as for some identifying values, it works (only) with, and some, it works (only) without.
91
+ # For example: If we have FileSet with "accession", an identifying property, with value
92
+ # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
93
+ # conversely using "submitted_id", also an identifying property, with value
94
+ # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
95
+ # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
96
+ elif isinstance(identifying_value, list):
97
+ for identifying_value_item in identifying_value:
98
+ identifying_paths.append(f"/{self.type}/{identifying_value_item}")
99
+ identifying_paths.append(f"/{identifying_value_item}")
100
+ else:
101
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
102
+ identifying_paths.append(f"/{identifying_value}")
103
+ return identifying_paths
104
+
105
+ @property
106
+ @lru_cache(maxsize=1)
107
+ def identifying_path(self) -> Optional[str]:
108
+ if identifying_paths := self.identifying_paths:
109
+ return identifying_paths[0]
110
+
111
+ def lookup(self, include_identifying_path: bool = False,
112
+ raw: bool = False) -> Optional[Union[Tuple[PortalObject, str], PortalObject]]:
113
+ return self._lookup(raw=raw) if include_identifying_path else self._lookup(raw=raw)[0]
114
+
115
+ def lookup_identifying_path(self) -> Optional[str]:
116
+ return self._lookup()[1]
117
+
118
+ def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
119
+ try:
120
+ for identifying_path in self.identifying_paths:
121
+ if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
122
+ return PortalObject(self._portal, value.json(), self.type if raw else None), identifying_path
123
+ except Exception:
124
+ pass
125
+ return None, self.identifying_path
126
+
127
+ def compare(self, value: Union[dict, PortalObject],
128
+ consider_refs: bool = False, resolved_refs: List[dict] = None) -> dict:
129
+ if consider_refs and isinstance(resolved_refs, list):
130
+ this_data = self.normalized_refs(refs=resolved_refs).data
131
+ else:
132
+ this_data = self.data
133
+ if isinstance(value, PortalObject):
134
+ comparing_data = value.data
135
+ elif isinstance(value, dict):
136
+ comparing_data = value
137
+ else:
138
+ return {}
139
+ return PortalObject._compare(this_data, comparing_data)
140
+
141
+ _ARRAY_KEY_REGULAR_EXPRESSION = re.compile(rf"^({Schema._ARRAY_NAME_SUFFIX_CHAR}\d+)$")
142
+
143
+ @staticmethod
144
+ def _compare(a: Any, b: Any, _path: Optional[str] = None) -> dict:
145
+ diffs = {}
146
+ if isinstance(a, dict) and isinstance(b, dict):
147
+ for key in a:
148
+ path = f"{_path}.{key}" if _path else key
149
+ if key not in b:
150
+ if a[key] != PortalObject._PROPERTY_DELETION_SENTINEL:
151
+ diffs[path] = {"value": a[key], "creating_value": True}
152
+ else:
153
+ diffs.update(PortalObject._compare(a[key], b[key], _path=path))
154
+ elif isinstance(a, list) and isinstance(b, list):
155
+ # Ignore order of array elements; not absolutely technically correct but suits our purpose.
156
+ for index in range(len(a)):
157
+ path = f"{_path or ''}#{index}"
158
+ if not isinstance(a[index], dict) and not isinstance(a[index], list):
159
+ if a[index] not in b:
160
+ if a[index] != PortalObject._PROPERTY_DELETION_SENTINEL:
161
+ if index < len(b):
162
+ diffs[path] = {"value": a[index], "updating_value": b[index]}
163
+ else:
164
+ diffs[path] = {"value": a[index], "creating_value": True}
165
+ else:
166
+ if index < len(b):
167
+ diffs[path] = {"value": b[index], "deleting_value": True}
168
+ elif len(b) < index:
169
+ diffs.update(PortalObject._compare(a[index], b[index], _path=path))
170
+ else:
171
+ diffs[path] = {"value": a[index], "creating_value": True}
172
+ elif a != b:
173
+ if a == PortalObject._PROPERTY_DELETION_SENTINEL:
174
+ diffs[_path] = {"value": b, "deleting_value": True}
175
+ else:
176
+ diffs[_path] = {"value": a, "updating_value": b}
177
+ return diffs
178
+
179
+ def normalize_refs(self, refs: List[dict]) -> None:
180
+ """
181
+ Turns any (linkTo) references which are paths (e.g. /SubmissionCenter/uwsc_gcc) within
182
+ this Portal object into the uuid style reference (e.g. d1b67068-300f-483f-bfe8-63d23c93801f),
183
+ based on the given "refs" list which is assumed to be a list of dictionaries, where each
184
+ contains a "path" and a "uuid" property; this list is typically (for our first usage of
185
+ this function) the value of structured_data.StructuredDataSet.resolved_refs_with_uuid.
186
+ Change is made to this Portal object in place; use normalized_refs function to make a copy.
187
+ If there are no "refs" (None or empty) then the references will be looked up via Portal calls.
188
+ """
189
+ PortalObject._normalize_refs(self.data, refs=refs, schema=self.schema, portal=self.portal)
190
+
191
+ def normalized_refs(self, refs: List[dict]) -> PortalObject:
192
+ """
193
+ Same as normalize_refs but does not make this change to this Portal object in place,
194
+ rather it returns a new instance of this Portal object wrapped in a new PortalObject.
195
+ """
196
+ portal_object = self.copy()
197
+ portal_object.normalize_refs(refs)
198
+ return portal_object
199
+
200
+ @staticmethod
201
+ def _normalize_refs(value: Any, refs: List[dict], schema: dict, portal: Portal, _path: Optional[str] = None) -> Any:
202
+ if not value or not isinstance(schema, dict):
203
+ return value
204
+ if isinstance(value, dict):
205
+ for key in value:
206
+ path = f"{_path}.{key}" if _path else key
207
+ value[key] = PortalObject._normalize_refs(value[key], refs=refs,
208
+ schema=schema, portal=portal, _path=path)
209
+ elif isinstance(value, list):
210
+ for index in range(len(value)):
211
+ path = f"{_path or ''}#{index}"
212
+ value[index] = PortalObject._normalize_refs(value[index], refs=refs,
213
+ schema=schema, portal=portal, _path=path)
214
+ elif value_type := Schema.get_property_by_path(schema, _path):
215
+ if link_to := value_type.get("linkTo"):
216
+ ref_path = f"/{link_to}/{value}"
217
+ if not isinstance(refs, list):
218
+ refs = []
219
+ if ref_uuids := [ref.get("uuid") for ref in refs if ref.get("path") == ref_path]:
220
+ ref_uuid = ref_uuids[0]
221
+ else:
222
+ ref_uuid = None
223
+ if ref_uuid:
224
+ return ref_uuid
225
+ # Here our (linkTo) reference appears not to be in the given refs; if these refs came
226
+ # from structured_data.StructuredDataSet.resolved_refs_with_uuid (in the context of
227
+ # smaht-submitr, which is the typical/first use case for this function) then this could
228
+ # be because the reference was to an internal object, i.e. another object existing within
229
+ # the data/spreadsheet being submitted. In any case, we don't have the associated uuid
230
+ # so let us look it up here.
231
+ if isinstance(portal, Portal):
232
+ if (ref_object := portal.get_metadata(ref_path)) and (ref_uuid := ref_object.get("uuid")):
233
+ return ref_uuid
234
+ return value
@@ -264,12 +264,12 @@ class Portal:
264
264
  add_on = ""
265
265
  return get_metadata(obj_id=object_id, vapp=self.vapp, key=self.key, add_on=add_on)
266
266
 
267
- def patch_metadata(self, object_id: str, data: str) -> Optional[dict]:
267
+ def patch_metadata(self, object_id: str, data: dict) -> Optional[dict]:
268
268
  if self.key:
269
269
  return patch_metadata(obj_id=object_id, patch_item=data, key=self.key)
270
270
  return self.patch(f"/{object_id}", data).json()
271
271
 
272
- def post_metadata(self, object_type: str, data: str) -> Optional[dict]:
272
+ def post_metadata(self, object_type: str, data: dict) -> Optional[dict]:
273
273
  if self.key:
274
274
  return post_metadata(schema_name=object_type, post_item=data, key=self.key)
275
275
  return self.post(f"/{object_type}", data).json()
@@ -46,8 +46,8 @@ class StructuredDataSet:
46
46
  def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
47
47
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
48
48
  order: Optional[List[str]] = None, prune: bool = True) -> None:
49
- self.data = {}
50
- self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None
49
+ self._data = {}
50
+ self._portal = Portal(portal, data=self._data, schemas=schemas) if portal else None
51
51
  self._order = order
52
52
  self._prune = prune
53
53
  self._warnings = {}
@@ -57,6 +57,10 @@ class StructuredDataSet:
57
57
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
58
58
  self._load_file(file) if file else None
59
59
 
60
+ @property
61
+ def data(self) -> dict:
62
+ return self._data
63
+
60
64
  @staticmethod
61
65
  def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
62
66
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
@@ -64,6 +68,17 @@ class StructuredDataSet:
64
68
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune)
65
69
 
66
70
  def validate(self, force: bool = False) -> None:
71
+ def data_without_deleted_properties(data: dict) -> dict:
72
+ nonlocal self
73
+ def isempty(value: Any) -> bool: # noqa
74
+ if value == RowReader.CELL_DELETION_SENTINEL:
75
+ return True
76
+ return self._prune and value in [None, "", {}, []]
77
+ def isempty_array_element(value: Any) -> bool: # noqa
78
+ return value == RowReader.CELL_DELETION_SENTINEL
79
+ data = copy.deepcopy(data)
80
+ remove_empty_properties(data, isempty=isempty, isempty_array_element=isempty_array_element)
81
+ return data
67
82
  if self._validated and not force:
68
83
  return
69
84
  self._validated = True
@@ -71,6 +86,7 @@ class StructuredDataSet:
71
86
  if (schema := Schema.load_by_name(type_name, portal=self._portal)):
72
87
  row_number = 0
73
88
  for data in self.data[type_name]:
89
+ data = data_without_deleted_properties(data)
74
90
  row_number += 1
75
91
  if (validation_errors := schema.validate(data)) is not None:
76
92
  for validation_error in validation_errors:
@@ -99,7 +115,11 @@ class StructuredDataSet:
99
115
 
100
116
  @property
101
117
  def resolved_refs(self) -> List[str]:
102
- return list(self._resolved_refs)
118
+ return list([resolved_ref[0] for resolved_ref in self._resolved_refs])
119
+
120
+ @property
121
+ def resolved_refs_with_uuids(self) -> List[str]:
122
+ return list([{"path": resolved_ref[0], "uuid": resolved_ref[1]} for resolved_ref in self._resolved_refs])
103
123
 
104
124
  @property
105
125
  def upload_files(self) -> List[str]:
@@ -185,10 +205,10 @@ class StructuredDataSet:
185
205
  def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
186
206
  if self._prune:
187
207
  remove_empty_properties(data)
188
- if type_name in self.data:
189
- self.data[type_name].extend([data] if isinstance(data, dict) else data)
208
+ if type_name in self._data:
209
+ self._data[type_name].extend([data] if isinstance(data, dict) else data)
190
210
  else:
191
- self.data[type_name] = [data] if isinstance(data, dict) else data
211
+ self._data[type_name] = [data] if isinstance(data, dict) else data
192
212
 
193
213
  def _add_properties(self, structured_row: dict, properties: dict, schema: Optional[dict] = None) -> None:
194
214
  for name in properties:
@@ -352,14 +372,6 @@ class Schema:
352
372
  errors.append(error.message)
353
373
  return errors
354
374
 
355
- @property
356
- def unresolved_refs(self) -> List[dict]:
357
- return self._unresolved_refs
358
-
359
- @property
360
- def resolved_refs(self) -> List[str]:
361
- return list(self._resolved_refs)
362
-
363
375
  def get_typeinfo(self, column_name: str) -> Optional[dict]:
364
376
  if isinstance(info := self._typeinfo.get(column_name), str):
365
377
  info = self._typeinfo.get(info)
@@ -423,9 +435,14 @@ class Schema:
423
435
  if not (resolved := portal.ref_exists(link_to, value)):
424
436
  self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
425
437
  elif len(resolved) > 1:
426
- self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved})
438
+ # TODO: Don't think we need this anymore; see TODO on Portal.ref_exists.
439
+ self._unresolved_refs.append({
440
+ "src": src,
441
+ "error": f"/{link_to}/{value}",
442
+ "types": [resolved_ref["type"] for resolved_ref in resolved]})
427
443
  else:
428
- self._resolved_refs.add(f"/{link_to}/{value}")
444
+ # A resolved-ref set value is a tuple of the reference path and its uuid.
445
+ self._resolved_refs.add((f"/{link_to}/{value}", resolved[0].get("uuid")))
429
446
  return value
430
447
  return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
431
448
 
@@ -617,8 +634,9 @@ class Portal(PortalBase):
617
634
 
618
635
  def ref_exists(self, type_name: str, value: str) -> List[str]:
619
636
  resolved = []
620
- if self._ref_exists_single(type_name, value):
621
- resolved.append(type_name)
637
+ is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
638
+ if is_resolved:
639
+ resolved.append({"type": type_name, "uuid": resolved_uuid})
622
640
  # TODO: Added this return on 2024-01-14 (dmichaels).
623
641
  # Why did I originally check for multiple existing values?
624
642
  # Why not just return right away if I find that the ref exists?
@@ -631,20 +649,23 @@ class Portal(PortalBase):
631
649
  if (schemas_super_type_map := self.get_schemas_super_type_map()):
632
650
  if (sub_type_names := schemas_super_type_map.get(type_name)):
633
651
  for sub_type_name in sub_type_names:
634
- if self._ref_exists_single(sub_type_name, value):
635
- resolved.append(type_name)
652
+ is_resolved, resolved_uuid = self._ref_exists_single(sub_type_name, value)
653
+ if is_resolved:
654
+ resolved.append({"type": type_name, "uuid": resolved_uuid})
636
655
  # TODO: Added this return on 2024-01-14 (dmichaels). See above TODO.
637
656
  return resolved
638
657
  return resolved
639
658
 
640
- def _ref_exists_single(self, type_name: str, value: str) -> bool:
659
+ def _ref_exists_single(self, type_name: str, value: str) -> Tuple[bool, Optional[str]]:
641
660
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
642
661
  iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
643
662
  for item in items:
644
663
  if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)):
645
664
  if isinstance(ivalue, list) and value in ivalue or ivalue == value:
646
- return True
647
- return self.get_metadata(f"/{type_name}/{value}") is not None
665
+ return True, None
666
+ if (value := self.get_metadata(f"/{type_name}/{value}")) is None:
667
+ return False, None
668
+ return True, value.get("uuid")
648
669
 
649
670
  @staticmethod
650
671
  def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None,
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.7.1.1b3" # TODO: To become 8.7.2
3
+ version = "8.7.1.1b5" # TODO: To become 8.7.2
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
@@ -1,177 +0,0 @@
1
- from functools import lru_cache
2
- import re
3
- from typing import Any, Callable, List, Optional, Tuple, Type, Union
4
- from dcicutils.portal_utils import Portal
5
- from dcicutils.schema_utils import Schema
6
-
7
- PortalObject = Type["PortalObject"] # Forward type reference for type hints.
8
-
9
-
10
- class PortalObject:
11
-
12
- def __init__(self, portal: Portal, portal_object: dict, portal_object_type: Optional[str] = None) -> None:
13
- self._portal = portal
14
- self._data = portal_object
15
- self._type = portal_object_type if isinstance(portal_object_type, str) and portal_object_type else None
16
-
17
- @property
18
- def data(self):
19
- return self._data
20
-
21
- @property
22
- @lru_cache(maxsize=1)
23
- def type(self):
24
- return self._type or Portal.get_schema_type(self._data)
25
-
26
- @property
27
- @lru_cache(maxsize=1)
28
- def types(self):
29
- return self._type or Portal.get_schema_types(self._data)
30
-
31
- @property
32
- @lru_cache(maxsize=1)
33
- def uuid(self) -> Optional[str]:
34
- return self._data.get("uuid") if isinstance(self._data, dict) else None
35
-
36
- @property
37
- @lru_cache(maxsize=1)
38
- def schema(self):
39
- return self._portal.get_schema(self.type)
40
-
41
- @property
42
- @lru_cache(maxsize=1)
43
- def identifying_properties(self) -> List[str]:
44
- """
45
- Returns the list of all identifying property names of this Portal object which actually have values.
46
- Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
47
- properties in the object schema, and favor these (first); disfavor "aliases"; no other ordering defined.
48
- """
49
- if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
50
- return []
51
- identifying_properties = []
52
- for identifying_property in schema_identifying_properties:
53
- if identifying_property not in ["uuid", "identifier", "aliases"]:
54
- if self._data.get(identifying_property):
55
- identifying_properties.append(identifying_property)
56
- if self._data.get("identifier"):
57
- identifying_properties.insert(0, "identifier")
58
- if self._data.get("uuid"):
59
- identifying_properties.insert(0, "uuid")
60
- if "aliases" in schema_identifying_properties and self._data.get("aliases"):
61
- identifying_properties.append("aliases")
62
- return identifying_properties
63
-
64
- @property
65
- @lru_cache(maxsize=1)
66
- def identifying_paths(self) -> List[str]:
67
- """
68
- Returns a list of the possible Portal URL paths identifying this Portal object.
69
- """
70
- if not (identifying_properties := self.identifying_properties):
71
- return []
72
- identifying_paths = []
73
- for identifying_property in identifying_properties:
74
- if (identifying_value := self._data.get(identifying_property)):
75
- if identifying_property == "uuid":
76
- identifying_paths.append(f"/{self.type}/{identifying_value}")
77
- identifying_paths.append(f"/{identifying_value}")
78
- # For now at least we include the path both with and without the schema type component,
79
- # as for some identifying values, it works (only) with, and some, it works (only) without.
80
- # For example: If we have FileSet with "accession", an identifying property, with value
81
- # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
82
- # conversely using "submitted_id", also an identifying property, with value
83
- # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
84
- # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
85
- elif isinstance(identifying_value, list):
86
- for identifying_value_item in identifying_value:
87
- identifying_paths.append(f"/{self.type}/{identifying_value_item}")
88
- identifying_paths.append(f"/{identifying_value_item}")
89
- else:
90
- identifying_paths.append(f"/{self.type}/{identifying_value}")
91
- identifying_paths.append(f"/{identifying_value}")
92
- return identifying_paths
93
-
94
- @property
95
- @lru_cache(maxsize=1)
96
- def identifying_path(self) -> Optional[str]:
97
- if identifying_paths := self.identifying_paths:
98
- return identifying_paths[0]
99
-
100
- def lookup(self, include_identifying_path: bool = False,
101
- raw: bool = False) -> Optional[Union[Tuple[PortalObject, str], PortalObject]]:
102
- return self._lookup(raw=raw) if include_identifying_path else self._lookup(raw=raw)[0]
103
-
104
- def lookup_identifying_path(self) -> Optional[str]:
105
- return self._lookup()[1]
106
-
107
- def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
108
- try:
109
- for identifying_path in self.identifying_paths:
110
- if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
111
- return PortalObject(self._portal, value.json(), self.type if raw else None), identifying_path
112
- except Exception:
113
- pass
114
- return None, self.identifying_path
115
-
116
- def compare(self, value: Union[dict, PortalObject], consider_link_to: bool = False) -> dict:
117
- """
118
- Compares this Portal object against the given Portal object value; noting differing values of properties
119
- which they have in common; and properties which are in this Portal object and not in the given Portal object;
120
- we do NOT check the converse, i.e. properties in the given Portal object which are not in this Portal object.
121
- Returns a dictionary with a description of the differences. If the given consider_link_to flag is True then
122
- for differences detected in linkTo reference values, we will actually check that the object which is being
123
- referenced is different or the same, e.g. the file_format reference (linkTo) property value "fastq" looks
124
- different from "eb417c0a-70dd-42e3-9841-ac7f1ee22962" but they (may) refer to the same object.
125
- """
126
- def are_properties_equal(property_path: str, property_value_a: Any, property_value_b: Any) -> bool:
127
- if property_value_a == property_value_b:
128
- return True
129
- nonlocal self
130
- if (schema := self.schema) and (property_type := Schema.get_property_by_path(schema, property_path)):
131
- if link_to := property_type.get("linkTo"):
132
- """
133
- This works basically except not WRT sub/super-types (e.g. CellCultureSample vs Sample);
134
- this is only preferable as it only requires one Portal GET rather than two, as below.
135
- if (a := self._portal.get(f"/{link_to}/{property_value_a}")) and (a.status_code == 200):
136
- if a_identifying_paths := PortalObject(self._portal, a.json()).identifying_paths:
137
- if f"/{link_to}/{property_value_b}" in a_identifying_paths:
138
- return True
139
- """
140
- if a := self._portal.get(f"/{link_to}/{property_value_a}", raw=True):
141
- if (a.status_code == 200) and (a := a.json()):
142
- if b := self._portal.get(f"/{link_to}/{property_value_b}", raw=True):
143
- if (b.status_code == 200) and (b := b.json()):
144
- return a == b
145
- return False
146
- return PortalObject._compare(self._data, value.data if isinstance(value, PortalObject) else value,
147
- compare=are_properties_equal if consider_link_to else None)
148
-
149
- _ARRAY_KEY_REGULAR_EXPRESSION = re.compile(rf"^({Schema._ARRAY_NAME_SUFFIX_CHAR}\d+)$")
150
-
151
- @staticmethod
152
- def _compare(a: dict, b: dict, compare: Optional[Callable] = None, _path: Optional[str] = None) -> dict:
153
- def key_to_path(key: str) -> Optional[str]: # noqa
154
- nonlocal _path
155
- if match := PortalObject._ARRAY_KEY_REGULAR_EXPRESSION.search(key):
156
- return f"{_path}{match.group(1)}" if _path else match.group(1)
157
- return f"{_path}.{key}" if _path else key
158
- def list_to_dictionary(value: list) -> dict: # noqa
159
- result = {}
160
- for index, item in enumerate(sorted(value)): # ignore array order
161
- result[f"#{index}"] = item
162
- return result
163
- diffs = {}
164
- for key in a:
165
- path = key_to_path(key)
166
- if key not in b:
167
- diffs[path] = {"value": a[key], "missing_value": True}
168
- else:
169
- if isinstance(a[key], dict) and isinstance(b[key], dict):
170
- diffs.update(PortalObject._compare(a[key], b[key], compare=compare, _path=path))
171
- elif isinstance(a[key], list) and isinstance(b[key], list):
172
- diffs.update(PortalObject._compare(list_to_dictionary(a[key]),
173
- list_to_dictionary(b[key]), compare=compare, _path=path))
174
- elif a[key] != b[key]:
175
- if not callable(compare) or not compare(path, a[key], b[key]):
176
- diffs[path] = {"value": a[key], "differing_value": b[key]}
177
- return diffs
File without changes
File without changes