dcicutils 8.7.1.1b4__tar.gz → 8.7.1.1b6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/PKG-INFO +1 -1
  2. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/data_readers.py +13 -3
  3. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/misc_utils.py +23 -4
  4. dcicutils-8.7.1.1b6/dcicutils/portal_object_utils.py +248 -0
  5. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/structured_data.py +42 -28
  6. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/pyproject.toml +1 -1
  7. dcicutils-8.7.1.1b4/dcicutils/portal_object_utils.py +0 -190
  8. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/LICENSE.txt +0 -0
  9. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/README.rst +0 -0
  10. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/__init__.py +0 -0
  11. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/base.py +0 -0
  12. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/beanstalk_utils.py +0 -0
  13. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/bundle_utils.py +0 -0
  14. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/cloudformation_utils.py +0 -0
  15. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/codebuild_utils.py +0 -0
  16. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/command_utils.py +0 -0
  17. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/common.py +0 -0
  18. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/contribution_scripts.py +0 -0
  19. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/contribution_utils.py +0 -0
  20. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/creds_utils.py +0 -0
  21. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/data_utils.py +0 -0
  22. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/deployment_utils.py +0 -0
  23. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/diff_utils.py +0 -0
  24. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/docker_utils.py +0 -0
  25. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/ecr_scripts.py +0 -0
  26. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/ecr_utils.py +0 -0
  27. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/ecs_utils.py +0 -0
  28. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/env_base.py +0 -0
  29. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/env_manager.py +0 -0
  30. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/env_scripts.py +0 -0
  31. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/env_utils.py +0 -0
  32. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/env_utils_legacy.py +0 -0
  33. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/es_utils.py +0 -0
  34. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/exceptions.py +0 -0
  35. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/ff_mocks.py +0 -0
  36. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/ff_utils.py +0 -0
  37. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/file_utils.py +0 -0
  38. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/function_cache_decorator.py +0 -0
  39. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/glacier_utils.py +0 -0
  40. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/jh_utils.py +0 -0
  41. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/kibana/dashboards.json +0 -0
  42. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/kibana/readme.md +0 -0
  43. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/lang_utils.py +0 -0
  44. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  45. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  46. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  47. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  48. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  49. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  50. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/license_utils.py +0 -0
  51. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/log_utils.py +0 -0
  52. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/obfuscation_utils.py +0 -0
  53. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/opensearch_utils.py +0 -0
  54. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/portal_utils.py +0 -0
  55. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/project_utils.py +0 -0
  56. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/qa_checkers.py +0 -0
  57. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/qa_utils.py +0 -0
  58. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/redis_tools.py +0 -0
  59. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/redis_utils.py +0 -0
  60. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/s3_utils.py +0 -0
  61. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/schema_utils.py +0 -0
  62. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/scripts/publish_to_pypi.py +0 -0
  63. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/scripts/run_license_checker.py +0 -0
  64. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/secrets_utils.py +0 -0
  65. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/sheet_utils.py +0 -0
  66. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/snapshot_utils.py +0 -0
  67. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/ssl_certificate_utils.py +0 -0
  68. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/task_utils.py +0 -0
  69. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/tmpfile_utils.py +0 -0
  70. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/trace_utils.py +0 -0
  71. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/validation_utils.py +0 -0
  72. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/variant_utils.py +0 -0
  73. {dcicutils-8.7.1.1b4 → dcicutils-8.7.1.1b6}/dcicutils/zip_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.7.1.1b4
3
+ Version: 8.7.1.1b6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -7,11 +7,21 @@ from dcicutils.misc_utils import create_dict, right_trim
7
7
  # Forward type references for type hints.
8
8
  Excel = Type["Excel"]
9
9
 
10
+ # Cell value(s) indicating property deletion.
11
+ _CELL_DELETION_VALUES = ["*delete*"]
12
+
13
+
14
+ # Special cell deletion sentinel value (note: it must remain the same object when deep-copied).
15
+ class _CellDeletionSentinal(str):
16
+ def __new__(cls):
17
+ return super(_CellDeletionSentinal, cls).__new__(cls, _CELL_DELETION_VALUES[0])
18
+ def __deepcopy__(self, memo): # noqa
19
+ return self
20
+
10
21
 
11
22
  class RowReader(abc.ABC):
12
23
 
13
- CELL_DELETION_VALUES = ["*delete*"] # cell values(s) indicating property deletion
14
- CELL_DELETION_SENTINEL = object() # special cell deletion sentinel value
24
+ CELL_DELETION_SENTINEL = _CellDeletionSentinal()
15
25
 
16
26
  def __init__(self):
17
27
  self.header = None
@@ -51,7 +61,7 @@ class RowReader(abc.ABC):
51
61
  def cell_value(self, value: Optional[Any]) -> str:
52
62
  if value is None:
53
63
  return ""
54
- elif (value := str(value).strip()) in RowReader.CELL_DELETION_VALUES:
64
+ elif (value := str(value).strip()) in _CELL_DELETION_VALUES:
55
65
  return RowReader.CELL_DELETION_SENTINEL
56
66
  else:
57
67
  return value
@@ -2,6 +2,7 @@
2
2
  This file contains functions that might be generally useful.
3
3
  """
4
4
 
5
+ from collections import namedtuple
5
6
  import contextlib
6
7
  import datetime
7
8
  import functools
@@ -17,6 +18,7 @@ import re
17
18
  import rfc3986.validators
18
19
  import rfc3986.exceptions
19
20
  import time
21
+ import uuid
20
22
  import warnings
21
23
  import webtest # importing the library makes it easier to mock testing
22
24
 
@@ -1148,16 +1150,22 @@ def remove_suffix(suffix: str, text: str, required: bool = False):
1148
1150
  return text[:len(text)-len(suffix)]
1149
1151
 
1150
1152
 
1151
- def remove_empty_properties(data: Optional[Union[list, dict]]) -> None:
1153
+ def remove_empty_properties(data: Optional[Union[list, dict]],
1154
+ isempty: Optional[Callable] = None,
1155
+ isempty_array_element: Optional[Callable] = None) -> None:
1156
+ def _isempty(value: Any) -> bool: # noqa
1157
+ return isempty(value) if callable(isempty) else value in [None, "", {}, []]
1152
1158
  if isinstance(data, dict):
1153
1159
  for key in list(data.keys()):
1154
- if (value := data[key]) in [None, "", {}, []]:
1160
+ if _isempty(value := data[key]):
1155
1161
  del data[key]
1156
1162
  else:
1157
- remove_empty_properties(value)
1163
+ remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
1158
1164
  elif isinstance(data, list):
1159
1165
  for item in data:
1160
- remove_empty_properties(item)
1166
+ remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
1167
+ if callable(isempty_array_element):
1168
+ data[:] = [item for item in data if not isempty_array_element(item)]
1161
1169
 
1162
1170
 
1163
1171
  class ObsoleteError(Exception):
@@ -1519,6 +1527,17 @@ def create_dict(**kwargs) -> dict:
1519
1527
  return result
1520
1528
 
1521
1529
 
1530
+ def create_readonly_object(**kwargs):
1531
+ """
1532
+ Returns a new/unique object instance with readonly properties equal to the given kwargs.
1533
+ """
1534
+ readonly_class_name = "readonlyclass_" + str(uuid.uuid4()).replace("-", "")
1535
+ readonly_class_args = " ".join(kwargs.keys())
1536
+ readonly_class = namedtuple(readonly_class_name, readonly_class_args)
1537
+ readonly_object = readonly_class(**kwargs)
1538
+ return readonly_object
1539
+
1540
+
1522
1541
  def is_c4_arn(arn: str) -> bool:
1523
1542
  """
1524
1543
  Returns True iff the given (presumed) AWS ARN string value looks like it
@@ -0,0 +1,248 @@
1
+ from copy import deepcopy
2
+ from functools import lru_cache
3
+ import re
4
+ from typing import Any, List, Optional, Tuple, Type, Union
5
+ from dcicutils.data_readers import RowReader
6
+ from dcicutils.misc_utils import create_readonly_object
7
+ from dcicutils.portal_utils import Portal
8
+ from dcicutils.schema_utils import Schema
9
+
10
+ PortalObject = Type["PortalObject"] # Forward type reference for type hints.
11
+
12
+
13
+ class PortalObject:
14
+
15
+ _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
16
+
17
+ def __init__(self, portal: Portal, portal_object: dict, portal_object_type: Optional[str] = None) -> None:
18
+ self._portal = portal
19
+ self._data = portal_object
20
+ self._type = portal_object_type if isinstance(portal_object_type, str) and portal_object_type else None
21
+
22
+ @property
23
+ def data(self):
24
+ return self._data
25
+
26
+ @property
27
+ def portal(self):
28
+ return self._portal
29
+
30
+ @property
31
+ @lru_cache(maxsize=1)
32
+ def type(self):
33
+ return self._type or Portal.get_schema_type(self._data)
34
+
35
+ @property
36
+ @lru_cache(maxsize=1)
37
+ def types(self):
38
+ return self._type or Portal.get_schema_types(self._data)
39
+
40
+ @property
41
+ @lru_cache(maxsize=1)
42
+ def uuid(self) -> Optional[str]:
43
+ return self._data.get("uuid") if isinstance(self._data, dict) else None
44
+
45
+ @property
46
+ @lru_cache(maxsize=1)
47
+ def schema(self):
48
+ return self._portal.get_schema(self.type)
49
+
50
+ def copy(self) -> PortalObject:
51
+ return PortalObject(self.portal, deepcopy(self.data), self.type)
52
+
53
+ @property
54
+ @lru_cache(maxsize=1)
55
+ def identifying_properties(self) -> List[str]:
56
+ """
57
+ Returns the list of all identifying property names of this Portal object which actually have values.
58
+ Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
59
+ properties in the object schema, and favor these (first); disfavor "aliases" (last); no other ordering defined.
60
+ """
61
+ if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
62
+ return []
63
+ identifying_properties = []
64
+ for identifying_property in schema_identifying_properties:
65
+ if identifying_property not in ["uuid", "identifier", "aliases"]:
66
+ if self._data.get(identifying_property):
67
+ identifying_properties.append(identifying_property)
68
+ if self._data.get("identifier"):
69
+ identifying_properties.insert(0, "identifier")
70
+ if self._data.get("uuid"):
71
+ identifying_properties.insert(0, "uuid")
72
+ if "aliases" in schema_identifying_properties and self._data.get("aliases"):
73
+ identifying_properties.append("aliases")
74
+ return identifying_properties
75
+
76
+ @property
77
+ @lru_cache(maxsize=1)
78
+ def identifying_paths(self) -> List[str]:
79
+ """
80
+ Returns a list of the possible Portal URL paths identifying this Portal object.
81
+ """
82
+ if not (identifying_properties := self.identifying_properties):
83
+ return []
84
+ identifying_paths = []
85
+ for identifying_property in identifying_properties:
86
+ if (identifying_value := self._data.get(identifying_property)):
87
+ if identifying_property == "uuid":
88
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
89
+ identifying_paths.append(f"/{identifying_value}")
90
+ # For now at least we include the path both with and without the schema type component,
91
+ # as for some identifying values, it works (only) with, and some, it works (only) without.
92
+ # For example: If we have FileSet with "accession", an identifying property, with value
93
+ # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
94
+ # conversely using "submitted_id", also an identifying property, with value
95
+ # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
96
+ # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
97
+ elif isinstance(identifying_value, list):
98
+ for identifying_value_item in identifying_value:
99
+ identifying_paths.append(f"/{self.type}/{identifying_value_item}")
100
+ identifying_paths.append(f"/{identifying_value_item}")
101
+ else:
102
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
103
+ identifying_paths.append(f"/{identifying_value}")
104
+ return identifying_paths
105
+
106
+ @property
107
+ @lru_cache(maxsize=1)
108
+ def identifying_path(self) -> Optional[str]:
109
+ if identifying_paths := self.identifying_paths:
110
+ return identifying_paths[0]
111
+
112
+ def lookup(self, include_identifying_path: bool = False,
113
+ raw: bool = False) -> Optional[Union[Tuple[PortalObject, str], PortalObject]]:
114
+ return self._lookup(raw=raw) if include_identifying_path else self._lookup(raw=raw)[0]
115
+
116
+ def lookup_identifying_path(self) -> Optional[str]:
117
+ return self._lookup()[1]
118
+
119
+ def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
120
+ try:
121
+ for identifying_path in self.identifying_paths:
122
+ if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
123
+ return PortalObject(self._portal, value.json(), self.type if raw else None), identifying_path
124
+ except Exception:
125
+ pass
126
+ return None, self.identifying_path
127
+
128
+ def compare(self, value: Union[dict, PortalObject],
129
+ consider_refs: bool = False, resolved_refs: List[dict] = None) -> dict:
130
+ if consider_refs and isinstance(resolved_refs, list):
131
+ this_data = self.normalized_refs(refs=resolved_refs).data
132
+ else:
133
+ this_data = self.data
134
+ if isinstance(value, PortalObject):
135
+ comparing_data = value.data
136
+ elif isinstance(value, dict):
137
+ comparing_data = value
138
+ else:
139
+ return {}
140
+ return PortalObject._compare(this_data, comparing_data, self.type)
141
+
142
+ _ARRAY_KEY_REGULAR_EXPRESSION = re.compile(rf"^({Schema._ARRAY_NAME_SUFFIX_CHAR}\d+)$")
143
+
144
+ @staticmethod
145
+ def _compare(a: Any, b: Any, value_type: str, _path: Optional[str] = None) -> dict:
146
+ def diff_creating(value: Any) -> object: # noqa
147
+ nonlocal value_type
148
+ return create_readonly_object(value=value, type=value_type,
149
+ creating_value=True, updating_value=None, deleting_value=False)
150
+ def diff_updating(value: Any, updating_value: Any) -> object: # noqa
151
+ nonlocal value_type
152
+ return create_readonly_object(value=value, type=value_type,
153
+ creating_value=False, updating_value=updating_value, deleting_value=False)
154
+ def diff_deleting(value: Any) -> object: # noqa
155
+ nonlocal value_type
156
+ return create_readonly_object(value=value, type=value_type,
157
+ creating_value=False, updating_value=None, deleting_value=True)
158
+ diffs = {}
159
+ if isinstance(a, dict) and isinstance(b, dict):
160
+ for key in a:
161
+ path = f"{_path}.{key}" if _path else key
162
+ if key not in b:
163
+ if a[key] != PortalObject._PROPERTY_DELETION_SENTINEL:
164
+ diffs[path] = diff_creating(a[key])
165
+ else:
166
+ diffs.update(PortalObject._compare(a[key], b[key], type, _path=path))
167
+ elif isinstance(a, list) and isinstance(b, list):
168
+ # Ignore order of array elements; not absolutely technically correct but suits our purpose.
169
+ for index in range(len(a)):
170
+ path = f"{_path or ''}#{index}"
171
+ if not isinstance(a[index], dict) and not isinstance(a[index], list):
172
+ if a[index] not in b:
173
+ if a[index] != PortalObject._PROPERTY_DELETION_SENTINEL:
174
+ if index < len(b):
175
+ diffs[path] = diff_updating(a[index], b[index])
176
+ else:
177
+ diffs[path] = diff_creating(a[index])
178
+ else:
179
+ if index < len(b):
180
+ diffs[path] = diff_deleting(b[index])
181
+ elif len(b) < index:
182
+ diffs.update(PortalObject._compare(a[index], b[index], value_type, _path=path))
183
+ else:
184
+ diffs[path] = diff_creating(a[index])
185
+ elif a != b:
186
+ if a == PortalObject._PROPERTY_DELETION_SENTINEL:
187
+ diffs[_path] = diff_deleting(b)
188
+ else:
189
+ diffs[_path] = diff_updating(a, b)
190
+ return diffs
191
+
192
+ def normalize_refs(self, refs: List[dict]) -> None:
193
+ """
194
+ Turns any (linkTo) references which are paths (e.g. /SubmissionCenter/uwsc_gcc) within
195
+ this Portal object into the uuid style reference (e.g. d1b67068-300f-483f-bfe8-63d23c93801f),
196
+ based on the given "refs" list which is assumed to be a list of dictionaries, where each
197
+ contains a "path" and a "uuid" property; this list is typically (for our first usage of
198
+ this function) the value of structured_data.StructuredDataSet.resolved_refs_with_uuids.
199
+ Changes are made to this Portal object in place; use normalized_refs function to make a copy.
200
+ If there are no "refs" (None or empty) or if the specified reference is not found in this
201
+ list then the references will be looked up via Portal calls (via Portal.get_metadata).
202
+ """
203
+ PortalObject._normalize_refs(self.data, refs=refs, schema=self.schema, portal=self.portal)
204
+
205
+ def normalized_refs(self, refs: List[dict]) -> PortalObject:
206
+ """
207
+ Same as normalize_refs but does not make this change to this Portal object in place,
208
+ rather it returns a new instance of this Portal object wrapped in a new PortalObject.
209
+ """
210
+ portal_object = self.copy()
211
+ portal_object.normalize_refs(refs)
212
+ return portal_object
213
+
214
+ @staticmethod
215
+ def _normalize_refs(value: Any, refs: List[dict], schema: dict, portal: Portal, _path: Optional[str] = None) -> Any:
216
+ if not value or not isinstance(schema, dict):
217
+ return value
218
+ if isinstance(value, dict):
219
+ for key in value:
220
+ path = f"{_path}.{key}" if _path else key
221
+ value[key] = PortalObject._normalize_refs(value[key], refs=refs,
222
+ schema=schema, portal=portal, _path=path)
223
+ elif isinstance(value, list):
224
+ for index in range(len(value)):
225
+ path = f"{_path or ''}#{index}"
226
+ value[index] = PortalObject._normalize_refs(value[index], refs=refs,
227
+ schema=schema, portal=portal, _path=path)
228
+ elif value_type := Schema.get_property_by_path(schema, _path):
229
+ if link_to := value_type.get("linkTo"):
230
+ ref_path = f"/{link_to}/{value}"
231
+ if not isinstance(refs, list):
232
+ refs = []
233
+ if ref_uuids := [ref.get("uuid") for ref in refs if ref.get("path") == ref_path]:
234
+ ref_uuid = ref_uuids[0]
235
+ else:
236
+ ref_uuid = None
237
+ if ref_uuid:
238
+ return ref_uuid
239
+ # Here our (linkTo) reference appears not to be in the given refs; if these refs came
240
+ # from structured_data.StructuredDataSet.resolved_refs_with_uuids (in the context of
241
+ # smaht-submitr, which is the typical/first use case for this function) then this could
242
+ # be because the reference was to an internal object, i.e. another object existing within
243
+ # the data/spreadsheet being submitted. In any case, we don't have the associated uuid
244
+ # so let us look it up here.
245
+ if isinstance(portal, Portal):
246
+ if (ref_object := portal.get_metadata(ref_path)) and (ref_uuid := ref_object.get("uuid")):
247
+ return ref_uuid
248
+ return value
@@ -46,8 +46,8 @@ class StructuredDataSet:
46
46
  def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
47
47
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
48
48
  order: Optional[List[str]] = None, prune: bool = True) -> None:
49
- self.data = {}
50
- self._portal = Portal(portal, data=self.data, schemas=schemas) if portal else None
49
+ self._data = {}
50
+ self._portal = Portal(portal, data=self._data, schemas=schemas) if portal else None
51
51
  self._order = order
52
52
  self._prune = prune
53
53
  self._warnings = {}
@@ -57,6 +57,10 @@ class StructuredDataSet:
57
57
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
58
58
  self._load_file(file) if file else None
59
59
 
60
+ @property
61
+ def data(self) -> dict:
62
+ return self._data
63
+
60
64
  @staticmethod
61
65
  def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
62
66
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
@@ -66,10 +70,15 @@ class StructuredDataSet:
66
70
  def validate(self, force: bool = False) -> None:
67
71
  def data_without_deleted_properties(data: dict) -> dict:
68
72
  nonlocal self
69
- if self._prune:
70
- return {key: value for key, value in data.items() if value != RowReader.CELL_DELETION_SENTINEL}
71
- else:
72
- return {key: "" if value == RowReader.CELL_DELETION_SENTINEL else value for key, value in data.items()}
73
+ def isempty(value: Any) -> bool: # noqa
74
+ if value == RowReader.CELL_DELETION_SENTINEL:
75
+ return True
76
+ return self._prune and value in [None, "", {}, []]
77
+ def isempty_array_element(value: Any) -> bool: # noqa
78
+ return value == RowReader.CELL_DELETION_SENTINEL
79
+ data = copy.deepcopy(data)
80
+ remove_empty_properties(data, isempty=isempty, isempty_array_element=isempty_array_element)
81
+ return data
73
82
  if self._validated and not force:
74
83
  return
75
84
  self._validated = True
@@ -106,7 +115,11 @@ class StructuredDataSet:
106
115
 
107
116
  @property
108
117
  def resolved_refs(self) -> List[str]:
109
- return list(self._resolved_refs)
118
+ return list([resolved_ref[0] for resolved_ref in self._resolved_refs])
119
+
120
+ @property
121
+ def resolved_refs_with_uuids(self) -> List[str]:
122
+ return list([{"path": resolved_ref[0], "uuid": resolved_ref[1]} for resolved_ref in self._resolved_refs])
110
123
 
111
124
  @property
112
125
  def upload_files(self) -> List[str]:
@@ -192,10 +205,10 @@ class StructuredDataSet:
192
205
  def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
193
206
  if self._prune:
194
207
  remove_empty_properties(data)
195
- if type_name in self.data:
196
- self.data[type_name].extend([data] if isinstance(data, dict) else data)
208
+ if type_name in self._data:
209
+ self._data[type_name].extend([data] if isinstance(data, dict) else data)
197
210
  else:
198
- self.data[type_name] = [data] if isinstance(data, dict) else data
211
+ self._data[type_name] = [data] if isinstance(data, dict) else data
199
212
 
200
213
  def _add_properties(self, structured_row: dict, properties: dict, schema: Optional[dict] = None) -> None:
201
214
  for name in properties:
@@ -356,17 +369,9 @@ class Schema:
356
369
  def validate(self, data: dict) -> List[str]:
357
370
  errors = []
358
371
  for error in SchemaValidator(self.data, format_checker=SchemaValidator.FORMAT_CHECKER).iter_errors(data):
359
- errors.append(error.message)
372
+ errors.append(f"Validation error at '{error.json_path}': {error.message}")
360
373
  return errors
361
374
 
362
- @property
363
- def unresolved_refs(self) -> List[dict]:
364
- return self._unresolved_refs
365
-
366
- @property
367
- def resolved_refs(self) -> List[str]:
368
- return list(self._resolved_refs)
369
-
370
375
  def get_typeinfo(self, column_name: str) -> Optional[dict]:
371
376
  if isinstance(info := self._typeinfo.get(column_name), str):
372
377
  info = self._typeinfo.get(info)
@@ -430,9 +435,14 @@ class Schema:
430
435
  if not (resolved := portal.ref_exists(link_to, value)):
431
436
  self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}"})
432
437
  elif len(resolved) > 1:
433
- self._unresolved_refs.append({"src": src, "error": f"/{link_to}/{value}", "types": resolved})
438
+ # TODO: Don't think we need this anymore; see TODO on Portal.ref_exists.
439
+ self._unresolved_refs.append({
440
+ "src": src,
441
+ "error": f"/{link_to}/{value}",
442
+ "types": [resolved_ref["type"] for resolved_ref in resolved]})
434
443
  else:
435
- self._resolved_refs.add(f"/{link_to}/{value}")
444
+ # A resolved-ref set value is a tuple of the reference path and its uuid.
445
+ self._resolved_refs.add((f"/{link_to}/{value}", resolved[0].get("uuid")))
436
446
  return value
437
447
  return lambda value, src: map_ref(value, typeinfo.get("linkTo"), self._portal, src)
438
448
 
@@ -624,8 +634,9 @@ class Portal(PortalBase):
624
634
 
625
635
  def ref_exists(self, type_name: str, value: str) -> List[str]:
626
636
  resolved = []
627
- if self._ref_exists_single(type_name, value):
628
- resolved.append(type_name)
637
+ is_resolved, resolved_uuid = self._ref_exists_single(type_name, value)
638
+ if is_resolved:
639
+ resolved.append({"type": type_name, "uuid": resolved_uuid})
629
640
  # TODO: Added this return on 2024-01-14 (dmichaels).
630
641
  # Why did I originally check for multiple existing values?
631
642
  # Why not just return right away if I find that the ref exists?
@@ -638,20 +649,23 @@ class Portal(PortalBase):
638
649
  if (schemas_super_type_map := self.get_schemas_super_type_map()):
639
650
  if (sub_type_names := schemas_super_type_map.get(type_name)):
640
651
  for sub_type_name in sub_type_names:
641
- if self._ref_exists_single(sub_type_name, value):
642
- resolved.append(type_name)
652
+ is_resolved, resolved_uuid = self._ref_exists_single(sub_type_name, value)
653
+ if is_resolved:
654
+ resolved.append({"type": type_name, "uuid": resolved_uuid})
643
655
  # TODO: Added this return on 2024-01-14 (dmichaels). See above TODO.
644
656
  return resolved
645
657
  return resolved
646
658
 
647
- def _ref_exists_single(self, type_name: str, value: str) -> bool:
659
+ def _ref_exists_single(self, type_name: str, value: str) -> Tuple[bool, Optional[str]]:
648
660
  if self._data and (items := self._data.get(type_name)) and (schema := self.get_schema(type_name)):
649
661
  iproperties = set(schema.get("identifyingProperties", [])) | {"identifier", "uuid"}
650
662
  for item in items:
651
663
  if (ivalue := next((item[iproperty] for iproperty in iproperties if iproperty in item), None)):
652
664
  if isinstance(ivalue, list) and value in ivalue or ivalue == value:
653
- return True
654
- return self.get_metadata(f"/{type_name}/{value}") is not None
665
+ return True, None
666
+ if (value := self.get_metadata(f"/{type_name}/{value}")) is None:
667
+ return False, None
668
+ return True, value.get("uuid")
655
669
 
656
670
  @staticmethod
657
671
  def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None,
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.7.1.1b4" # TODO: To become 8.7.2
3
+ version = "8.7.1.1b6" # TODO: To become 8.7.2
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
@@ -1,190 +0,0 @@
1
- from functools import lru_cache
2
- import re
3
- from typing import Any, Callable, List, Optional, Tuple, Type, Union
4
- from dcicutils.data_readers import RowReader
5
- from dcicutils.portal_utils import Portal
6
- from dcicutils.schema_utils import Schema
7
-
8
- PortalObject = Type["PortalObject"] # Forward type reference for type hints.
9
-
10
-
11
- class PortalObject:
12
-
13
- def __init__(self, portal: Portal, portal_object: dict, portal_object_type: Optional[str] = None) -> None:
14
- self._portal = portal
15
- self._data = portal_object
16
- self._type = portal_object_type if isinstance(portal_object_type, str) and portal_object_type else None
17
-
18
- @property
19
- def data(self):
20
- return self._data
21
-
22
- @property
23
- @lru_cache(maxsize=1)
24
- def type(self):
25
- return self._type or Portal.get_schema_type(self._data)
26
-
27
- @property
28
- @lru_cache(maxsize=1)
29
- def types(self):
30
- return self._type or Portal.get_schema_types(self._data)
31
-
32
- @property
33
- @lru_cache(maxsize=1)
34
- def uuid(self) -> Optional[str]:
35
- return self._data.get("uuid") if isinstance(self._data, dict) else None
36
-
37
- @property
38
- @lru_cache(maxsize=1)
39
- def schema(self):
40
- return self._portal.get_schema(self.type)
41
-
42
- @property
43
- @lru_cache(maxsize=1)
44
- def identifying_properties(self) -> List[str]:
45
- """
46
- Returns the list of all identifying property names of this Portal object which actually have values.
47
- Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
48
- properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
49
- """
50
- if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
51
- return []
52
- identifying_properties = []
53
- for identifying_property in schema_identifying_properties:
54
- if identifying_property not in ["uuid", "identifier", "aliases"]:
55
- if self._data.get(identifying_property):
56
- identifying_properties.append(identifying_property)
57
- if self._data.get("identifier"):
58
- identifying_properties.insert(0, "identifier")
59
- if self._data.get("uuid"):
60
- identifying_properties.insert(0, "uuid")
61
- if "aliases" in schema_identifying_properties and self._data.get("aliases"):
62
- identifying_properties.append("aliases")
63
- return identifying_properties
64
-
65
- @property
66
- @lru_cache(maxsize=1)
67
- def identifying_paths(self) -> List[str]:
68
- """
69
- Returns a list of the possible Portal URL paths identifying this Portal object.
70
- """
71
- if not (identifying_properties := self.identifying_properties):
72
- return []
73
- identifying_paths = []
74
- for identifying_property in identifying_properties:
75
- if (identifying_value := self._data.get(identifying_property)):
76
- if identifying_property == "uuid":
77
- identifying_paths.append(f"/{self.type}/{identifying_value}")
78
- identifying_paths.append(f"/{identifying_value}")
79
- # For now at least we include the path both with and without the schema type component,
80
- # as for some identifying values, it works (only) with, and some, it works (only) without.
81
- # For example: If we have FileSet with "accession", an identifying property, with value
82
- # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
83
- # conversely using "submitted_id", also an identifying property, with value
84
- # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
85
- # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
86
- elif isinstance(identifying_value, list):
87
- for identifying_value_item in identifying_value:
88
- identifying_paths.append(f"/{self.type}/{identifying_value_item}")
89
- identifying_paths.append(f"/{identifying_value_item}")
90
- else:
91
- identifying_paths.append(f"/{self.type}/{identifying_value}")
92
- identifying_paths.append(f"/{identifying_value}")
93
- return identifying_paths
94
-
95
- @property
96
- @lru_cache(maxsize=1)
97
- def identifying_path(self) -> Optional[str]:
98
- if identifying_paths := self.identifying_paths:
99
- return identifying_paths[0]
100
-
101
- def lookup(self, include_identifying_path: bool = False,
102
- raw: bool = False) -> Optional[Union[Tuple[PortalObject, str], PortalObject]]:
103
- return self._lookup(raw=raw) if include_identifying_path else self._lookup(raw=raw)[0]
104
-
105
- def lookup_identifying_path(self) -> Optional[str]:
106
- return self._lookup()[1]
107
-
108
- def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
109
- try:
110
- for identifying_path in self.identifying_paths:
111
- if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
112
- return PortalObject(self._portal, value.json(), self.type if raw else None), identifying_path
113
- except Exception:
114
- pass
115
- return None, self.identifying_path
116
-
117
- def compare(self, value: Union[dict, PortalObject], consider_link_to: bool = False) -> dict:
118
- """
119
- Compares this Portal object against the given Portal object value; noting differences in the values of properties
120
- which they have in common; and properties which are in this Portal object and not in the given Portal object;
121
- we do NOT check the converse, i.e. properties in the given Portal object which are not in this Portal object.
122
- Returns a dictionary with a description of the differences. If the given consider_link_to flag is True then
123
- for differences detected in linkTo reference values, we will actually check that the object which is being
124
- referenced is different or the same, e.g. the file_format reference (linkTo) property value "fastq" looks
125
- different from "eb417c0a-70dd-42e3-9841-ac7f1ee22962" but they (may) refer to the same object.
126
- """
127
- def are_properties_equal(property_path: str, property_value_a: Any, property_value_b: Any) -> bool:
128
- if property_value_a == property_value_b:
129
- return True
130
- nonlocal self
131
- if (schema := self.schema) and (property_type := Schema.get_property_by_path(schema, property_path)):
132
- if link_to := property_type.get("linkTo"):
133
- """
134
- This works basically except not WRT sub/super-types (e.g. CellCultureSample vs Sample);
135
- this is only preferable as it only requires one Portal GET rather than two, as below.
136
- if (a := self._portal.get(f"/{link_to}/{property_value_a}")) and (a.status_code == 200):
137
- if a_identifying_paths := PortalObject(self._portal, a.json()).identifying_paths:
138
- if f"/{link_to}/{property_value_b}" in a_identifying_paths:
139
- return True
140
- """
141
- if a := self._portal.get(f"/{link_to}/{property_value_a}", raw=True):
142
- if (a.status_code == 200) and (a := a.json()):
143
- if b := self._portal.get(f"/{link_to}/{property_value_b}", raw=True):
144
- if (b.status_code == 200) and (b := b.json()):
145
- return a == b
146
- return False
147
- return PortalObject._compare(self._data, value.data if isinstance(value, PortalObject) else value,
148
- compare=are_properties_equal if consider_link_to else None)
149
-
150
- _ARRAY_KEY_REGULAR_EXPRESSION = re.compile(rf"^({Schema._ARRAY_NAME_SUFFIX_CHAR}\d+)$")
151
-
152
- @staticmethod
153
- def _compare(a: dict, b: dict, compare: Optional[Callable] = None, _path: Optional[str] = None) -> dict:
154
- def key_to_path(key: str) -> Optional[str]: # noqa
155
- nonlocal _path
156
- if match := PortalObject._ARRAY_KEY_REGULAR_EXPRESSION.search(key):
157
- return f"{_path}{match.group(1)}" if _path else match.group(1)
158
- return f"{_path}.{key}" if _path else key
159
- def list_to_dictionary(value: list) -> dict: # noqa
160
- result = {}
161
- for index, item in enumerate(value):
162
- result[f"#{index}"] = item
163
- return result
164
- diffs = {}
165
- for key in a:
166
- path = key_to_path(key)
167
- if key not in b:
168
- if a[key] != RowReader.CELL_DELETION_SENTINEL:
169
- diffs[path] = {"value": a[key], "creating_value": True}
170
- else:
171
- if isinstance(a[key], dict) and isinstance(b[key], dict):
172
- diffs.update(PortalObject._compare(a[key], b[key], compare=compare, _path=path))
173
- elif isinstance(a[key], list) and isinstance(b[key], list):
174
- # Note that lists will be compared in order, which means that when dealing with
175
- # insertions/deletions to/from the list, we may easily mistakenly regard elements
176
- # of the list to be different when they are really the same, since they occupy
177
- # different indices within the array. This is just a known restriction of this
178
- # comparison functionality; and perhaps actually technically correct, but probably
179
- # in practice, at the application/semantic level, we likely regard the order of
180
- # lists as unimportant, and with a little more work here we could try to detect
181
- # and exclude from the diffs for a list, those elements in the list which are
182
- # equal to each other but which reside at different indices within the two lists.
183
- diffs.update(PortalObject._compare(list_to_dictionary(a[key]),
184
- list_to_dictionary(b[key]), compare=compare, _path=path))
185
- elif a[key] != b[key]:
186
- if a[key] == RowReader.CELL_DELETION_SENTINEL:
187
- diffs[path] = {"value": b[key], "deleting_value": True}
188
- elif not callable(compare) or not compare(path, a[key], b[key]):
189
- diffs[path] = {"value": a[key], "updating_value": b[key]}
190
- return diffs
File without changes
File without changes