dcicutils 8.7.0.1b34__py3-none-any.whl → 8.7.0.1b35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
1
1
  from functools import lru_cache
2
2
  import re
3
- from typing import List, Optional, Tuple, Union
4
- from dcicutils.schema_utils import get_identifying_properties
3
+ from typing import Any, Callable, List, Optional, Tuple, Type, Union
5
4
  from dcicutils.portal_utils import Portal
5
+ from dcicutils.schema_utils import Schema
6
+
7
+ PortalObject = Type["PortalObject"] # Forward type reference for type hints.
6
8
 
7
9
 
8
10
  class PortalObject:
@@ -18,34 +20,23 @@ class PortalObject:
18
20
 
19
21
  @property
20
22
  @lru_cache(maxsize=1)
21
- def schema(self):
22
- return self._portal.get_schema(self.schema_type)
23
-
24
- @property
25
- @lru_cache(maxsize=1)
26
- def schema_type(self):
23
+ def type(self):
27
24
  return self._type or Portal.get_schema_type(self._data)
28
25
 
29
26
  @property
30
27
  @lru_cache(maxsize=1)
31
- def schema_types(self):
28
+ def types(self):
32
29
  return self._type or Portal.get_schema_types(self._data)
33
30
 
34
31
  @property
35
32
  @lru_cache(maxsize=1)
36
- def schema_identifying_properties(self) -> list:
37
- if not (schema := self.schema):
38
- return []
39
- return get_identifying_properties(schema)
33
+ def uuid(self) -> Optional[str]:
34
+ return self._data.get("uuid") if isinstance(self._data, dict) else None
40
35
 
41
36
  @property
42
37
  @lru_cache(maxsize=1)
43
- def uuid(self) -> Optional[str]:
44
- return PortalObject.get_uuid(self._data)
45
-
46
- @staticmethod
47
- def get_uuid(portal_object: dict) -> Optional[str]:
48
- return portal_object.get("uuid") if isinstance(portal_object, dict) else None
38
+ def schema(self):
39
+ return self._portal.get_schema(self.type)
49
40
 
50
41
  @property
51
42
  @lru_cache(maxsize=1)
@@ -55,8 +46,10 @@ class PortalObject:
55
46
  Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
56
47
  properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
57
48
  """
49
+ if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
50
+ return []
58
51
  identifying_properties = []
59
- for identifying_property in self.schema_identifying_properties:
52
+ for identifying_property in schema_identifying_properties:
60
53
  if identifying_property not in ["uuid", "identifier", "aliases"]:
61
54
  if self._data.get(identifying_property):
62
55
  identifying_properties.append(identifying_property)
@@ -64,7 +57,7 @@ class PortalObject:
64
57
  identifying_properties.insert(0, "identifier")
65
58
  if self._data.get("uuid"):
66
59
  identifying_properties.insert(0, "uuid")
67
- if "aliases" in self.schema_identifying_properties and self._data.get("aliases"):
60
+ if "aliases" in schema_identifying_properties and self._data.get("aliases"):
68
61
  identifying_properties.append("aliases")
69
62
  return identifying_properties
70
63
 
@@ -81,8 +74,8 @@ class PortalObject:
81
74
  if (identifying_value := self._data.get(identifying_property)):
82
75
  if identifying_property == "uuid":
83
76
  identifying_paths.append(f"/{identifying_value}")
84
- # For now at least we include the path both with and without the schema type component
85
- # as for some identifying values it works (only) with and some it works (only) without.
77
+ # For now at least we include the path both with and without the schema type component,
78
+ # as for some identifying values, it works (only) with, and some, it works (only) without.
86
79
  # For example: If we have FileSet with "accession", an identifying property, with value
87
80
  # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
88
81
  # conversely using "submitted_id", also an identifying property, with value
@@ -90,10 +83,10 @@ class PortalObject:
90
83
  # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
91
84
  elif isinstance(identifying_value, list):
92
85
  for identifying_value_item in identifying_value:
93
- identifying_paths.append(f"/{self.schema_type}/{identifying_value_item}")
86
+ identifying_paths.append(f"/{self.type}/{identifying_value_item}")
94
87
  identifying_paths.append(f"/{identifying_value_item}")
95
88
  else:
96
- identifying_paths.append(f"/{self.schema_type}/{identifying_value}")
89
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
97
90
  identifying_paths.append(f"/{identifying_value}")
98
91
  return identifying_paths
99
92
 
@@ -104,34 +97,47 @@ class PortalObject:
104
97
  return identifying_paths[0]
105
98
 
106
99
  def lookup(self, include_identifying_path: bool = False,
107
- raw: bool = False) -> Optional[Union[Tuple[dict, str], dict]]:
100
+ raw: bool = False) -> Optional[Union[Tuple[PortalObject, str], PortalObject]]:
108
101
  return self._lookup(raw=raw) if include_identifying_path else self._lookup(raw=raw)[0]
109
102
 
110
103
  def lookup_identifying_path(self) -> Optional[str]:
111
104
  return self._lookup()[1]
112
105
 
113
- def _lookup(self, raw: bool = False) -> Tuple[Optional[dict], Optional[str]]:
106
+ def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
114
107
  try:
115
108
  for identifying_path in self.identifying_paths:
116
109
  if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
117
- return value.json(), identifying_path
110
+ return PortalObject(self._portal, value.json(), self.type if raw else None), identifying_path
118
111
  except Exception:
119
112
  pass
120
113
  return None, self.identifying_path
121
114
 
122
- def compare(self, value: dict) -> dict:
115
+ def compare(self, value: Union[dict, PortalObject], consider_link_to: bool = False) -> dict:
123
116
  """
124
117
  Compares this Portal object against the given Portal object value; noting differing values of properties
125
118
  which they have in common; and properties which are in this Portal object and not in the given Portal object;
126
119
  we do NOT check the converse, i.e. properties in the given Portal object which are not in this Portal object.
127
120
  Returns a dictionary with a description of the differences.
128
121
  """
129
- return PortalObject._compare(self._data, value.data if isinstance(value, PortalObject) else value)
130
-
131
- _ARRAY_KEY_REGULAR_EXPRESSION = re.compile(r"^(#\d+)$")
122
+ def are_properties_equal(property_path: str, property_value_a: Any, property_value_b: Any) -> bool:
123
+ if property_value_a == property_value_b:
124
+ return True
125
+ nonlocal self
126
+ if (schema := self.schema) and (property_type := Schema.get_property_by_path(schema, property_path)):
127
+ if link_to := property_type.get("linkTo"):
128
+ if a := self._portal.get(f"/{link_to}/{property_value_a}", raw=True):
129
+ if (a.status_code == 200) and (a := a.json()):
130
+ if b := self._portal.get(f"/{link_to}/{property_value_b}", raw=True):
131
+ if (b.status_code == 200) and (b := b.json()):
132
+ return a == b
133
+ return False
134
+ return PortalObject._compare(self._data, value.data if isinstance(value, PortalObject) else value,
135
+ compare=are_properties_equal if consider_link_to else None)
136
+
137
+ _ARRAY_KEY_REGULAR_EXPRESSION = re.compile(rf"^({Schema._ARRAY_NAME_SUFFIX_CHAR}\d+)$")
132
138
 
133
139
  @staticmethod
134
- def _compare(a: dict, b: dict, _path: Optional[str] = None) -> dict:
140
+ def _compare(a: dict, b: dict, compare: Optional[Callable] = None, _path: Optional[str] = None) -> dict:
135
141
  def key_to_path(key: str) -> Optional[str]: # noqa
136
142
  nonlocal _path
137
143
  if match := PortalObject._ARRAY_KEY_REGULAR_EXPRESSION.search(key):
@@ -149,10 +155,11 @@ class PortalObject:
149
155
  diffs[path] = {"value": a[key], "missing_value": True}
150
156
  else:
151
157
  if isinstance(a[key], dict) and isinstance(b[key], dict):
152
- diffs.update(PortalObject._compare(a[key], b[key], _path=path))
158
+ diffs.update(PortalObject._compare(a[key], b[key], compare=compare, _path=path))
153
159
  elif isinstance(a[key], list) and isinstance(b[key], list):
154
160
  diffs.update(PortalObject._compare(list_to_dictionary(a[key]),
155
- list_to_dictionary(b[key]), _path=path))
161
+ list_to_dictionary(b[key]), compare=compare, _path=path))
156
162
  elif a[key] != b[key]:
157
- diffs[path] = {"value": a[key], "differing_value": b[key]}
163
+ if not callable(compare) or not compare(path, a[key], b[key]):
164
+ diffs[path] = {"value": a[key], "differing_value": b[key]}
158
165
  return diffs
dcicutils/schema_utils.py CHANGED
@@ -1,4 +1,6 @@
1
- from typing import Any, Dict, List
1
+ import os
2
+ from typing import Any, Dict, List, Optional, Tuple
3
+ from dcicutils.misc_utils import to_camel_case
2
4
 
3
5
 
4
6
  class JsonSchemaConstants:
@@ -183,3 +185,79 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
183
185
  for one_of_schema in get_one_of(schema)
184
186
  if get_format(one_of_schema)
185
187
  ]
188
+
189
+
190
class Schema:
    """
    Thin wrapper around a JSON schema dictionary (as used for Portal types) which
    carries the schema data, a type name for it, and helpers to look up the schema
    of a (possibly nested) property given a dotted/array-suffixed property path.
    """

    # Character which introduces an array index on a path component (e.g. "abc#2").
    _ARRAY_NAME_SUFFIX_CHAR = "#"
    # Character which separates the components of a property path (e.g. "abc.def").
    _DOTTED_NAME_DELIMITER_CHAR = "."

    def __init__(self, schema: dict, schema_type: Optional[str] = None) -> None:
        """
        :param schema: The schema as a dictionary, or another Schema whose data is
            then shared; anything else results in an empty schema dictionary.
        :param schema_type: The type name for the schema; if not a non-empty string
            then the type name is derived from the "title" of the schema data.
        """
        if isinstance(schema, dict):
            self._data = schema
        elif isinstance(schema, Schema):
            self._data = schema.data
        else:
            self._data = {}
        if isinstance(schema_type, str) and schema_type:
            self._type = schema_type
        else:
            self._type = Schema.type_name(self._data.get("title", ""))

    @property
    def data(self) -> dict:
        """The underlying schema dictionary."""
        return self._data

    @property
    def type(self) -> Optional[str]:
        """The type name of this schema; may be None if it could not be derived."""
        return self._type

    @staticmethod
    def type_name(value: str) -> Optional[str]:  # File or other name.
        """
        Returns a type name for the given file (or other) name: spaces are removed,
        any directory part and the (last) dot-suffix are dropped, and the result is
        passed through to_camel_case. Returns None if the given value is not a string
        or if nothing is left of it after removing spaces and the directory part.
        """
        if isinstance(value, str) and (value := os.path.basename(value.replace(" ", ""))):
            return to_camel_case(value[0:dot] if (dot := value.rfind(".")) >= 0 else value)
        return None

    def property_by_path(self, property_path: str) -> Optional[dict]:
        """
        Returns the schema (dictionary) of the property of this schema identified by
        the given property path, or None if not found; see get_property_by_path.
        """
        return Schema.get_property_by_path(self._data, property_path)

    @staticmethod
    def get_property_by_path(schema: dict, property_path: str) -> Optional[dict]:
        """
        Returns the schema (dictionary) for the property identified by the given path
        within the given schema, or None if it cannot be resolved.

        The path is a sequence of property names separated by dots; each name may have
        array index suffixes (e.g. "abc#0.def"), in which case the "items" of the array
        property are descended into, once per suffix. For example "abc#0.def" resolves to
        schema["properties"]["abc"]["items"]["properties"]["def"]. An array property named
        without any index suffix resolves (to the array property itself) only if it is the
        last component of the path. Only object properties (or array items which are
        objects) may be followed by further path components.

        :param schema: Schema dictionary with a "properties" dictionary.
        :param property_path: Dotted property path as described above.
        :return: Schema dictionary of the property, or None if not found.
        """
        if not isinstance(schema, dict) or not isinstance(property_path, str):
            return None
        schema_properties = schema.get("properties")
        if not schema_properties or not isinstance(schema_properties, dict):
            return None
        # Resolve only the first component here; the rest (if any) is resolved
        # recursively against the schema of whatever this component refers to.
        head, delimiter, tail = property_path.partition(Schema._DOTTED_NAME_DELIMITER_CHAR)
        is_last = not delimiter
        property_name, array_specifiers = Schema._unarrayize_property_name(head)
        property_value = schema_properties.get(property_name)
        if not property_value or not isinstance(property_value, dict):
            return None
        property_type = property_value.get("type")
        if property_type == "array":
            if not array_specifiers:
                return property_value if is_last else None
            # One level of array nesting is descended into per array index suffix.
            for _ in array_specifiers:
                if property_type != "array":
                    return None
                array_items = property_value.get("items")
                if not array_items or not isinstance(array_items, dict):
                    return None
                property_value = array_items
                property_type = property_value.get("type")
        if is_last:
            return property_value
        if property_type == "object":
            return Schema.get_property_by_path(property_value, tail)
        # A non-object cannot have sub-properties, so the rest of the path is unresolvable.
        return None

    @staticmethod
    def _unarrayize_property_name(property_name: str) -> Tuple[str, Optional[List[int]]]:
        """
        Splits the given property name into its base name and the list of its array
        index suffixes; e.g. "abc#12#3" yields ("abc", [12, 3]). An empty suffix (as
        in "abc#") counts as index zero. If there are no suffixes, or any suffix is
        not a number, then the (stripped) given name and None are returned.
        """
        property_name = property_name.strip()
        base_name, *suffixes = property_name.split(Schema._ARRAY_NAME_SUFFIX_CHAR)
        if not suffixes:
            return property_name, None
        array_specifiers = []
        for suffix in suffixes:
            if suffix.isdigit():
                array_specifiers.append(int(suffix))
            elif suffix == "":
                array_specifiers.append(0)
            else:
                return property_name, None
        return base_name.strip(), array_specifiers
@@ -74,7 +74,7 @@ class StructuredDataSet:
74
74
  row_number += 1
75
75
  if (validation_errors := schema.validate(data)) is not None:
76
76
  for validation_error in validation_errors:
77
- self._note_error({"src": create_dict(type=schema.name, row=row_number),
77
+ self._note_error({"src": create_dict(type=schema.type, row=row_number),
78
78
  "error": validation_error}, "validation")
79
79
 
80
80
  @property
@@ -168,7 +168,7 @@ class StructuredDataSet:
168
168
  if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows.
169
169
  if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)):
170
170
  noschema = True
171
- elif schema and (schema_name := schema.name):
171
+ elif schema and (schema_name := schema.type):
172
172
  type_name = schema_name
173
173
  structured_row_template = _StructuredRowTemplate(reader.header, schema)
174
174
  structured_row = structured_row_template.create_row()
@@ -222,7 +222,7 @@ class _StructuredRowTemplate:
222
222
 
223
223
  def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None:
224
224
  if (set_value_function := self._set_value_functions.get(column_name)):
225
- src = create_dict(type=self._schema.name if self._schema else None,
225
+ src = create_dict(type=self._schema.type if self._schema else None,
226
226
  column=column_name, file=file, row=row_number)
227
227
  set_value_function(data, value, src)
228
228
 
@@ -319,8 +319,8 @@ class _StructuredRowTemplate:
319
319
  class Schema:
320
320
 
321
321
  def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None:
322
- self.data = schema_json
323
- self.name = Schema.type_name(schema_json.get("title", "")) if schema_json else ""
322
+ self._data = schema_json if isinstance(schema_json, dict) else {}
323
+ self._type = Schema.type_name(schema_json.get("title", ""))
324
324
  self._portal = portal # Needed only to resolve linkTo references.
325
325
  self._map_value_functions = {
326
326
  "boolean": self._map_function_boolean,
@@ -333,6 +333,14 @@ class Schema:
333
333
  self._unresolved_refs = []
334
334
  self._typeinfo = self._create_typeinfo(schema_json)
335
335
 
336
+ @property
337
+ def data(self) -> dict:
338
+ return self._data
339
+
340
+ @property
341
+ def type(self) -> str:
342
+ return self._type
343
+
336
344
  @staticmethod
337
345
  def load_by_name(name: str, portal: Portal) -> Optional[dict]:
338
346
  schema_json = portal.get_schema(Schema.type_name(name)) if portal else None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.7.0.1b34
3
+ Version: 8.7.0.1b35
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -44,7 +44,7 @@ dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
44
44
  dcicutils/misc_utils.py,sha256=bMRWWxdbhuF3PKdCZEH-H4U1ecgT3Nag3EL92D9XGoY,100973
45
45
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
46
46
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
47
- dcicutils/portal_object_utils.py,sha256=74dGd6pvfOeWWj6JO5bNGfa0RVUwDSu9FaVdEsSIcEY,7289
47
+ dcicutils/portal_object_utils.py,sha256=udjT_-IYWInTc-oXoh0Ie_xVYp28YEg3BRIlS88eUwQ,8352
48
48
  dcicutils/portal_utils.py,sha256=jKYgZUYVdkg6VOs1hsiX4bSULLguOIBJFFRpvvZEklU,26704
49
49
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
50
50
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -52,22 +52,22 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
52
52
  dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
53
53
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
54
54
  dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
55
- dcicutils/schema_utils.py,sha256=3Gd9QboOjQ3FHFawerylvYYU8Lor1Ma2pFv4JmezCdg,5501
55
+ dcicutils/schema_utils.py,sha256=h3VlIiBxE8EmxnfcXHF5KZhNeIZIA71LvqeIM-04gKY,9169
56
56
  dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
57
57
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
58
58
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
59
59
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
60
60
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
61
61
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
62
- dcicutils/structured_data.py,sha256=0YYNwGJeev7xAV9sQj0ioP4m_h1gcx--Sj_dzLpO9us,34068
62
+ dcicutils/structured_data.py,sha256=VxkyBBMFoRIEoPZxQuYU1iVO3piQwSsG-W6_pKy3qXE,34225
63
63
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
64
64
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
65
65
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
66
66
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
67
67
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
68
68
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
69
- dcicutils-8.7.0.1b34.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
70
- dcicutils-8.7.0.1b34.dist-info/METADATA,sha256=8JWV1-5Q5p5h7fjBmZhXXkpM2-2hMUEpgQPTdeZSZcA,3315
71
- dcicutils-8.7.0.1b34.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
72
- dcicutils-8.7.0.1b34.dist-info/entry_points.txt,sha256=8wbw5csMIgBXhkwfgsgJeuFcoUc0WsucUxmOyml2aoA,209
73
- dcicutils-8.7.0.1b34.dist-info/RECORD,,
69
+ dcicutils-8.7.0.1b35.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
70
+ dcicutils-8.7.0.1b35.dist-info/METADATA,sha256=v9eG3K-Qg11ZBSXujKZI_CGs6AYp8YCAp1OAe6Eywgo,3315
71
+ dcicutils-8.7.0.1b35.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
72
+ dcicutils-8.7.0.1b35.dist-info/entry_points.txt,sha256=8wbw5csMIgBXhkwfgsgJeuFcoUc0WsucUxmOyml2aoA,209
73
+ dcicutils-8.7.0.1b35.dist-info/RECORD,,