dcicutils 8.7.0.1b33__py3-none-any.whl → 8.7.0.1b35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,10 @@
1
- from dcicutils.schema_utils import get_identifying_properties
2
- from dcicutils.portal_utils import Portal
3
1
  from functools import lru_cache
4
- from typing import List, Optional, Tuple, Union
2
+ import re
3
+ from typing import Any, Callable, List, Optional, Tuple, Type, Union
4
+ from dcicutils.portal_utils import Portal
5
+ from dcicutils.schema_utils import Schema
6
+
7
+ PortalObject = Type["PortalObject"] # Forward type reference for type hints.
5
8
 
6
9
 
7
10
  class PortalObject:
@@ -17,34 +20,23 @@ class PortalObject:
17
20
 
18
21
  @property
19
22
  @lru_cache(maxsize=1)
20
- def schema(self):
21
- return self._portal.get_schema(self.schema_type)
22
-
23
- @property
24
- @lru_cache(maxsize=1)
25
- def schema_type(self):
23
+ def type(self):
26
24
  return self._type or Portal.get_schema_type(self._data)
27
25
 
28
26
  @property
29
27
  @lru_cache(maxsize=1)
30
- def schema_types(self):
28
+ def types(self):
31
29
  return self._type or Portal.get_schema_types(self._data)
32
30
 
33
31
  @property
34
32
  @lru_cache(maxsize=1)
35
- def schema_identifying_properties(self) -> list:
36
- if not (schema := self.schema):
37
- return []
38
- return get_identifying_properties(schema)
33
+ def uuid(self) -> Optional[str]:
34
+ return self._data.get("uuid") if isinstance(self._data, dict) else None
39
35
 
40
36
  @property
41
37
  @lru_cache(maxsize=1)
42
- def uuid(self) -> Optional[str]:
43
- return PortalObject.get_uuid(self._data)
44
-
45
- @staticmethod
46
- def get_uuid(portal_object: dict) -> Optional[str]:
47
- return portal_object.get("uuid") if isinstance(portal_object, dict) else None
38
+ def schema(self):
39
+ return self._portal.get_schema(self.type)
48
40
 
49
41
  @property
50
42
  @lru_cache(maxsize=1)
@@ -54,8 +46,10 @@ class PortalObject:
54
46
  Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
55
47
  properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
56
48
  """
49
+ if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
50
+ return []
57
51
  identifying_properties = []
58
- for identifying_property in self.schema_identifying_properties:
52
+ for identifying_property in schema_identifying_properties:
59
53
  if identifying_property not in ["uuid", "identifier", "aliases"]:
60
54
  if self._data.get(identifying_property):
61
55
  identifying_properties.append(identifying_property)
@@ -63,7 +57,7 @@ class PortalObject:
63
57
  identifying_properties.insert(0, "identifier")
64
58
  if self._data.get("uuid"):
65
59
  identifying_properties.insert(0, "uuid")
66
- if "aliases" in self.schema_identifying_properties and self._data.get("aliases"):
60
+ if "aliases" in schema_identifying_properties and self._data.get("aliases"):
67
61
  identifying_properties.append("aliases")
68
62
  return identifying_properties
69
63
 
@@ -80,8 +74,8 @@ class PortalObject:
80
74
  if (identifying_value := self._data.get(identifying_property)):
81
75
  if identifying_property == "uuid":
82
76
  identifying_paths.append(f"/{identifying_value}")
83
- # For now at least we include the path both with and without the schema type component
84
- # as for some identifying values it works (only) with and some it works (only) without.
77
+ # For now at least we include the path both with and without the schema type component,
78
+ # as for some identifying values, it works (only) with, and some, it works (only) without.
85
79
  # For example: If we have FileSet with "accession", an identifying property, with value
86
80
  # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
87
81
  # conversely using "submitted_id", also an identifying property, with value
@@ -89,10 +83,10 @@ class PortalObject:
89
83
  # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
90
84
  elif isinstance(identifying_value, list):
91
85
  for identifying_value_item in identifying_value:
92
- identifying_paths.append(f"/{self.schema_type}/{identifying_value_item}")
86
+ identifying_paths.append(f"/{self.type}/{identifying_value_item}")
93
87
  identifying_paths.append(f"/{identifying_value_item}")
94
88
  else:
95
- identifying_paths.append(f"/{self.schema_type}/{identifying_value}")
89
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
96
90
  identifying_paths.append(f"/{identifying_value}")
97
91
  return identifying_paths
98
92
 
@@ -103,17 +97,69 @@ class PortalObject:
103
97
  return identifying_paths[0]
104
98
 
105
99
  def lookup(self, include_identifying_path: bool = False,
106
- raw: bool = False) -> Optional[Union[Tuple[dict, str], dict]]:
100
+ raw: bool = False) -> Optional[Union[Tuple[PortalObject, str], PortalObject]]:
107
101
  return self._lookup(raw=raw) if include_identifying_path else self._lookup(raw=raw)[0]
108
102
 
109
103
  def lookup_identifying_path(self) -> Optional[str]:
110
104
  return self._lookup()[1]
111
105
 
112
- def _lookup(self, raw: bool = False) -> Tuple[Optional[dict], Optional[str]]:
106
+ def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
113
107
  try:
114
108
  for identifying_path in self.identifying_paths:
115
109
  if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
116
- return value.json(), identifying_path
110
+ return PortalObject(self._portal, value.json(), self.type if raw else None), identifying_path
117
111
  except Exception:
118
112
  pass
119
113
  return None, self.identifying_path
114
+
115
+ def compare(self, value: Union[dict, PortalObject], consider_link_to: bool = False) -> dict:
116
+ """
117
+ Compares this Portal object against the given Portal object value; noting differing values of properties
118
+ which they have in common; and properties which are in this Portal object and not in the given Portal object;
119
+ we do NOT check the converse, i.e. properties in the given Portal object which are not in this Portal object.
120
+ Returns a dictionary with a description of the differences.
121
+ """
122
+ def are_properties_equal(property_path: str, property_value_a: Any, property_value_b: Any) -> bool:
123
+ if property_value_a == property_value_b:
124
+ return True
125
+ nonlocal self
126
+ if (schema := self.schema) and (property_type := Schema.get_property_by_path(schema, property_path)):
127
+ if link_to := property_type.get("linkTo"):
128
+ if a := self._portal.get(f"/{link_to}/{property_value_a}", raw=True):
129
+ if (a.status_code == 200) and (a := a.json()):
130
+ if b := self._portal.get(f"/{link_to}/{property_value_b}", raw=True):
131
+ if (b.status_code == 200) and (b := b.json()):
132
+ return a == b
133
+ return False
134
+ return PortalObject._compare(self._data, value.data if isinstance(value, PortalObject) else value,
135
+ compare=are_properties_equal if consider_link_to else None)
136
+
137
+ _ARRAY_KEY_REGULAR_EXPRESSION = re.compile(rf"^({Schema._ARRAY_NAME_SUFFIX_CHAR}\d+)$")
138
+
139
+ @staticmethod
140
+ def _compare(a: dict, b: dict, compare: Optional[Callable] = None, _path: Optional[str] = None) -> dict:
141
+ def key_to_path(key: str) -> Optional[str]: # noqa
142
+ nonlocal _path
143
+ if match := PortalObject._ARRAY_KEY_REGULAR_EXPRESSION.search(key):
144
+ return f"{_path}{match.group(1)}" if _path else match.group(1)
145
+ return f"{_path}.{key}" if _path else key
146
+ def list_to_dictionary(value: list) -> dict: # noqa
147
+ result = {}
148
+ for index, item in enumerate(sorted(value)): # ignore array order
149
+ result[f"#{index}"] = item
150
+ return result
151
+ diffs = {}
152
+ for key in a:
153
+ path = key_to_path(key)
154
+ if key not in b:
155
+ diffs[path] = {"value": a[key], "missing_value": True}
156
+ else:
157
+ if isinstance(a[key], dict) and isinstance(b[key], dict):
158
+ diffs.update(PortalObject._compare(a[key], b[key], compare=compare, _path=path))
159
+ elif isinstance(a[key], list) and isinstance(b[key], list):
160
+ diffs.update(PortalObject._compare(list_to_dictionary(a[key]),
161
+ list_to_dictionary(b[key]), compare=compare, _path=path))
162
+ elif a[key] != b[key]:
163
+ if not callable(compare) or not compare(path, a[key], b[key]):
164
+ diffs[path] = {"value": a[key], "differing_value": b[key]}
165
+ return diffs
dcicutils/schema_utils.py CHANGED
@@ -1,4 +1,6 @@
1
- from typing import Any, Dict, List
1
+ import os
2
+ from typing import Any, Dict, List, Optional, Tuple
3
+ from dcicutils.misc_utils import to_camel_case
2
4
 
3
5
 
4
6
  class JsonSchemaConstants:
@@ -183,3 +185,79 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
183
185
  for one_of_schema in get_one_of(schema)
184
186
  if get_format(one_of_schema)
185
187
  ]
188
+
189
+
190
+ class Schema:
191
+
192
+ def __init__(self, schema: dict, schema_type: Optional[str] = None) -> None:
193
+ self._data = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else {})
194
+ self._type = (isinstance(schema_type, str) and schema_type) or Schema.type_name(self._data.get("title", ""))
195
+
196
+ @property
197
+ def data(self) -> dict:
198
+ return self._data
199
+
200
+ @property
201
+ def type(self) -> str:
202
+ return self._type
203
+
204
+ @staticmethod
205
+ def type_name(value: str) -> Optional[str]: # File or other name.
206
+ if isinstance(value, str) and (value := os.path.basename(value.replace(" ", ""))):
207
+ return to_camel_case(value[0:dot] if (dot := value.rfind(".")) >= 0 else value)
208
+
209
+ def property_by_path(self, property_path: str) -> Optional[dict]:
210
+ """
211
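+ Returns the schema definition (dictionary) for the property at the given (dotted) property path, or None if not found.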
212
+ """
213
+ return Schema.get_property_by_path(self._data, property_path)
214
+
215
+ _ARRAY_NAME_SUFFIX_CHAR = "#"
216
+ _DOTTED_NAME_DELIMITER_CHAR = "."
217
+
218
+ @staticmethod
219
+ def get_property_by_path(schema: dict, property_path: str) -> Optional[dict]:
220
+ if not isinstance(schema, dict) or not isinstance(property_path, str):
221
+ return None
222
+ elif not (schema_properties := schema.get("properties")):
223
+ return None
224
+ property_paths = property_path.split(Schema._DOTTED_NAME_DELIMITER_CHAR)
225
+ for property_index, property_name in enumerate(property_paths):
226
+ property_name, array_specifiers = Schema._unarrayize_property_name(property_name)
227
+ if not (property_value := schema_properties.get(property_name)):
228
+ return None
229
+ elif (property_type := property_value.get("type")) == "object":
230
+ property_paths_tail = Schema._DOTTED_NAME_DELIMITER_CHAR.join(property_paths[property_index + 1:])
231
+ return Schema.get_property_by_path(property_value, property_paths_tail)
232
+ elif (property_type := property_value.get("type")) == "array":
233
+ if not array_specifiers:
234
+ if property_index == len(property_paths) - 1:
235
+ return property_value
236
+ return None
237
+ for array_index in range(len(array_specifiers)):
238
+ if property_type != "array":
239
+ return None
240
+ elif not (array_items := property_value.get("items")):
241
+ return None
242
+ property_type = (property_value := array_items).get("type")
243
+ if property_type == "object":
244
+ if property_index == len(property_paths) - 1:
245
+ return property_value
246
+ property_paths_tail = Schema._DOTTED_NAME_DELIMITER_CHAR.join(property_paths[property_index + 1:])
247
+ return Schema.get_property_by_path(property_value, property_paths_tail)
248
+ return property_value
249
+
250
+ @staticmethod
251
+ def _unarrayize_property_name(property_name: str) -> Tuple[str, Optional[List[int]]]:
252
+ if len(components := (property_name := property_name.strip()).split(Schema._ARRAY_NAME_SUFFIX_CHAR)) < 2:
253
+ return property_name, None
254
+ unarrayized_property_name = components[0].strip()
255
+ array_specifiers = []
256
+ for component in components[1:]:
257
+ if component.isdigit():
258
+ array_specifiers.append(int(component))
259
+ elif component == "":
260
+ array_specifiers.append(0)
261
+ else:
262
+ return property_name, None
263
+ return unarrayized_property_name, array_specifiers
@@ -74,7 +74,7 @@ class StructuredDataSet:
74
74
  row_number += 1
75
75
  if (validation_errors := schema.validate(data)) is not None:
76
76
  for validation_error in validation_errors:
77
- self._note_error({"src": create_dict(type=schema.name, row=row_number),
77
+ self._note_error({"src": create_dict(type=schema.type, row=row_number),
78
78
  "error": validation_error}, "validation")
79
79
 
80
80
  @property
@@ -168,7 +168,7 @@ class StructuredDataSet:
168
168
  if not structured_row_template: # Delay creation just so we don't reference schema if there are no rows.
169
169
  if not schema and not noschema and not (schema := Schema.load_by_name(type_name, portal=self._portal)):
170
170
  noschema = True
171
- elif schema and (schema_name := schema.name):
171
+ elif schema and (schema_name := schema.type):
172
172
  type_name = schema_name
173
173
  structured_row_template = _StructuredRowTemplate(reader.header, schema)
174
174
  structured_row = structured_row_template.create_row()
@@ -222,7 +222,7 @@ class _StructuredRowTemplate:
222
222
 
223
223
  def set_value(self, data: dict, column_name: str, value: str, file: Optional[str], row_number: int = -1) -> None:
224
224
  if (set_value_function := self._set_value_functions.get(column_name)):
225
- src = create_dict(type=self._schema.name if self._schema else None,
225
+ src = create_dict(type=self._schema.type if self._schema else None,
226
226
  column=column_name, file=file, row=row_number)
227
227
  set_value_function(data, value, src)
228
228
 
@@ -319,8 +319,8 @@ class _StructuredRowTemplate:
319
319
  class Schema:
320
320
 
321
321
  def __init__(self, schema_json: dict, portal: Optional[Portal] = None) -> None:
322
- self.data = schema_json
323
- self.name = Schema.type_name(schema_json.get("title", "")) if schema_json else ""
322
+ self._data = schema_json if isinstance(schema_json, dict) else {}
323
+ self._type = Schema.type_name(schema_json.get("title", ""))
324
324
  self._portal = portal # Needed only to resolve linkTo references.
325
325
  self._map_value_functions = {
326
326
  "boolean": self._map_function_boolean,
@@ -333,6 +333,14 @@ class Schema:
333
333
  self._unresolved_refs = []
334
334
  self._typeinfo = self._create_typeinfo(schema_json)
335
335
 
336
+ @property
337
+ def data(self) -> dict:
338
+ return self._data
339
+
340
+ @property
341
+ def type(self) -> str:
342
+ return self._type
343
+
336
344
  @staticmethod
337
345
  def load_by_name(name: str, portal: Portal) -> Optional[dict]:
338
346
  schema_json = portal.get_schema(Schema.type_name(name)) if portal else None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.7.0.1b33
3
+ Version: 8.7.0.1b35
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -44,7 +44,7 @@ dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
44
44
  dcicutils/misc_utils.py,sha256=bMRWWxdbhuF3PKdCZEH-H4U1ecgT3Nag3EL92D9XGoY,100973
45
45
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
46
46
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
47
- dcicutils/portal_object_utils.py,sha256=io5XCdfHarFb1H2S3IyDNb25n9LPw2IZjcsiBowmeN4,5251
47
+ dcicutils/portal_object_utils.py,sha256=udjT_-IYWInTc-oXoh0Ie_xVYp28YEg3BRIlS88eUwQ,8352
48
48
  dcicutils/portal_utils.py,sha256=jKYgZUYVdkg6VOs1hsiX4bSULLguOIBJFFRpvvZEklU,26704
49
49
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
50
50
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -52,22 +52,22 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
52
52
  dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
53
53
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
54
54
  dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
55
- dcicutils/schema_utils.py,sha256=3Gd9QboOjQ3FHFawerylvYYU8Lor1Ma2pFv4JmezCdg,5501
55
+ dcicutils/schema_utils.py,sha256=h3VlIiBxE8EmxnfcXHF5KZhNeIZIA71LvqeIM-04gKY,9169
56
56
  dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
57
57
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
58
58
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
59
59
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
60
60
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
61
61
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
62
- dcicutils/structured_data.py,sha256=0YYNwGJeev7xAV9sQj0ioP4m_h1gcx--Sj_dzLpO9us,34068
62
+ dcicutils/structured_data.py,sha256=VxkyBBMFoRIEoPZxQuYU1iVO3piQwSsG-W6_pKy3qXE,34225
63
63
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
64
64
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
65
65
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
66
66
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
67
67
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
68
68
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
69
- dcicutils-8.7.0.1b33.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
70
- dcicutils-8.7.0.1b33.dist-info/METADATA,sha256=oBnl8kr-p0xukoz_JLaF6utwsnX0CK79u9ykMwe8rCU,3315
71
- dcicutils-8.7.0.1b33.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
72
- dcicutils-8.7.0.1b33.dist-info/entry_points.txt,sha256=8wbw5csMIgBXhkwfgsgJeuFcoUc0WsucUxmOyml2aoA,209
73
- dcicutils-8.7.0.1b33.dist-info/RECORD,,
69
+ dcicutils-8.7.0.1b35.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
70
+ dcicutils-8.7.0.1b35.dist-info/METADATA,sha256=v9eG3K-Qg11ZBSXujKZI_CGs6AYp8YCAp1OAe6Eywgo,3315
71
+ dcicutils-8.7.0.1b35.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
72
+ dcicutils-8.7.0.1b35.dist-info/entry_points.txt,sha256=8wbw5csMIgBXhkwfgsgJeuFcoUc0WsucUxmOyml2aoA,209
73
+ dcicutils-8.7.0.1b35.dist-info/RECORD,,