dcicutils 8.8.0.1b22__py3-none-any.whl → 8.8.0.1b24__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  from copy import deepcopy
2
2
  from functools import lru_cache
3
3
  import re
4
- from typing import Any, List, Optional, Tuple, Type, Union
4
+ from typing import Any, Callable, List, Optional, Tuple, Type, Union
5
5
  from dcicutils.data_readers import RowReader
6
6
  from dcicutils.misc_utils import create_readonly_object
7
7
  from dcicutils.portal_utils import Portal
@@ -19,7 +19,7 @@ class PortalObject:
19
19
  self._data = data if isinstance(data, dict) else {}
20
20
  self._portal = portal if isinstance(portal, Portal) else None
21
21
  self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
22
- self._type = type if isinstance(type, str) and type else None
22
+ self._type = type if isinstance(type, str) else ""
23
23
 
24
24
  @property
25
25
  def data(self) -> dict:
@@ -31,8 +31,8 @@ class PortalObject:
31
31
 
32
32
  @property
33
33
  @lru_cache(maxsize=1)
34
- def type(self) -> Optional[str]:
35
- return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else None)
34
+ def type(self) -> str:
35
+ return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else "")
36
36
 
37
37
  @property
38
38
  @lru_cache(maxsize=1)
@@ -75,86 +75,42 @@ class PortalObject:
75
75
  identifying_properties.append("aliases")
76
76
  return identifying_properties or None
77
77
 
78
- @property
79
- @lru_cache(maxsize=1)
80
- def identifying_paths(self) -> Optional[List[str]]:
81
- """
82
- Returns a list of the possible Portal URL paths identifying this Portal object.
83
- """
84
- identifying_paths = []
85
- if not (identifying_properties := self.identifying_properties):
86
- if self.uuid:
87
- if self.type:
88
- identifying_paths.append(f"/{self.type}/{self.uuid}")
89
- identifying_paths.append(f"/{self.uuid}")
90
- return identifying_paths
91
- for identifying_property in identifying_properties:
92
- if (identifying_value := self._data.get(identifying_property)):
93
- if identifying_property == "uuid":
94
- identifying_paths.append(f"/{self.type}/{identifying_value}")
95
- identifying_paths.append(f"/{identifying_value}")
96
- # For now at least we include the path both with and without the schema type component,
97
- # as for some identifying values, it works (only) with, and some, it works (only) without.
98
- # For example: If we have FileSet with "accession", an identifying property, with value
99
- # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
100
- # conversely using "submitted_id", also an identifying property, with value
101
- # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
102
- # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
103
- elif isinstance(identifying_value, list):
104
- for identifying_value_item in identifying_value:
105
- if self.type:
106
- identifying_paths.append(f"/{self.type}/{identifying_value_item}")
107
- identifying_paths.append(f"/{identifying_value_item}")
108
- else:
109
- if (schema := self.schema):
110
- if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
111
- if not re.match(pattern, identifying_value):
112
- # If this identifying value is for a (identifying) property which has a
113
- # pattern, and the value does NOT match the pattern, then do NOT include
114
- # this value as an identifying path, since it cannot possibly be found.
115
- continue
116
- if self.type:
117
- identifying_paths.append(f"/{self.type}/{identifying_value}")
118
- identifying_paths.append(f"/{identifying_value}")
119
- return identifying_paths or None
120
-
121
- @property
122
- @lru_cache(maxsize=1)
123
- def identifying_path(self) -> Optional[str]:
124
- if identifying_paths := self.identifying_paths:
125
- return identifying_paths[0]
126
-
127
- def lookup(self, include_identifying_path: bool = False,
128
- raw: bool = False) -> Optional[Union[Tuple[PortalObject, str], PortalObject]]:
129
- return self._lookup(raw=raw) if include_identifying_path else self._lookup(raw=raw)[0]
130
-
131
- def lookup_identifying_path(self) -> Optional[str]:
132
- return self._lookup()[1]
133
-
134
- def _lookup(self, raw: bool = False) -> Tuple[Optional[PortalObject], Optional[str]]:
78
+ @lru_cache(maxsize=8192)
79
+ def lookup(self, raw: bool = False,
80
+ ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
81
+ nlookups = 0
82
+ first_identifying_path = None
135
83
  try:
136
- if identifying_paths := self.identifying_paths:
84
+ if identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy):
137
85
  for identifying_path in identifying_paths:
86
+ if not first_identifying_path:
87
+ first_identifying_path = identifying_path
88
+ nlookups += 1
138
89
  if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
139
- return PortalObject(value.json(),
140
- portal=self._portal, type=self.type if raw else None), identifying_path
90
+ return (
91
+ PortalObject(value.json(), portal=self._portal, type=self.type if raw else None),
92
+ identifying_path,
93
+ nlookups
94
+ )
141
95
  except Exception:
142
96
  pass
143
- return None, self.identifying_path
97
+ return None, first_identifying_path, nlookups
144
98
 
145
99
  def compare(self, value: Union[dict, PortalObject],
146
- consider_refs: bool = False, resolved_refs: List[dict] = None) -> dict:
100
+ consider_refs: bool = False, resolved_refs: List[dict] = None) -> Tuple[dict, int]:
147
101
  if consider_refs and isinstance(resolved_refs, list):
148
- this_data = self.normalized_refs(refs=resolved_refs).data
102
+ normlized_portal_object, nlookups = self._normalized_refs(refs=resolved_refs)
103
+ this_data = normlized_portal_object.data
149
104
  else:
150
105
  this_data = self.data
106
+ nlookups = 0
151
107
  if isinstance(value, PortalObject):
152
108
  comparing_data = value.data
153
109
  elif isinstance(value, dict):
154
110
  comparing_data = value
155
111
  else:
156
- return {}
157
- return PortalObject._compare(this_data, comparing_data)
112
+ return {}, nlookups
113
+ return PortalObject._compare(this_data, comparing_data), nlookups
158
114
 
159
115
  @staticmethod
160
116
  def _compare(a: Any, b: Any, _path: Optional[str] = None) -> dict:
@@ -201,42 +157,106 @@ class PortalObject:
201
157
  diffs[_path] = diff_updating(a, b)
202
158
  return diffs
203
159
 
204
- def normalize_refs(self, refs: List[dict]) -> None:
160
+ @lru_cache(maxsize=1)
161
+ def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
205
162
  """
206
- Turns any (linkTo) references which are paths (e.g. /SubmissionCenter/uwsc_gcc) within
207
- this Portal object into the uuid style reference (e.g. d1b67068-300f-483f-bfe8-63d23c93801f),
208
- based on the given "refs" list which is assumed to be a list of dictionaries, where each
209
- contains a "path" and a "uuid" property; this list is typically (for our first usage of
210
- this function) the value of structured_data.StructuredDataSet.resolved_refs_with_uuid.
211
- Changes are made to this Portal object in place; use normalized_refs function to make a copy.
212
- If there are no "refs" (None or empty) or if the speicified reference is not found in this
213
- list then the references will be looked up via Portal calls (via Portal.get_metadata).
163
+ Returns a list of the possible Portal URL paths identifying this Portal object.
214
164
  """
215
- PortalObject._normalize_refs(self.data, refs=refs, schema=self.schema, portal=self.portal)
165
+ identifying_paths = []
166
+ if not (identifying_properties := self.identifying_properties):
167
+ if self.uuid:
168
+ if self.type:
169
+ identifying_paths.append(f"/{self.type}/{self.uuid}")
170
+ identifying_paths.append(f"/{self.uuid}")
171
+ return identifying_paths
172
+ for identifying_property in identifying_properties:
173
+ if identifying_value := self._data.get(identifying_property):
174
+ if identifying_property == "uuid":
175
+ if self.type:
176
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
177
+ identifying_paths.append(f"/{identifying_value}")
178
+ # For now at least we include the path both with and without the schema type component,
179
+ # as for some identifying values, it works (only) with, and some, it works (only) without.
180
+ # For example: If we have FileSet with "accession", an identifying property, with value
181
+ # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
182
+ # conversely using "submitted_id", also an identifying property, with value
183
+ # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
184
+ # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
185
+ elif isinstance(identifying_value, list):
186
+ for identifying_value_item in identifying_value:
187
+ if self.type:
188
+ identifying_paths.append(f"/{self.type}/{identifying_value_item}")
189
+ identifying_paths.append(f"/{identifying_value_item}")
190
+ else:
191
+ # TODO: Import from somewhere ...
192
+ lookup_options = 0
193
+ if schema := self.schema:
194
+ # TODO: Hook into the ref_lookup_strategy thing in structured_data to make
195
+ # sure we check accession format (since it does not have a pattern).
196
+ if callable(ref_lookup_strategy):
197
+ lookup_options, ref_validator = ref_lookup_strategy(
198
+ self.type, schema, identifying_value)
199
+ if callable(ref_validator):
200
+ if ref_validator(schema, identifying_property, identifying_value) is False:
201
+ continue
202
+ if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
203
+ if not re.match(pattern, identifying_value):
204
+ # If this identifying value is for a (identifying) property which has a
205
+ # pattern, and the value does NOT match the pattern, then do NOT include
206
+ # this value as an identifying path, since it cannot possibly be found.
207
+ continue
208
+ if not lookup_options:
209
+ lookup_options = Portal.LOOKUP_DEFAULT
210
+ if Portal.is_lookup_root_first(lookup_options):
211
+ identifying_paths.append(f"/{identifying_value}")
212
+ if Portal.is_lookup_specified_type(lookup_options) and self.type:
213
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
214
+ if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
215
+ identifying_paths.append(f"/{identifying_value}")
216
+ if Portal.is_lookup_subtypes(lookup_options):
217
+ for subtype_name in self._portal.get_schema_subtype_names(self.type):
218
+ identifying_paths.append(f"/{subtype_name}/{identifying_value}")
219
+ return identifying_paths or None
216
220
 
217
- def normalized_refs(self, refs: List[dict]) -> PortalObject:
221
+ def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
218
222
  """
219
- Same as normalize_ref but does not make this change to this Portal object in place,
223
+ Same as _normalize_refs but does NOT make this change to this Portal object IN PLACE,
220
224
  rather it returns a new instance of this Portal object wrapped in a new PortalObject.
221
225
  """
222
226
  portal_object = self.copy()
223
- portal_object.normalize_refs(refs)
224
- return portal_object
227
+ nlookups = portal_object._normalize_refs(refs)
228
+ return portal_object, nlookups
229
+
230
+ def _normalize_refs(self, refs: List[dict]) -> int:
231
+ """
232
+ Turns any (linkTo) references which are paths (e.g. /SubmissionCenter/uwsc_gcc) within this
233
+ object IN PLACE into the uuid style reference (e.g. d1b67068-300f-483f-bfe8-63d23c93801f),
234
+ based on the given "refs" list which is assumed to be a list of dictionaries, where each
235
+ contains a "path" and a "uuid" property; this list is typically (for our first usage of
236
+ this function) the value of structured_data.StructuredDataSet.resolved_refs_with_uuid.
237
+ Changes are made to this Portal object IN PLACE; use _normalized_refs function to make a copy.
238
+ If there are no "refs" (None or empty) or if the specified reference is not found in this
239
+ list then the references will be looked up via Portal calls (via Portal.get_metadata).
240
+ """
241
+ _, nlookups = PortalObject._normalize_data_refs(self.data, refs=refs, schema=self.schema, portal=self.portal)
242
+ return nlookups
225
243
 
226
244
  @staticmethod
227
- def _normalize_refs(value: Any, refs: List[dict], schema: dict, portal: Portal, _path: Optional[str] = None) -> Any:
245
+ def _normalize_data_refs(value: Any, refs: List[dict], schema: dict,
246
+ portal: Portal, _path: Optional[str] = None) -> Tuple[Any, int]:
247
+ nlookups = 0
228
248
  if not value or not isinstance(schema, dict):
229
- return value
249
+ return value, nlookups
230
250
  if isinstance(value, dict):
231
251
  for key in value:
232
252
  path = f"{_path}.{key}" if _path else key
233
- value[key] = PortalObject._normalize_refs(value[key], refs=refs,
234
- schema=schema, portal=portal, _path=path)
253
+ value[key], nlookups = PortalObject._normalize_data_refs(value[key], refs=refs,
254
+ schema=schema, portal=portal, _path=path)
235
255
  elif isinstance(value, list):
236
256
  for index in range(len(value)):
237
257
  path = f"{_path or ''}#{index}"
238
- value[index] = PortalObject._normalize_refs(value[index], refs=refs,
239
- schema=schema, portal=portal, _path=path)
258
+ value[index], nlookups = PortalObject._normalize_data_refs(value[index], refs=refs,
259
+ schema=schema, portal=portal, _path=path)
240
260
  elif value_type := Schema.get_property_by_path(schema, _path):
241
261
  if link_to := value_type.get("linkTo"):
242
262
  ref_path = f"/{link_to}/{value}"
@@ -247,7 +267,7 @@ class PortalObject:
247
267
  else:
248
268
  ref_uuid = None
249
269
  if ref_uuid:
250
- return ref_uuid
270
+ return ref_uuid, nlookups
251
271
  # Here our (linkTo) reference appears not to be in the given refs; if these refs came
252
272
  # from structured_data.StructuredDataSet.resolved_refs_with_uuid (in the context of
253
273
  # smaht-submitr, which is the typical/first use case for this function) then this could
@@ -255,6 +275,7 @@ class PortalObject:
255
275
  # the data/spreadsheet being submitted. In any case, we don't have the associated uuid
256
276
  # so let us look it up here.
257
277
  if isinstance(portal, Portal):
278
+ nlookups += 1
258
279
  if (ref_object := portal.get_metadata(ref_path)) and (ref_uuid := ref_object.get("uuid")):
259
- return ref_uuid
260
- return value
280
+ return ref_uuid, nlookups
281
+ return value, nlookups
dcicutils/portal_utils.py CHANGED
@@ -46,6 +46,22 @@ class Portal:
46
46
  KEYS_FILE_DIRECTORY = "~"
47
47
  MIME_TYPE_JSON = "application/json"
48
48
 
49
+ # Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
50
+ # structured_data.py; controlled by an optional ref_lookup_strategy callable; default is
51
+ # lookup at root path but after the specified type path lookup, and then lookup all subtypes;
52
+ # can choose to lookup root path first, or not lookup root path at all, or not lookup
53
+ # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
54
+ # and value (string) arguments and return an integer of any of the below ORed together.
55
+ # The main purpose of this is optimization; to minimize portal lookups; since for example,
56
+ # currently at least, /{type}/{accession} does not work but /{accession} does; so we
57
+ # currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
58
+ # And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
59
+ LOOKUP_SPECIFIED_TYPE = 0x0001
60
+ LOOKUP_ROOT = 0x0002
61
+ LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
62
+ LOOKUP_SUBTYPES = 0x0008
63
+ LOOKUP_DEFAULT = LOOKUP_SPECIFIED_TYPE | LOOKUP_ROOT | LOOKUP_SUBTYPES
64
+
49
65
  def __init__(self,
50
66
  arg: Optional[Union[Portal, TestApp, VirtualApp, PyramidRouter, dict, tuple, str]] = None,
51
67
  env: Optional[str] = None, server: Optional[str] = None,
@@ -188,9 +204,27 @@ class Portal:
188
204
  def vapp(self) -> Optional[TestApp]:
189
205
  return self._vapp
190
206
 
207
+ @staticmethod
208
+ def is_lookup_specified_type(lookup_options: int) -> bool:
209
+ return (lookup_options &
210
+ Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
211
+
212
+ @staticmethod
213
+ def is_lookup_root(lookup_options: int) -> bool:
214
+ return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
215
+
216
+ @staticmethod
217
+ def is_lookup_root_first(lookup_options: int) -> bool:
218
+ return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
219
+
220
+ @staticmethod
221
+ def is_lookup_subtypes(lookup_options: int) -> bool:
222
+ return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
223
+
191
224
  def get(self, url: str, follow: bool = True,
192
225
  raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
193
226
  url = self.url(url, raw, database)
227
+ # print(f'xyzzy.portal.get({url})')
194
228
  if not self.vapp:
195
229
  response = requests.get(url, allow_redirects=follow, **self._kwargs(**kwargs))
196
230
  else:
@@ -205,6 +239,7 @@ class Portal:
205
239
  def patch(self, url: str, data: Optional[dict] = None, json: Optional[dict] = None,
206
240
  raise_for_status: bool = False, **kwargs) -> OptionalResponse:
207
241
  url = self.url(url)
242
+ # print(f'xyzzy.portal.patch({url})')
208
243
  if not self.vapp:
209
244
  response = requests.patch(url, data=data, json=json, **self._kwargs(**kwargs))
210
245
  else:
@@ -217,6 +252,7 @@ class Portal:
217
252
  def post(self, url: str, data: Optional[dict] = None, json: Optional[dict] = None, files: Optional[dict] = None,
218
253
  raise_for_status: bool = False, **kwargs) -> OptionalResponse:
219
254
  url = self.url(url)
255
+ # print(f'xyzzy.portal.post({url})')
220
256
  if files and not ("headers" in kwargs):
221
257
  # Setting headers to None when using files implies content-type multipart/form-data.
222
258
  kwargs["headers"] = None
@@ -233,6 +269,7 @@ class Portal:
233
269
  return response
234
270
 
235
271
  def get_metadata(self, object_id: str, raw: bool = False, database: bool = False) -> Optional[dict]:
272
+ # print(f'xyzzy.portal.get_metadata({object_id})')
236
273
  if isinstance(raw, bool) and raw:
237
274
  add_on = "frame=raw" + ("&datastore=database" if isinstance(database, bool) and database else "")
238
275
  elif database:
@@ -242,11 +279,13 @@ class Portal:
242
279
  return get_metadata(obj_id=object_id, vapp=self.vapp, key=self.key, add_on=add_on)
243
280
 
244
281
  def patch_metadata(self, object_id: str, data: dict) -> Optional[dict]:
282
+ # print(f'xyzzy.portal.patch_metadata({object_id})')
245
283
  if self.key:
246
284
  return patch_metadata(obj_id=object_id, patch_item=data, key=self.key)
247
285
  return self.patch(f"/{object_id}", data).json()
248
286
 
249
287
  def post_metadata(self, object_type: str, data: dict) -> Optional[dict]:
288
+ # print(f'xyzzy.portal.post_metadata({object_type})')
250
289
  if self.key:
251
290
  return post_metadata(schema_name=object_type, post_item=data, key=self.key)
252
291
  return self.post(f"/{object_type}", data).json()
@@ -358,6 +397,12 @@ class Portal:
358
397
  super_type_map_flattened[super_type_name] = list_breadth_first(super_type_map, super_type_name)
359
398
  return super_type_map_flattened
360
399
 
400
+ @lru_cache(maxsize=64)
401
+ def get_schema_subtype_names(self, type_name: str) -> List[str]:
402
+ if not (schemas_super_type_map := self.get_schemas_super_type_map()):
403
+ return []
404
+ return schemas_super_type_map.get(type_name, [])
405
+
361
406
  def url(self, url: str, raw: bool = False, database: bool = False) -> str:
362
407
  if not isinstance(url, str) or not url:
363
408
  return "/"
@@ -48,22 +48,6 @@ StructuredDataSet = Type["StructuredDataSet"]
48
48
 
49
49
  class StructuredDataSet:
50
50
 
51
- # Reference (linkTo) lookup strategies; on a per-reference (type/value) basis;
52
- # controlled by optional ref_lookup_strategy callable; default is lookup at root path
53
- # but after the named reference (linkTo) type path lookup, and then lookup all subtypes;
54
- # can choose to lookup root path first, or not lookup root path at all, or not lookup
55
- # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
56
- # and value (string) arguements and return an integer of any of the below ORed together.
57
- # The main purpose of this is optimization; to minimize portal lookups; since for example,
58
- # currently at least, /{type}/{accession} does not work but /{accession} does; so we
59
- # currently (smaht-portal/.../ingestion_processors) use REF_LOOKUP_ROOT_FIRST for this.
60
- # And current usage NEVER has REF_LOOKUP_SUBTYPES turned OFF; but support just in case.
61
- REF_LOOKUP_SPECIFIED_TYPE = 0x0001
62
- REF_LOOKUP_ROOT = 0x0002
63
- REF_LOOKUP_ROOT_FIRST = 0x0004 | REF_LOOKUP_ROOT
64
- REF_LOOKUP_SUBTYPES = 0x0008
65
- REF_LOOKUP_DEFAULT = REF_LOOKUP_SPECIFIED_TYPE | REF_LOOKUP_ROOT | REF_LOOKUP_SUBTYPES
66
-
67
51
  def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
68
52
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
69
53
  order: Optional[List[str]] = None, prune: bool = True,
@@ -76,6 +60,7 @@ class StructuredDataSet:
76
60
  self._portal = Portal(portal, data=self._data, schemas=schemas,
77
61
  ref_lookup_strategy=ref_lookup_strategy,
78
62
  ref_lookup_nocache=ref_lookup_nocache) if portal else None
63
+ self._ref_lookup_strategy = ref_lookup_strategy
79
64
  self._order = order
80
65
  self._prune = prune
81
66
  self._warnings = {}
@@ -199,25 +184,49 @@ class StructuredDataSet:
199
184
  upload_file["path"] = file_path
200
185
  return upload_files
201
186
 
202
- def compare(self) -> dict:
187
+ def compare(self, progress: Optional[Callable] = None) -> dict:
188
+ def get_counts() -> int:
189
+ ntypes = 0
190
+ nobjects = 0
191
+ if self.data:
192
+ ntypes = len(self.data)
193
+ for type_name in self.data:
194
+ nobjects += len(self.data[type_name])
195
+ return ntypes, nobjects
203
196
  diffs = {}
204
- if self.data or self.portal:
197
+ if callable(progress):
198
+ ntypes, nobjects = get_counts()
199
+ progress({"start": True, "types": ntypes, "objects": nobjects})
200
+ if self.data or self.portal: # TODO: what is this OR biz?
205
201
  refs = self.resolved_refs_with_uuids
206
- for object_type in self.data:
207
- if not diffs.get(object_type):
208
- diffs[object_type] = []
209
- for portal_object in self.data[object_type]:
210
- portal_object = PortalObject(portal_object, portal=self.portal, type=object_type)
211
- existing_object, identifying_path = portal_object.lookup(include_identifying_path=True, raw=True)
202
+ # TODO: Need feedback/progress tracking mechanism here.
203
+ # TODO: Check validity of reference; actually check that earlier on even maybe.
204
+ for type_name in self.data:
205
+ if not diffs.get(type_name):
206
+ diffs[type_name] = []
207
+ for portal_object in self.data[type_name]:
208
+ portal_object = PortalObject(portal_object, portal=self.portal, type=type_name)
209
+ existing_object, identifying_path, nlookups = (
210
+ portal_object.lookup(raw=True, ref_lookup_strategy=self._ref_lookup_strategy))
212
211
  if existing_object:
213
- object_diffs = portal_object.compare(existing_object, consider_refs=True, resolved_refs=refs)
214
- diffs[object_type].append(create_readonly_object(path=identifying_path,
215
- uuid=existing_object.uuid,
216
- diffs=object_diffs or None))
212
+ object_diffs, nlookups_compare = portal_object.compare(
213
+ existing_object, consider_refs=True, resolved_refs=refs)
214
+ diffs[type_name].append(create_readonly_object(path=identifying_path,
215
+ uuid=existing_object.uuid,
216
+ diffs=object_diffs or None))
217
+ if callable(progress):
218
+ progress({"update": True, "lookups": nlookups + nlookups_compare})
217
219
  elif identifying_path:
218
220
  # If there is no existing object we still create a record for this object
219
221
  # but with no uuid which will be the indication that it does not exist.
220
- diffs[object_type].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
222
+ diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
223
+ if callable(progress):
224
+ progress({"create": True, "lookups": nlookups})
225
+ else:
226
+ if callable(progress):
227
+ progress({"lookups": nlookups})
228
+ if callable(progress):
229
+ progress({"finish": True})
221
230
  return diffs
222
231
 
223
232
  def _load_file(self, file: str) -> None:
@@ -251,7 +260,7 @@ class StructuredDataSet:
251
260
  self._load_reader(CsvReader(file), type_name=Schema.type_name(file))
252
261
 
253
262
  def _load_excel_file(self, file: str) -> None:
254
- def calculate_total_rows_to_process() -> Tuple[int, int]:
263
+ def get_counts() -> Tuple[int, int]:
255
264
  nonlocal file
256
265
  excel = Excel(file)
257
266
  nrows = 0
@@ -260,18 +269,27 @@ class StructuredDataSet:
260
269
  nrows += 1
261
270
  return nrows, len(excel.sheet_names)
262
271
  if self._progress:
263
- self._progress_update(calculate_total_rows_to_process)
272
+ nrows, nsheets = get_counts()
273
+ self._progress({"start": True, "sheets": nsheets, "rows": nrows})
274
+ """
275
+ if self._progress:
276
+ self._progress_update(get_counts)
277
+ """
264
278
  excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
265
279
  order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
266
280
  for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
267
281
  self._load_reader(excel.sheet_reader(sheet_name), type_name=Schema.type_name(sheet_name))
282
+ if self._progress:
283
+ self._progress({"finish": True})
284
+ # TODO: Do we really need progress reporting for the below?
268
285
  # Check for unresolved reference errors which really are not because of ordering.
269
286
  # Yes such internal references will be handled correctly on actual database update via snovault.loadxl.
270
287
  if ref_errors := self.ref_errors:
271
288
  ref_errors_actual = []
272
289
  for ref_error in ref_errors:
273
290
  if not (resolved := self.portal.ref_exists(ref := ref_error["error"])):
274
- # if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])): # TODO
291
+ # TODO: Probably do this instead; and if so then no progress needed (per question above).
292
+ # if not (resolved := self.portal.ref_exists_internally(ref := ref_error["error"])):
275
293
  ref_errors_actual.append(ref_error)
276
294
  else:
277
295
  self._resolved_refs.add((ref, resolved.get("uuid")))
@@ -304,13 +322,15 @@ class StructuredDataSet:
304
322
  self._add_properties(structured_row, self._autoadd_properties, schema)
305
323
  self._add(type_name, structured_row)
306
324
  if self._progress:
307
- self._progress_update(-1,
308
- self.ref_total_count,
309
- self.ref_total_found_count,
310
- self.ref_total_notfound_count,
311
- self.ref_lookup_count,
312
- self.ref_lookup_cache_hit_count,
313
- self.ref_invalid_identifying_property_count)
325
+ self._progress({
326
+ "parse": True,
327
+ "refs": self.ref_total_count,
328
+ "refs_found": self.ref_total_found_count,
329
+ "refs_not_found": self.ref_total_notfound_count,
330
+ "refs_lookup": self.ref_lookup_count,
331
+ "refs_cache_hit": self.ref_lookup_cache_hit_count,
332
+ "refs_invalid": self.ref_invalid_identifying_property_count
333
+ })
314
334
  self._note_warning(reader.warnings, "reader")
315
335
  if schema:
316
336
  self._note_error(schema._unresolved_refs, "ref")
@@ -331,16 +351,16 @@ class StructuredDataSet:
331
351
 
332
352
  def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
333
353
  return (ref_lookup_flags &
334
- StructuredDataSet.REF_LOOKUP_SPECIFIED_TYPE) == StructuredDataSet.REF_LOOKUP_SPECIFIED_TYPE
354
+ Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
335
355
 
336
356
  def _is_ref_lookup_root(ref_lookup_flags: int) -> bool:
337
- return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_ROOT) == StructuredDataSet.REF_LOOKUP_ROOT
357
+ return (ref_lookup_flags & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
338
358
 
339
359
  def _is_ref_lookup_root_first(ref_lookup_flags: int) -> bool:
340
- return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_ROOT_FIRST) == StructuredDataSet.REF_LOOKUP_ROOT_FIRST
360
+ return (ref_lookup_flags & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
341
361
 
342
362
  def _is_ref_lookup_subtypes(ref_lookup_flags: int) -> bool:
343
- return (ref_lookup_flags & StructuredDataSet.REF_LOOKUP_SUBTYPES) == StructuredDataSet.REF_LOOKUP_SUBTYPES
363
+ return (ref_lookup_flags & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
344
364
 
345
365
  @property
346
366
  def ref_total_count(self) -> int:
@@ -786,7 +806,7 @@ class Portal(PortalBase):
786
806
  if callable(ref_lookup_strategy):
787
807
  self._ref_lookup_strategy = ref_lookup_strategy
788
808
  else:
789
- self._ref_lookup_strategy = lambda type_name, schema, value: (StructuredDataSet.REF_LOOKUP_DEFAULT, None)
809
+ self._ref_lookup_strategy = lambda type_name, schema, value: (Portal.LOOKUP_DEFAULT, None)
790
810
  if ref_lookup_nocache is True:
791
811
  self.ref_lookup = self.ref_lookup_uncached
792
812
  self._ref_cache = None
@@ -844,7 +864,7 @@ class Portal(PortalBase):
844
864
  return schemas
845
865
 
846
866
  @lru_cache(maxsize=64)
847
- def _get_schema_subtypes_names(self, type_name: str) -> List[str]:
867
+ def _get_schema_subtype_names(self, type_name: str) -> List[str]:
848
868
  if not (schemas_super_type_map := self.get_schemas_super_type_map()):
849
869
  return []
850
870
  return schemas_super_type_map.get(type_name, [])
@@ -907,7 +927,7 @@ class Portal(PortalBase):
907
927
  lookup_paths.append(f"/{type_name}/{value}")
908
928
  if is_ref_lookup_root and not is_ref_lookup_root_first:
909
929
  lookup_paths.append(f"/{value}")
910
- subtype_names = self._get_schema_subtypes_names(type_name) if is_ref_lookup_subtypes else []
930
+ subtype_names = self._get_schema_subtype_names(type_name) if is_ref_lookup_subtypes else []
911
931
  for subtype_name in subtype_names:
912
932
  lookup_paths.append(f"/{subtype_name}/{value}")
913
933
  if not lookup_paths:
@@ -946,7 +966,7 @@ class Portal(PortalBase):
946
966
  ref_lookup_strategy, ref_validator = (
947
967
  self._ref_lookup_strategy(type_name, self.get_schema(type_name), value))
948
968
  is_ref_lookup_subtypes = StructuredDataSet._is_ref_lookup_subtypes(ref_lookup_strategy)
949
- subtype_names = self._get_schema_subtypes_names(type_name) if is_ref_lookup_subtypes else []
969
+ subtype_names = self._get_schema_subtype_names(type_name) if is_ref_lookup_subtypes else []
950
970
  for type_name in [type_name] + subtype_names:
951
971
  is_resolved, resolved_item = self._ref_exists_single_internally(type_name, value)
952
972
  if is_resolved:
@@ -1008,7 +1028,7 @@ class Portal(PortalBase):
1008
1028
  if not is_uuid(property_value):
1009
1029
  return False
1010
1030
  return True
1011
- for schema_name in [type_name] + self._get_schema_subtypes_names(type_name):
1031
+ for schema_name in [type_name] + self._get_schema_subtype_names(type_name):
1012
1032
  if schema := self.get_schema(schema_name):
1013
1033
  if identifying_properties := schema.get("identifyingProperties"):
1014
1034
  for identifying_property in identifying_properties:
@@ -1033,7 +1053,7 @@ class Portal(PortalBase):
1033
1053
 
1034
1054
  def _cache_ref(self, type_name: str, value: str, resolved: List[str]) -> None:
1035
1055
  if self._ref_cache is not None:
1036
- subtype_names = self._get_schema_subtypes_names(type_name)
1056
+ subtype_names = self._get_schema_subtype_names(type_name)
1037
1057
  for type_name in [type_name] + subtype_names:
1038
1058
  self._ref_cache[f"/{type_name}/{value}"] = resolved
1039
1059
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.0.1b22
3
+ Version: 8.8.0.1b24
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -46,8 +46,8 @@ dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
46
46
  dcicutils/misc_utils.py,sha256=zVc4urdVGgnWjQ4UQlrGH-URAzr2l_PwZWI3u_GJdFE,102210
47
47
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
48
48
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
49
- dcicutils/portal_object_utils.py,sha256=khp7E0nb-aXuZWkL8_siftSlYCTOMHAd_2xHStM-maY,13613
50
- dcicutils/portal_utils.py,sha256=OR1wJNiZVEsxcweArA4-K6yiuy7_bCqawxhZnuFsUtM,27686
49
+ dcicutils/portal_object_utils.py,sha256=7gteQ5CM6IVDfHx-UPFiOfeE1fJYOir_uwWdRTykExQ,15374
50
+ dcicutils/portal_utils.py,sha256=trM8L9J1CPXntdsKSL56hy7SMpftNNpIReAn5iihGes,30050
51
51
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
52
52
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
53
53
  dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
@@ -62,15 +62,15 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
62
62
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
63
63
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
64
64
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
65
- dcicutils/structured_data.py,sha256=Yrpv1EgCOhwSaNk2ZMuzbgyxpmAVXpuJ6_4g4LMC2Q8,57180
65
+ dcicutils/structured_data.py,sha256=aXyLqYlgp5DInGiWw6WZk08vfvKI0IW2eWW57n0zhz0,57549
66
66
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
67
67
  dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
68
68
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
69
69
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
70
70
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
71
71
  dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
72
- dcicutils-8.8.0.1b22.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
- dcicutils-8.8.0.1b22.dist-info/METADATA,sha256=LKPWDLxkixPVLiNnypRH46KEEcwkr_beWBf6hEwz22E,3357
74
- dcicutils-8.8.0.1b22.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
- dcicutils-8.8.0.1b22.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
- dcicutils-8.8.0.1b22.dist-info/RECORD,,
72
+ dcicutils-8.8.0.1b24.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
73
+ dcicutils-8.8.0.1b24.dist-info/METADATA,sha256=z5fS6VG0wLUCd3NTEvsMF5rdWwLg7TmWlTs1Syg-7Sk,3357
74
+ dcicutils-8.8.0.1b24.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
75
+ dcicutils-8.8.0.1b24.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
76
+ dcicutils-8.8.0.1b24.dist-info/RECORD,,