dcicutils 8.8.6__py3-none-any.whl → 8.8.6.1b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dcicutils/portal_utils.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from collections import deque
2
2
  from functools import lru_cache
3
+ from dcicutils.function_cache_decorator import function_cache
3
4
  import io
4
5
  import json
5
6
  from pyramid.config import Configurator as PyramidConfigurator
@@ -18,6 +19,7 @@ from wsgiref.simple_server import make_server as wsgi_make_server
18
19
  from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
19
20
  from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
20
21
  from dcicutils.misc_utils import to_camel_case, VirtualApp
22
+ from dcicutils.schema_utils import get_identifying_properties
21
23
  from dcicutils.tmpfile_utils import temporary_file
22
24
 
23
25
  Portal = Type["Portal"] # Forward type reference for type hints.
@@ -416,6 +418,54 @@ class Portal:
416
418
  return []
417
419
  return schemas_super_type_map.get(type_name, [])
418
420
 
421
@function_cache(maxsize=100, serialize_key=True)
def get_identifying_paths(self, portal_object: dict, portal_type: Optional[str] = None) -> List[str]:
    """
    Returns the list of Portal (URL) paths by which the given Portal object can be identified.

    One path is produced for each identifying property of the object's type which has a
    non-empty value in the object, in the order given by self.get_identifying_property_names:
    - a list value yields one "/{portal_type}/{item}" path per list item;
    - a (non-list) uuid value yields "/{uuid}";
    - any other value yields "/{portal_type}/{value}".

    If portal_type is not a non-empty string it is determined via self.get_schema_type.
    Returns an empty list if the given object is not a dictionary, if its type cannot be
    determined, or if it has no identifying property values.
    """
    if not isinstance(portal_object, dict):
        return []
    if not (isinstance(portal_type, str) and portal_type):
        # No usable type given; fall back to the type derived from the object itself.
        portal_type = self.get_schema_type(portal_object)
        if not portal_type:
            return []
    paths = []
    for property_name in self.get_identifying_property_names(portal_type):
        value = portal_object.get(property_name)
        if not value:
            continue
        if isinstance(value, list):
            # The list check comes first, so a list value is always type-prefixed.
            paths.extend(f"/{portal_type}/{item}" for item in value)
        elif property_name == "uuid":
            # A uuid is addressable directly, without the type prefix.
            paths.append(f"/{value}")
        else:
            paths.append(f"/{portal_type}/{value}")
    return paths
444
+
445
@function_cache(maxsize=100, serialize_key=True)
def get_identifying_property_names(self, schema: Union[str, dict]) -> List[str]:
    """
    Returns the identifying property names for the given Portal schema, which may be given
    either as a schema name (looked up via self.get_schema) or as a schema object.

    The names are those reported by get_identifying_properties, reordered so that "uuid"
    (if present) comes first and "identifier" (if present) comes next; the relative order
    of the remaining names is unchanged. Returns an empty list if the schema is neither a
    string nor a dictionary, if the schema lookup fails or finds nothing, or if the schema
    has no identifying properties.
    """
    if isinstance(schema, str):
        try:
            schema = self.get_schema(schema)
        except Exception:
            # Deliberately best-effort: a failed schema lookup is treated as "not found".
            return []
        if not schema:
            return []
    elif not isinstance(schema, dict):
        return []
    names = get_identifying_properties(schema)
    if not names:
        return []
    # Work on a copy so the value returned by get_identifying_properties is left untouched.
    names = list(names)
    favored = [name for name in ("uuid", "identifier") if name in names]
    for name in favored:
        names.remove(name)
    return favored + names
468
+
419
469
  def url(self, url: str, raw: bool = False, database: bool = False) -> str:
420
470
  if not isinstance(url, str) or not url:
421
471
  return "/"
@@ -516,6 +566,22 @@ class Portal:
516
566
  response = TestResponseWrapper(response)
517
567
  return response
518
568
 
569
@staticmethod
def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
    """
    Returns a TestApp for the given argument, which may be:
    - a TestApp, which is returned as-is;
    - a VirtualApp, whose wrapped_app is returned (it must be a TestApp);
    - a PyramidRouter, which is wrapped in a new TestApp;
    - a string (passed to pyramid_get_app as the first argument), or any falsy
      value, in which case "development.ini" is used.
    Raises an Exception for any other kind of argument, or for a VirtualApp
    whose wrapped_app is not a TestApp.
    """
    if isinstance(arg, TestApp):
        return arg
    if isinstance(arg, VirtualApp):
        wrapped_app = arg.wrapped_app
        if isinstance(wrapped_app, TestApp):
            return wrapped_app
        raise Exception("Portal._create_vapp VirtualApp argument error.")
    if isinstance(arg, PyramidRouter):
        router = arg
    elif isinstance(arg, str) or not arg:
        ini_file = arg or "development.ini"
        router = pyramid_get_app(ini_file, "app")
    else:
        raise Exception("Portal._create_vapp argument error.")
    extra_environ = {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"}
    return TestApp(router, extra_environ)
584
+
519
585
  @staticmethod
520
586
  def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
521
587
  if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
@@ -547,22 +613,6 @@ class Portal:
547
613
  with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
548
614
  return Portal(ini_file)
549
615
 
550
- @staticmethod
551
- def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
552
- if isinstance(arg, TestApp):
553
- return arg
554
- elif isinstance(arg, VirtualApp):
555
- if not isinstance(arg.wrapped_app, TestApp):
556
- raise Exception("Portal._create_vapp VirtualApp argument error.")
557
- return arg.wrapped_app
558
- if isinstance(arg, PyramidRouter):
559
- router = arg
560
- elif isinstance(arg, str) or not arg:
561
- router = pyramid_get_app(arg or "development.ini", "app")
562
- else:
563
- raise Exception("Portal._create_vapp argument error.")
564
- return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
565
-
566
616
  @staticmethod
567
617
  def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
568
618
  if isinstance(endpoints, dict):
@@ -56,7 +56,7 @@ class StructuredDataSet:
56
56
  remove_empty_objects_from_lists: bool = True,
57
57
  ref_lookup_strategy: Optional[Callable] = None,
58
58
  ref_lookup_nocache: bool = False,
59
- norefs: bool = False,
59
+ norefs: bool = False, merge: bool = False,
60
60
  progress: Optional[Callable] = None,
61
61
  debug_sleep: Optional[str] = None) -> None:
62
62
  self._progress = progress if callable(progress) else None
@@ -75,6 +75,7 @@ class StructuredDataSet:
75
75
  self._nrows = 0
76
76
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
77
77
  self._norefs = True if norefs is True else False
78
+ self._merge = True if merge is True else False
78
79
  self._debug_sleep = None
79
80
  if debug_sleep:
80
81
  try:
@@ -98,13 +99,13 @@ class StructuredDataSet:
98
99
  remove_empty_objects_from_lists: bool = True,
99
100
  ref_lookup_strategy: Optional[Callable] = None,
100
101
  ref_lookup_nocache: bool = False,
101
- norefs: bool = False,
102
+ norefs: bool = False, merge: bool = False,
102
103
  progress: Optional[Callable] = None,
103
104
  debug_sleep: Optional[str] = None) -> StructuredDataSet:
104
105
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
105
106
  remove_empty_objects_from_lists=remove_empty_objects_from_lists,
106
107
  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
107
- norefs=norefs, progress=progress, debug_sleep=debug_sleep)
108
+ norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
108
109
 
109
110
  def validate(self, force: bool = False) -> None:
110
111
  def data_without_deleted_properties(data: dict) -> dict:
@@ -350,18 +351,23 @@ class StructuredDataSet:
350
351
 
351
352
  def _load_json_file(self, file: str) -> None:
352
353
  with open(file) as f:
353
- file_json = json.load(f)
354
- schema_inferred_from_file_name = Schema.type_name(file)
355
- if self._portal.get_schema(schema_inferred_from_file_name) is not None:
354
+ item = json.load(f)
355
+ if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
356
+ (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
356
357
  # If the JSON file name looks like a schema name then assume it
357
358
  # contains an object or an array of object of that schema type.
358
- self._add(Schema.type_name(file), file_json)
359
- elif isinstance(file_json, dict):
359
+ if self._merge:
360
+ item = self._merge_with_existing_portal_object(item, schema_name_inferred_from_file_name)
361
+ self._add(Schema.type_name(file), item)
362
+ elif isinstance(item, dict):
360
363
  # Otherwise if the JSON file name does not look like a schema name then
361
364
  # assume it a dictionary where each property is the name of a schema, and
362
365
  # which (each property) contains a list of object of that schema type.
363
- for schema_name in file_json:
364
- self._add(schema_name, file_json[schema_name])
366
+ for schema_name in item:
367
+ item = item[schema_name]
368
+ if self._merge:
369
+ item = self._merge_with_existing_portal_object(item, schema_name)
370
+ self._add(schema_name, item)
365
371
 
366
372
  def _load_reader(self, reader: RowReader, type_name: str) -> None:
367
373
  schema = None
@@ -383,11 +389,14 @@ class StructuredDataSet:
383
389
  structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
384
390
  if self._autoadd_properties:
385
391
  self._add_properties(structured_row, self._autoadd_properties, schema)
392
+ # New merge functionality (2024-05-25).
393
+ if self._merge:
394
+ structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
386
395
  if (prune_error := self._prune_structured_row(structured_row)) is not None:
387
396
  self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
388
397
  "error": prune_error}, "validation")
389
398
  else:
390
- self._add(type_name, structured_row)
399
+ self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
391
400
  if self._progress:
392
401
  self._progress({
393
402
  PROGRESS.LOAD_ITEM: self._nrows,
@@ -428,6 +437,18 @@ class StructuredDataSet:
428
437
  if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
429
438
  structured_row[name] = properties[name]
430
439
 
440
def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
    """
    Merges the given Portal object (presumably from the metadata being loaded) into the
    corresponding item which already exists in the Portal, if any, and returns the result.

    The existing item is looked up (raw) via each of the identifying paths of the given
    object, in order; the first one found is merged with the given object via merge_objects.
    If no existing item is found then the given object is returned as-is.
    NOTE(review): merge_objects may modify its arguments in place - confirm before
    relying on the given object being left unchanged.
    """
    portal = self._portal
    for path in portal.get_identifying_paths(portal_object, portal_type):
        existing_object = portal.get_metadata(path, raw=True)
        if existing_object:
            return merge_objects(existing_object, portal_object)
    return portal_object
451
+
431
452
  def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
432
453
  return (ref_lookup_flags &
433
454
  Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.6
3
+ Version: 8.8.6.1b2
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -48,7 +48,7 @@ dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,10769
48
48
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
49
49
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
50
50
  dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
51
- dcicutils/portal_utils.py,sha256=DYyE5o15GekDgzpJWas9iS7klAYbjJZUPW0G42McArk,30779
51
+ dcicutils/portal_utils.py,sha256=54e0utkLQxQv2_bD37P1ZGeyG63b2W7nCte6KT9eCY0,33402
52
52
  dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
53
53
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
54
54
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -64,7 +64,7 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
64
64
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
65
65
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
66
66
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
67
- dcicutils/structured_data.py,sha256=XOMxrmkJohdCAyCJU09uI8ivthTKrtSSYReFbC9VYMs,63058
67
+ dcicutils/structured_data.py,sha256=yaG5zIdlJLb9-1-SvNBBRLtrioa3Kaf6gT9uIzZOh48,64493
68
68
  dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
69
69
  dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
70
70
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
@@ -73,8 +73,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
73
73
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
74
74
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
75
75
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
76
- dcicutils-8.8.6.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
- dcicutils-8.8.6.dist-info/METADATA,sha256=7VdJjqhSwF0whauIEE6ky-Dh5EE9we5MdC_1GHnV8sA,3435
78
- dcicutils-8.8.6.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
- dcicutils-8.8.6.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
- dcicutils-8.8.6.dist-info/RECORD,,
76
+ dcicutils-8.8.6.1b2.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
+ dcicutils-8.8.6.1b2.dist-info/METADATA,sha256=O63mM_Rd6EsJsJkt4a8aq_Gz9JwDzIACwanAjc8HXYs,3439
78
+ dcicutils-8.8.6.1b2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
+ dcicutils-8.8.6.1b2.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
+ dcicutils-8.8.6.1b2.dist-info/RECORD,,