dcicutils 8.8.6.1b10__py3-none-any.whl → 8.9.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,6 @@
1
1
  from copy import deepcopy
2
2
  from functools import lru_cache
3
+ import re
3
4
  from typing import Any, Callable, List, Optional, Tuple, Type, Union
4
5
  from dcicutils.data_readers import RowReader
5
6
  from dcicutils.misc_utils import create_readonly_object
@@ -13,9 +14,11 @@ class PortalObject:
13
14
 
14
15
  _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
15
16
 
16
- def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
17
+ def __init__(self, data: dict, portal: Portal = None,
18
+ schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
17
19
  self._data = data if isinstance(data, dict) else {}
18
20
  self._portal = portal if isinstance(portal, Portal) else None
21
+ self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
19
22
  self._type = type if isinstance(type, str) else ""
20
23
 
21
24
  @property
@@ -29,7 +32,7 @@ class PortalObject:
29
32
  @property
30
33
  @lru_cache(maxsize=1)
31
34
  def type(self) -> str:
32
- return self._type or Portal.get_schema_type(self._data) or ""
35
+ return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else "")
33
36
 
34
37
  @property
35
38
  @lru_cache(maxsize=1)
@@ -44,7 +47,7 @@ class PortalObject:
44
47
  @property
45
48
  @lru_cache(maxsize=1)
46
49
  def schema(self) -> Optional[dict]:
47
- return self._portal.get_schema(self.type) if self._portal else None
50
+ return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None)
48
51
 
49
52
  def copy(self) -> PortalObject:
50
53
  return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
@@ -56,29 +59,39 @@ class PortalObject:
56
59
  Returns the list of all identifying property names of this Portal object which actually have values.
57
60
  Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
58
61
  properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
59
- Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
60
62
  """
61
- # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
62
- return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
63
+ if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
64
+ return None
65
+ identifying_properties = []
66
+ for identifying_property in schema_identifying_properties:
67
+ if identifying_property not in ["uuid", "identifier", "aliases"]:
68
+ if self._data.get(identifying_property):
69
+ identifying_properties.append(identifying_property)
70
+ if self._data.get("identifier"):
71
+ identifying_properties.insert(0, "identifier")
72
+ if self._data.get("uuid"):
73
+ identifying_properties.insert(0, "uuid")
74
+ if "aliases" in schema_identifying_properties and self._data.get("aliases"):
75
+ identifying_properties.append("aliases")
76
+ return identifying_properties or None
63
77
 
64
78
  @lru_cache(maxsize=8192)
65
79
  def lookup(self, raw: bool = False,
66
80
  ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
67
- if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
68
- return None, None, 0
69
81
  nlookups = 0
70
82
  first_identifying_path = None
71
83
  try:
72
- for identifying_path in identifying_paths:
73
- if not first_identifying_path:
74
- first_identifying_path = identifying_path
75
- nlookups += 1
76
- if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200):
77
- return (
78
- PortalObject(item.json(), portal=self._portal, type=self.type if raw else None),
79
- identifying_path,
80
- nlookups
81
- )
84
+ if identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy):
85
+ for identifying_path in identifying_paths:
86
+ if not first_identifying_path:
87
+ first_identifying_path = identifying_path
88
+ nlookups += 1
89
+ if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
90
+ return (
91
+ PortalObject(value.json(), portal=self._portal, type=self.type if raw else None),
92
+ identifying_path,
93
+ nlookups
94
+ )
82
95
  except Exception:
83
96
  pass
84
97
  return None, first_identifying_path, nlookups
@@ -146,12 +159,64 @@ class PortalObject:
146
159
 
147
160
  @lru_cache(maxsize=1)
148
161
  def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
149
- if not self._portal and (uuid := self.uuid):
150
- return [f"/{uuid}"]
151
- # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
152
- return self._portal.get_identifying_paths(self._data,
153
- portal_type=self.schema,
154
- lookup_strategy=ref_lookup_strategy) if self._portal else None
162
+ """
163
+ Returns a list of the possible Portal URL paths identifying this Portal object.
164
+ """
165
+ identifying_paths = []
166
+ if not (identifying_properties := self.identifying_properties):
167
+ if self.uuid:
168
+ if self.type:
169
+ identifying_paths.append(f"/{self.type}/{self.uuid}")
170
+ identifying_paths.append(f"/{self.uuid}")
171
+ return identifying_paths
172
+ for identifying_property in identifying_properties:
173
+ if identifying_value := self._data.get(identifying_property):
174
+ if identifying_property == "uuid":
175
+ if self.type:
176
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
177
+ identifying_paths.append(f"/{identifying_value}")
178
+ # For now at least we include the path both with and without the schema type component,
179
+ # as for some identifying values, it works (only) with, and some, it works (only) without.
180
+ # For example: If we have FileSet with "accession", an identifying property, with value
181
+ # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
182
+ # conversely using "submitted_id", also an identifying property, with value
183
+ # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
184
+ # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
185
+ elif isinstance(identifying_value, list):
186
+ for identifying_value_item in identifying_value:
187
+ if self.type:
188
+ identifying_paths.append(f"/{self.type}/{identifying_value_item}")
189
+ identifying_paths.append(f"/{identifying_value_item}")
190
+ else:
191
+ # TODO: Import from somewhere ...
192
+ lookup_options = 0
193
+ if schema := self.schema:
194
+ # TODO: Hook into the ref_lookup_strategy thing in structured_data to make
195
+ # sure we check accession format (since it does not have a pattern).
196
+ if callable(ref_lookup_strategy):
197
+ lookup_options, ref_validator = ref_lookup_strategy(
198
+ self._portal, self.type, schema, identifying_value)
199
+ if callable(ref_validator):
200
+ if ref_validator(schema, identifying_property, identifying_value) is False:
201
+ continue
202
+ if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
203
+ if not re.match(pattern, identifying_value):
204
+ # If this identifying value is for a (identifying) property which has a
205
+ # pattern, and the value does NOT match the pattern, then do NOT include
206
+ # this value as an identifying path, since it cannot possibly be found.
207
+ continue
208
+ if not lookup_options:
209
+ lookup_options = Portal.LOOKUP_DEFAULT
210
+ if Portal.is_lookup_root_first(lookup_options):
211
+ identifying_paths.append(f"/{identifying_value}")
212
+ if Portal.is_lookup_specified_type(lookup_options) and self.type:
213
+ identifying_paths.append(f"/{self.type}/{identifying_value}")
214
+ if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
215
+ identifying_paths.append(f"/{identifying_value}")
216
+ if Portal.is_lookup_subtypes(lookup_options):
217
+ for subtype_name in self._portal.get_schema_subtype_names(self.type):
218
+ identifying_paths.append(f"/{subtype_name}/{identifying_value}")
219
+ return identifying_paths or None
155
220
 
156
221
  def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
157
222
  """
dcicutils/portal_utils.py CHANGED
@@ -1,6 +1,5 @@
1
1
  from collections import deque
2
2
  from functools import lru_cache
3
- from dcicutils.function_cache_decorator import function_cache
4
3
  import io
5
4
  import json
6
5
  from pyramid.config import Configurator as PyramidConfigurator
@@ -19,7 +18,6 @@ from wsgiref.simple_server import make_server as wsgi_make_server
19
18
  from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
20
19
  from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
21
20
  from dcicutils.misc_utils import to_camel_case, VirtualApp
22
- from dcicutils.schema_utils import get_identifying_properties
23
21
  from dcicutils.tmpfile_utils import temporary_file
24
22
 
25
23
  Portal = Type["Portal"] # Forward type reference for type hints.
@@ -50,16 +48,15 @@ class Portal:
50
48
  FILE_TYPE_SCHEMA_NAME = "File"
51
49
 
52
50
  # Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
53
- # structured_data.py; controlled by an optional lookup_strategy callable; default is
51
+ # structured_data.py; controlled by an optional ref_lookup_strategy callable; default is
54
52
  # lookup at root path but after the specified type path lookup, and then lookup all subtypes;
55
53
  # can choose to lookup root path first, or not lookup root path at all, or not lookup
56
- # subtypes at all; the lookup_strategy callable if specified should take a type_name
54
+ # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
57
55
  # and value (string) arguments and return an integer of any of the below ORed together.
58
56
  # The main purpose of this is optimization; to minimize portal lookups; since for example,
59
57
  # currently at least, /{type}/{accession} does not work but /{accession} does; so we
60
58
  # currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
61
59
  # And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
62
- LOOKUP_UNDEFINED = 0
63
60
  LOOKUP_SPECIFIED_TYPE = 0x0001
64
61
  LOOKUP_ROOT = 0x0002
65
62
  LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
@@ -208,6 +205,23 @@ class Portal:
208
205
  def vapp(self) -> Optional[TestApp]:
209
206
  return self._vapp
210
207
 
208
+ @staticmethod
209
+ def is_lookup_specified_type(lookup_options: int) -> bool:
210
+ return (lookup_options &
211
+ Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
212
+
213
+ @staticmethod
214
+ def is_lookup_root(lookup_options: int) -> bool:
215
+ return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
216
+
217
+ @staticmethod
218
+ def is_lookup_root_first(lookup_options: int) -> bool:
219
+ return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
220
+
221
+ @staticmethod
222
+ def is_lookup_subtypes(lookup_options: int) -> bool:
223
+ return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
224
+
211
225
  def get(self, url: str, follow: bool = True,
212
226
  raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
213
227
  url = self.url(url, raw, database)
@@ -291,10 +305,7 @@ class Portal:
291
305
 
292
306
  @lru_cache(maxsize=100)
293
307
  def get_schema(self, schema_name: str) -> Optional[dict]:
294
- try:
295
- return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
296
- except Exception:
297
- return None
308
+ return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
298
309
 
299
310
  @lru_cache(maxsize=1)
300
311
  def get_schemas(self) -> dict:
@@ -405,208 +416,6 @@ class Portal:
405
416
  return []
406
417
  return schemas_super_type_map.get(type_name, [])
407
418
 
408
- @function_cache(maxsize=100, serialize_key=True)
409
- def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
410
- lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
411
- """
412
- Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
413
- and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
414
- no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
415
- Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_stratey
416
- module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
417
- this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.
418
- """
419
- def is_lookup_specified_type(lookup_options: int) -> bool:
420
- return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
421
- def is_lookup_root(lookup_options: int) -> bool: # noqa
422
- return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
423
- def is_lookup_root_first(lookup_options: int) -> bool: # noqa
424
- return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
425
- def is_lookup_subtypes(lookup_options: int) -> bool: # noqa
426
- return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
427
-
428
- results = []
429
- if not isinstance(portal_object, dict):
430
- return results
431
- if not (isinstance(portal_type, str) and portal_type):
432
- if isinstance(portal_type, dict):
433
- # It appears that the given portal_type is an actual schema dictionary.
434
- portal_type = self.schema_name(portal_type.get("title"))
435
- if not (isinstance(portal_type, str) and portal_type):
436
- if not (portal_type := self.get_schema_type(portal_object)):
437
- return results
438
- if not callable(lookup_strategy):
439
- lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
440
- for identifying_property in self.get_identifying_property_names(portal_type):
441
- if not (identifying_value := portal_object.get(identifying_property)):
442
- continue
443
- # The get_identifying_property_names call above ensures uuid is first if it is in the object.
444
- # And also note that ALL schemas do in fact have identifyingProperties which do in fact have
445
- # uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
446
- # might have a special case to check the Portal object explicitly for uuid, but no need.
447
- if identifying_property == "uuid":
448
- #
449
- # Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
450
- # is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
451
- #
452
- # - /d13d06c1-218e-4f61-aaf0-91f226248b3c
453
- # - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
454
- # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
455
- # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
456
- # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
457
- #
458
- # Will result in a (HTTP 301) redirect to:
459
- #
460
- # - /files-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
461
- #
462
- # Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
463
- # based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
464
- # information is contained, for this example, in the snovault.collection decorator for the
465
- # endpoint definition in smaht-portal/.../types/file_format.py. Unfortunately merely because
466
- # behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
467
- # And note the disction of just using /{uuid} here rather than /{type}/{uuid} as in the else
468
- # statement below is not really necessary; just here for emphasis that this is all that's needed.
469
- #
470
- results.append(f"/{identifying_value}")
471
- elif isinstance(identifying_value, list):
472
- for identifying_value_item in identifying_value:
473
- if identifying_value_item:
474
- results.append(f"/{portal_type}/{identifying_value_item}")
475
- else:
476
- lookup_options = Portal.LOOKUP_UNDEFINED
477
- if schema := self.get_schema(portal_type):
478
- if callable(lookup_strategy):
479
- lookup_options, validator = lookup_strategy(self, portal_type, schema, identifying_value)
480
- if callable(validator):
481
- if validator(schema, identifying_property, identifying_value) is False:
482
- continue
483
- if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
484
- if not re.match(pattern, identifying_value):
485
- # If this identifying value is for a (identifying) property which has a
486
- # pattern, and the value does NOT match the pattern, then do NOT include
487
- # this value as an identifying path, since it cannot possibly be found.
488
- continue
489
- if lookup_options == Portal.LOOKUP_UNDEFINED:
490
- lookup_options = Portal.LOOKUP_DEFAULT
491
- if is_lookup_root_first(lookup_options):
492
- results.append(f"/{identifying_value}")
493
- if is_lookup_specified_type(lookup_options) and portal_type:
494
- results.append(f"/{portal_type}/{identifying_value}")
495
- if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
496
- results.append(f"/{identifying_value}")
497
- if is_lookup_subtypes(lookup_options):
498
- for subtype_name in self.get_schema_subtype_names(portal_type):
499
- results.append(f"/{subtype_name}/{identifying_value}")
500
- return results
501
-
502
- @function_cache(maxsize=100, serialize_key=True)
503
- def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
504
- lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]:
505
- if identifying_paths := self.get_identifying_paths(portal_object, portal_type, lookup_strategy):
506
- return identifying_paths[0]
507
- return None
508
-
509
- @function_cache(maxsize=100, serialize_key=True)
510
- def get_identifying_property_names(self, schema: Union[str, dict],
511
- portal_object: Optional[dict] = None) -> List[str]:
512
- """
513
- Returns the list of identifying property names for the given Portal schema, which may be
514
- either a schema name or a schema object. If a Portal object is also given then restricts this
515
- set of identifying properties to those which actually have values within this Portal object.
516
- Favors the uuid and identifier property names and defavors the aliases property name; no other
517
- ordering imposed. Returns empty list if no identifying properties or otherwise not found.
518
- """
519
- results = []
520
- if isinstance(schema, str):
521
- if not (schema := self.get_schema(schema)):
522
- return results
523
- elif not isinstance(schema, dict):
524
- return results
525
- if not (identifying_properties := get_identifying_properties(schema)):
526
- return results
527
- identifying_properties = list(set(identifying_properties)) # paranoid dedup
528
- identifying_properties = [*identifying_properties] # copy so as not to change schema if given
529
- favored_identifying_properties = ["uuid", "identifier"]
530
- defavored_identifying_properties = ["aliases"]
531
- for favored_identifying_property in reversed(favored_identifying_properties):
532
- if favored_identifying_property in identifying_properties:
533
- identifying_properties.remove(favored_identifying_property)
534
- identifying_properties.insert(0, favored_identifying_property)
535
- for defavored_identifying_property in defavored_identifying_properties:
536
- if defavored_identifying_property in identifying_properties:
537
- identifying_properties.remove(defavored_identifying_property)
538
- identifying_properties.append(defavored_identifying_property)
539
- if isinstance(portal_object, dict):
540
- for identifying_property in [*identifying_properties]:
541
- if portal_object.get(identifying_property) is None:
542
- identifying_properties.remove(identifying_property)
543
- return identifying_properties
544
-
545
- @staticmethod
546
- def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
547
- #
548
- # Note this slightly odd situation WRT object lookups by submitted_id and accession:
549
- # -----------------------------+-----------------------------------------------+---------------+
550
- # PATH | EXAMPLE | LOOKUP RESULT |
551
- # -----------------------------+-----------------------------------------------+---------------+
552
- # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
553
- # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
554
- # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
555
- # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
556
- # -----------------------------+-----------------------------------------------+---------------+
557
- # /accession | /SMAFSFXF1RO4 | FOUND |
558
- # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
559
- # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
560
- # /File/accession | /File/SMAFSFXF1RO4 | FOUND |
561
- # -----------------------------+-----------------------------------------------+---------------+
562
- #
563
- def ref_validator(schema: Optional[dict],
564
- property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
565
- """
566
- Returns False iff objects of type represented by the given schema, CANNOT be referenced with
567
- a Portal path using the given property name and its given property value, otherwise returns None.
568
-
569
- For example, if the schema is for UnalignedReads and the property name is accession, then we will
570
- return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
571
- will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
572
- will continue executing its default behavior, which is to check other ways in which the given type
573
- CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
574
- and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
575
-
576
- The goal (in structured_data) being to detect if a type is being referenced in such a way that
577
- CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
578
- if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an
579
- identifying property for the given type.
580
- """
581
- if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
582
- if (property_format == "accession") and (property_name == "accession"):
583
- if not Portal._is_accession_id(property_value):
584
- return False
585
- return None
586
-
587
- DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
588
- if not value:
589
- return DEFAULT_RESULT
590
- if not schema:
591
- if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
592
- return DEFAULT_RESULT
593
- if schema_properties := schema.get("properties"):
594
- if schema_properties.get("accession") and Portal._is_accession_id(value):
595
- # Case: lookup by accession (only by root).
596
- return (Portal.LOOKUP_ROOT, ref_validator)
597
- elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
598
- if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
599
- if re.match(schema_property_pattern_submitted_id, value):
600
- # Case: lookup by submitted_id (only by specified type).
601
- return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
602
- return DEFAULT_RESULT
603
-
604
- @staticmethod
605
- def _is_accession_id(value: str) -> bool:
606
- # This is here for now because of problems with circular dependencies.
607
- # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
608
- return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
609
-
610
419
  def url(self, url: str, raw: bool = False, database: bool = False) -> str:
611
420
  if not isinstance(url, str) or not url:
612
421
  return "/"
@@ -707,22 +516,6 @@ class Portal:
707
516
  response = TestResponseWrapper(response)
708
517
  return response
709
518
 
710
- @staticmethod
711
- def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
712
- if isinstance(arg, TestApp):
713
- return arg
714
- elif isinstance(arg, VirtualApp):
715
- if not isinstance(arg.wrapped_app, TestApp):
716
- raise Exception("Portal._create_vapp VirtualApp argument error.")
717
- return arg.wrapped_app
718
- if isinstance(arg, PyramidRouter):
719
- router = arg
720
- elif isinstance(arg, str) or not arg:
721
- router = pyramid_get_app(arg or "development.ini", "app")
722
- else:
723
- raise Exception("Portal._create_vapp argument error.")
724
- return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
725
-
726
519
  @staticmethod
727
520
  def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
728
521
  if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
@@ -754,6 +547,22 @@ class Portal:
754
547
  with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
755
548
  return Portal(ini_file)
756
549
 
550
+ @staticmethod
551
+ def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
552
+ if isinstance(arg, TestApp):
553
+ return arg
554
+ elif isinstance(arg, VirtualApp):
555
+ if not isinstance(arg.wrapped_app, TestApp):
556
+ raise Exception("Portal._create_vapp VirtualApp argument error.")
557
+ return arg.wrapped_app
558
+ if isinstance(arg, PyramidRouter):
559
+ router = arg
560
+ elif isinstance(arg, str) or not arg:
561
+ router = pyramid_get_app(arg or "development.ini", "app")
562
+ else:
563
+ raise Exception("Portal._create_vapp argument error.")
564
+ return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
565
+
757
566
  @staticmethod
758
567
  def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
759
568
  if isinstance(endpoints, dict):
dcicutils/schema_utils.py CHANGED
@@ -24,6 +24,7 @@ class JsonSchemaConstants:
24
24
 
25
25
 
26
26
  class EncodedSchemaConstants:
27
+ DESCRIPTION = "description"
27
28
  IDENTIFYING_PROPERTIES = "identifyingProperties"
28
29
  LINK_TO = "linkTo"
29
30
  MERGE_REF = "$merge"
@@ -187,6 +188,21 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
187
188
  ]
188
189
 
189
190
 
191
+ def is_link(property_schema: Dict[str, Any]) -> bool:
192
+ """Is property schema a link?"""
193
+ return bool(property_schema.get(SchemaConstants.LINK_TO))
194
+
195
+
196
+ def get_enum(property_schema: Dict[str, Any]) -> List[str]:
197
+ """Return the enum of a property schema."""
198
+ return property_schema.get(SchemaConstants.ENUM, [])
199
+
200
+
201
+ def get_description(schema: Dict[str, Any]) -> str:
202
+ """Return the description of a schema."""
203
+ return schema.get(SchemaConstants.DESCRIPTION, "")
204
+
205
+
190
206
  class Schema:
191
207
 
192
208
  def __init__(self, schema: dict, type: Optional[str] = None) -> None:
@@ -56,7 +56,7 @@ class StructuredDataSet:
56
56
  remove_empty_objects_from_lists: bool = True,
57
57
  ref_lookup_strategy: Optional[Callable] = None,
58
58
  ref_lookup_nocache: bool = False,
59
- norefs: bool = False, merge: bool = False,
59
+ norefs: bool = False,
60
60
  progress: Optional[Callable] = None,
61
61
  debug_sleep: Optional[str] = None) -> None:
62
62
  self._progress = progress if callable(progress) else None
@@ -75,7 +75,6 @@ class StructuredDataSet:
75
75
  self._nrows = 0
76
76
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
77
77
  self._norefs = True if norefs is True else False
78
- self._merge = True if merge is True else False
79
78
  self._debug_sleep = None
80
79
  if debug_sleep:
81
80
  try:
@@ -99,13 +98,13 @@ class StructuredDataSet:
99
98
  remove_empty_objects_from_lists: bool = True,
100
99
  ref_lookup_strategy: Optional[Callable] = None,
101
100
  ref_lookup_nocache: bool = False,
102
- norefs: bool = False, merge: bool = False,
101
+ norefs: bool = False,
103
102
  progress: Optional[Callable] = None,
104
103
  debug_sleep: Optional[str] = None) -> StructuredDataSet:
105
104
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
106
105
  remove_empty_objects_from_lists=remove_empty_objects_from_lists,
107
106
  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
108
- norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
107
+ norefs=norefs, progress=progress, debug_sleep=debug_sleep)
109
108
 
110
109
  def validate(self, force: bool = False) -> None:
111
110
  def data_without_deleted_properties(data: dict) -> dict:
@@ -351,23 +350,18 @@ class StructuredDataSet:
351
350
 
352
351
  def _load_json_file(self, file: str) -> None:
353
352
  with open(file) as f:
354
- item = json.load(f)
355
- if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
356
- (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
353
+ file_json = json.load(f)
354
+ schema_inferred_from_file_name = Schema.type_name(file)
355
+ if self._portal.get_schema(schema_inferred_from_file_name) is not None:
357
356
  # If the JSON file name looks like a schema name then assume it
358
357
  # contains an object or an array of objects of that schema type.
359
- if self._merge:
360
- item = self._merge_with_existing_portal_object(item, schema_name_inferred_from_file_name)
361
- self._add(Schema.type_name(file), item)
362
- elif isinstance(item, dict):
358
+ self._add(Schema.type_name(file), file_json)
359
+ elif isinstance(file_json, dict):
363
360
  # Otherwise if the JSON file name does not look like a schema name then
364
361
  # assume it is a dictionary where each property is the name of a schema, and
365
362
  # which (each property) contains a list of objects of that schema type.
366
- for schema_name in item:
367
- item = item[schema_name]
368
- if self._merge:
369
- item = self._merge_with_existing_portal_object(item, schema_name)
370
- self._add(schema_name, item)
363
+ for schema_name in file_json:
364
+ self._add(schema_name, file_json[schema_name])
371
365
 
372
366
  def _load_reader(self, reader: RowReader, type_name: str) -> None:
373
367
  schema = None
@@ -389,14 +383,11 @@ class StructuredDataSet:
389
383
  structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
390
384
  if self._autoadd_properties:
391
385
  self._add_properties(structured_row, self._autoadd_properties, schema)
392
- # New merge functionality (2024-05-25).
393
- if self._merge:
394
- structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
395
386
  if (prune_error := self._prune_structured_row(structured_row)) is not None:
396
387
  self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
397
388
  "error": prune_error}, "validation")
398
389
  else:
399
- self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
390
+ self._add(type_name, structured_row)
400
391
  if self._progress:
401
392
  self._progress({
402
393
  PROGRESS.LOAD_ITEM: self._nrows,
@@ -437,18 +428,6 @@ class StructuredDataSet:
437
428
  if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
438
429
  structured_row[name] = properties[name]
439
430
 
440
- def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
441
- """
442
- Given a Portal object (presumably/in-practice from the given metadata), if there is
443
- an existing Portal item, identified by the identifying properties for the given object,
444
- then merges the given object into the existing one and returns the result; otherwise
445
- just returns the given object. Note that the given object may be CHANGED in place.
446
- """
447
- for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
448
- if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
449
- return merge_objects(existing_portal_object, portal_object)
450
- return portal_object
451
-
452
431
  def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
453
432
  return (ref_lookup_flags &
454
433
  Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
@@ -2,45 +2,39 @@ import re
2
2
  from typing import Optional
3
3
  from dcicutils.structured_data import Portal
4
4
 
5
- # This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
6
- # before it was fully developed, we had differing behaviors; but this has been unified; so this
7
- # could now be internalized to structured_data, and portal_object_utils (TODO).
8
-
9
5
 
10
6
  def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
11
7
  #
12
- # Note this slight odd situation WRT object lookups by submitted_id and accession:
13
- # -----------------------------+-----------------------------------------------+---------------+
14
- # PATH | EXAMPLE | LOOKUP RESULT |
15
- # -----------------------------+-----------------------------------------------+---------------+
16
- # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
17
- # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
18
- # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
19
- # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
20
- # -----------------------------+-----------------------------------------------+---------------+
21
- # /accession | /SMAFSFXF1RO4 | FOUND |
22
- # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
23
- # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
24
- # /File/accession | /File/SMAFSFXF1RO4 | FOUND |
25
- # -----------------------------+-----------------------------------------------+---------------+
8
+ # FYI: Note this situation WRT object lookups ...
9
+ #
10
+ # /{submitted_id} # NOT FOUND
11
+ # /UnalignedReads/{submitted_id} # OK
12
+ # /SubmittedFile/{submitted_id} # OK
13
+ # /File/{submitted_id} # NOT FOUND
14
+ #
15
+ # /{accession} # OK
16
+ # /UnalignedReads/{accession} # NOT FOUND
17
+ # /SubmittedFile/{accession} # NOT FOUND
18
+ # /File/{accession} # OK
26
19
  #
27
20
  def ref_validator(schema: Optional[dict],
28
21
  property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
29
22
  """
30
- Returns False iff objects of type represented by the given schema, CANNOT be referenced with
31
- a Portal path using the given property name and its given property value, otherwise returns None.
23
+ Returns False iff the type represented by the given schema, can NOT be referenced by
24
+ the given property name with the given property value, otherwise returns None.
32
25
 
33
- For example, if the schema is for UnalignedReads and the property name is accession, then we will
34
- return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
35
- will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
36
- will continue executing its default behavior, which is to check other ways in which the given type
37
- CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
38
- and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
26
+ For example, if the schema is for the UnalignedReads type and the property name
27
+ is accession, then we will return False iff the given property value is NOT a properly
28
+ formatted accession ID. Otherwise, we will return None, which indicates that the
29
+ caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
30
+ its default behavior, which is to check other ways in which the given type can NOT
31
+ be referenced by the given value, i.e. it checks other identifying properties for
32
+ the type and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
39
33
 
40
- The goal (in structured_data) being to detect if a type is being referenced in such a way that
41
- CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
42
- if indeed there any requirements. It is assumed/guaranteed the given property name is indeed an
43
- identifying property for the given type.
34
+ The goal (in structured_data) being to detect if a type is being referenced in such
35
+ a way that cannot possibly be allowed, i.e. because none of its identifying types
36
+ are in the required form (if indeed there are any requirements). Note that it is guaranteed
37
+ that the given property name is indeed an identifying property for the given type.
44
38
  """
45
39
  if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
46
40
  if (property_format == "accession") and (property_name == "accession"):
@@ -68,6 +62,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
68
62
 
69
63
 
70
64
  # This is here for now because of problems with circular dependencies.
71
- # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
65
+ # See: smaht-portal/.../schema_formats.py
72
66
  def _is_accession_id(value: str) -> bool:
73
67
  return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.6.1b10
3
+ Version: 8.9.0
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -47,8 +47,8 @@ dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
47
47
  dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
48
48
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
49
49
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
50
- dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
51
- dcicutils/portal_utils.py,sha256=LMewNfPBSYBZuFQyJ4TWUBAO0oQUCyFfL4fxeNd50XA,44254
50
+ dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
51
+ dcicutils/portal_utils.py,sha256=DYyE5o15GekDgzpJWas9iS7klAYbjJZUPW0G42McArk,30779
52
52
  dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
53
53
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
54
54
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -56,7 +56,7 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
56
56
  dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
57
57
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
58
58
  dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
59
- dcicutils/schema_utils.py,sha256=IhtozG2jQ7bFyn54iPEdmDrHoCf3ryJXeXvPJRBXNn0,10095
59
+ dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
60
60
  dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
61
61
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
62
62
  dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
@@ -64,17 +64,17 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
64
64
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
65
65
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
66
66
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
67
- dcicutils/structured_data.py,sha256=sm8x08ckPZcIcyBaSlQRGrOD3YL9d09gz-xB3_TAWGE,64516
67
+ dcicutils/structured_data.py,sha256=XOMxrmkJohdCAyCJU09uI8ivthTKrtSSYReFbC9VYMs,63058
68
68
  dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
69
- dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
69
+ dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
70
70
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
71
71
  dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
72
72
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
73
73
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
74
74
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
75
75
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
76
- dcicutils-8.8.6.1b10.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
- dcicutils-8.8.6.1b10.dist-info/METADATA,sha256=IZMgaX7IPVYSBG8--yorVXR_4HaUfjNebiY1MDRFAPk,3440
78
- dcicutils-8.8.6.1b10.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
- dcicutils-8.8.6.1b10.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
- dcicutils-8.8.6.1b10.dist-info/RECORD,,
76
+ dcicutils-8.9.0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
+ dcicutils-8.9.0.dist-info/METADATA,sha256=teICgpkM20wWR6PsceqVZ6GQDuqWquq75p2o35q1PLA,3435
78
+ dcicutils-8.9.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
+ dcicutils-8.9.0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
+ dcicutils-8.9.0.dist-info/RECORD,,