dcicutils 8.10.0.0b0__py3-none-any.whl → 8.10.0.1b1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,4 @@
1
+ from __future__ import annotations
1
2
  import contextlib
2
3
  import functools
3
4
  import glob
@@ -7,7 +8,7 @@ import re
7
8
  import requests
8
9
  import subprocess
9
10
 
10
- from typing import Optional
11
+ from typing import Callable, Optional
11
12
  from .exceptions import InvalidParameterError
12
13
  from .lang_utils import there_are
13
14
  from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
@@ -384,3 +385,70 @@ def script_catch_errors():
384
385
  message = str(e) # Note: We ignore the type, which isn't intended to be shown.
385
386
  PRINT(message)
386
387
  exit(1)
388
+
389
+
390
class Question:
    """
    Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after
    some maximum number of times of the same answer in a row (consecutively), then asks them
    if they want to automatically give that same answer to any/all subsequent questions.
    Supports static/global list of such Question instances, hashed (only) by the question text.
    """
    # Registry of shared instances, keyed by question text ("" for a missing/non-string question).
    _static_instances = {}

    @staticmethod
    def instance(question: Optional[str] = None,
                 max: Optional[int] = None, printf: Optional[Callable] = None) -> Question:
        """
        Returns the shared Question instance for the given question text, creating and registering
        it on first use. The max and printf arguments are only used when the instance is created;
        they are ignored if an instance for this question text already exists.
        """
        question = question if isinstance(question, str) else ""
        if not (instance := Question._static_instances.get(question)):
            Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
        return instance

    @staticmethod
    def yes(question: Optional[str] = None,
            max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
        """
        Asks the given question using the shared instance for its text; returns True iff the answer is yes.
        """
        return Question.instance(question, max=max, printf=printf).ask()

    def __init__(self, question: Optional[str] = None,
                 max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
        """
        The question defaults to the empty string if not a string; max is the number of consecutive
        identical answers after which the "answer the same to all" prompt is offered, and is disabled
        (None) unless it is a positive integer; printf falls back to the builtin print if not callable.
        """
        self._question = question if isinstance(question, str) else ""
        self._max = max if isinstance(max, int) and max > 0 else None
        self._print = printf if callable(printf) else print
        self._yes_consecutive_count = 0
        self._no_consecutive_count = 0
        self._yes_automatic = False
        self._no_automatic = False

    def ask(self, question: Optional[str] = None) -> bool:
        """
        Asks the question (the given one if a non-empty string, else the one given at construction,
        else "Undefined question") and returns True for yes or False for no. Once the user has opted
        to automatically answer all such questions, returns that answer without prompting.
        """

        def question_automatic(value: str) -> bool:
            # Offers to give the same answer (value) to all subsequent questions; if declined
            # then the consecutive counts start over, so that the offer is made again later.
            RARROW = "▶"
            LARROW = "◀"
            if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
                         f" Do you want to answer {value} to all such questions?"
                         f" {LARROW}{LARROW}{LARROW}"):
                return True
            self._yes_consecutive_count = 0
            self._no_consecutive_count = 0
            return False

        if self._yes_automatic:
            return True
        if self._no_automatic:
            return False
        if yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
            self._yes_consecutive_count += 1
            self._no_consecutive_count = 0
            if self._max and (self._yes_consecutive_count >= self._max):
                # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
                if question_automatic("YES"):
                    self._yes_automatic = True
            return True
        self._no_consecutive_count += 1
        self._yes_consecutive_count = 0
        if self._max and (self._no_consecutive_count >= self._max):
            # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
            if question_automatic("NO"):
                self._no_automatic = True
        return False
@@ -248,6 +248,12 @@
248
248
  "docutils" // Used only privately as a separate documentation-generation task for ReadTheDocs
249
249
  ],
250
250
 
251
+
252
+ "GNU General Public License v2 (GPLv2)": [
253
+ "pyinstaller",
254
+ "pyinstaller-hooks-contrib"
255
+ ],
256
+
251
257
  "MIT/X11 Derivative": [
252
258
  // The license used by libxkbcommon is complicated and involves numerous included licenses,
253
259
  // but all are permissive.
dcicutils/misc_utils.py CHANGED
@@ -4,6 +4,7 @@ This file contains functions that might be generally useful.
4
4
 
5
5
  from collections import namedtuple
6
6
  import appdirs
7
+ from copy import deepcopy
7
8
  import contextlib
8
9
  import datetime
9
10
  import functools
@@ -2199,28 +2200,58 @@ def merge_key_value_dict_lists(x, y):
2199
2200
  return [key_value_dict(k, v) for k, v in merged.items()]
2200
2201
 
2201
2202
 
2202
def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]],
                  full: bool = False,  # deprecated
                  expand_lists: Optional[bool] = None,
                  primitive_lists: bool = False,
                  copy: bool = False, _recursing: bool = False) -> Union[dict, List[Any]]:
    """
    Merges the given source dictionary or list into the target dictionary or list and returns the
    result. This MAY well change the given target (dictionary or list) IN PLACE ... UNLESS the copy
    argument is True, then the given target will not change as a local copy is made (and returned).

    If the expand_lists argument is True then any target list longer than the corresponding
    source list will have its extra elements merged with the last element of the source; the
    full argument (is deprecated and) is a synonym for this. The default is False.

    If the primitive_lists argument is True then lists of primitives (i.e. lists in which
    NONE of its elements are dictionaries, lists, or tuples) which are dictionary values will
    themselves be treated like primitives, meaning the whole of a source list will replace the
    corresponding target; otherwise they will be merged normally, meaning each element of a source
    list will be merged, recursively, into the corresponding target list. This applies at every
    nesting level of the merged structure. The default is False.

    If the target is None then the source is returned as-is. A source which is None, or an
    empty dictionary or list, never replaces the target.
    """
    def is_primitive_list(value: Any) -> bool:  # noqa
        return isinstance(value, list) and not any(isinstance(item, (dict, list, tuple)) for item in value)

    def merge(target_item: Any, source_item: Any) -> Any:  # noqa
        # Recursive step; carries along ALL of the merge options (including primitive_lists),
        # and marks the call as recursive so that any copy is made only once, at the top level.
        return merge_objects(target_item, source_item,
                             expand_lists=expand_lists, primitive_lists=primitive_lists, _recursing=True)

    if target is None:
        return source
    if expand_lists not in (True, False):
        # Not explicitly specified; fall back to the deprecated full argument.
        expand_lists = full is True
    if (copy is True) and (_recursing is not True):
        target = deepcopy(target)
    if isinstance(target, dict) and isinstance(source, dict) and source:
        for key, value in source.items():
            if key not in target:
                target[key] = value
            elif (primitive_lists is True) and is_primitive_list(target[key]) and is_primitive_list(value):
                target[key] = value
            else:
                target[key] = merge(target[key], value)
    elif isinstance(target, list) and isinstance(source, list) and source:
        for i in range(max(len(source), len(target))):
            if i < len(target):
                if i < len(source):
                    target[i] = merge(target[i], source[i])
                elif expand_lists is True:
                    target[i] = merge(target[i], source[len(source) - 1])
            else:
                target.append(source[i])
    elif source not in (None, {}, []):
        # Note that falsy values like 0, False, or an empty string DO replace the target.
        target = source
    return target
2226
2257
 
@@ -1,6 +1,5 @@
1
1
  from copy import deepcopy
2
2
  from functools import lru_cache
3
- import re
4
3
  from typing import Any, Callable, List, Optional, Tuple, Type, Union
5
4
  from dcicutils.data_readers import RowReader
6
5
  from dcicutils.misc_utils import create_readonly_object
@@ -14,11 +13,9 @@ class PortalObject:
14
13
 
15
14
  _PROPERTY_DELETION_SENTINEL = RowReader.CELL_DELETION_SENTINEL
16
15
 
17
- def __init__(self, data: dict, portal: Portal = None,
18
- schema: Optional[Union[dict, Schema]] = None, type: Optional[str] = None) -> None:
16
+ def __init__(self, data: dict, portal: Optional[Portal] = None, type: Optional[str] = None) -> None:
19
17
  self._data = data if isinstance(data, dict) else {}
20
18
  self._portal = portal if isinstance(portal, Portal) else None
21
- self._schema = schema if isinstance(schema, dict) else (schema.data if isinstance(schema, Schema) else None)
22
19
  self._type = type if isinstance(type, str) else ""
23
20
 
24
21
  @property
@@ -32,7 +29,7 @@ class PortalObject:
32
29
  @property
33
30
  @lru_cache(maxsize=1)
34
31
  def type(self) -> str:
35
- return self._type or Portal.get_schema_type(self._data) or (Schema(self._schema).type if self._schema else "")
32
+ return self._type or Portal.get_schema_type(self._data) or ""
36
33
 
37
34
  @property
38
35
  @lru_cache(maxsize=1)
@@ -47,7 +44,7 @@ class PortalObject:
47
44
  @property
48
45
  @lru_cache(maxsize=1)
49
46
  def schema(self) -> Optional[dict]:
50
- return self._schema if self._schema else (self._portal.get_schema(self.type) if self._portal else None)
47
+ return self._portal.get_schema(self.type) if self._portal else None
51
48
 
52
49
  def copy(self) -> PortalObject:
53
50
  return PortalObject(deepcopy(self.data), portal=self.portal, type=self.type)
@@ -59,39 +56,29 @@ class PortalObject:
59
56
  Returns the list of all identifying property names of this Portal object which actually have values.
60
57
  Implicitly include "uuid" and "identifier" properties as identifying properties if they are actually
61
58
  properties in the object schema, and favor these (first); defavor "aliases"; no other ordering defined.
59
+ Changed (2024-05-26) to use portal_utils.get_identifying_property_names; migrating some intricate stuff there.
62
60
  """
63
- if not (schema := self.schema) or not (schema_identifying_properties := schema.get("identifyingProperties")):
64
- return None
65
- identifying_properties = []
66
- for identifying_property in schema_identifying_properties:
67
- if identifying_property not in ["uuid", "identifier", "aliases"]:
68
- if self._data.get(identifying_property):
69
- identifying_properties.append(identifying_property)
70
- if self._data.get("identifier"):
71
- identifying_properties.insert(0, "identifier")
72
- if self._data.get("uuid"):
73
- identifying_properties.insert(0, "uuid")
74
- if "aliases" in schema_identifying_properties and self._data.get("aliases"):
75
- identifying_properties.append("aliases")
76
- return identifying_properties or None
61
+ # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
62
+ return self._portal.get_identifying_property_names(self.type, portal_object=self._data) if self._portal else []
77
63
 
78
64
  @lru_cache(maxsize=8192)
79
65
  def lookup(self, raw: bool = False,
80
66
  ref_lookup_strategy: Optional[Callable] = None) -> Tuple[Optional[PortalObject], Optional[str], int]:
67
+ if not (identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy)):
68
+ return None, None, 0
81
69
  nlookups = 0
82
70
  first_identifying_path = None
83
71
  try:
84
- if identifying_paths := self._get_identifying_paths(ref_lookup_strategy=ref_lookup_strategy):
85
- for identifying_path in identifying_paths:
86
- if not first_identifying_path:
87
- first_identifying_path = identifying_path
88
- nlookups += 1
89
- if (value := self._portal.get(identifying_path, raw=raw)) and (value.status_code == 200):
90
- return (
91
- PortalObject(value.json(), portal=self._portal, type=self.type if raw else None),
92
- identifying_path,
93
- nlookups
94
- )
72
+ for identifying_path in identifying_paths:
73
+ if not first_identifying_path:
74
+ first_identifying_path = identifying_path
75
+ nlookups += 1
76
+ if self._portal and (item := self._portal.get(identifying_path, raw=raw)) and (item.status_code == 200):
77
+ return (
78
+ PortalObject(item.json(), portal=self._portal, type=self.type if raw else None),
79
+ identifying_path,
80
+ nlookups
81
+ )
95
82
  except Exception:
96
83
  pass
97
84
  return None, first_identifying_path, nlookups
@@ -159,64 +146,12 @@ class PortalObject:
159
146
 
160
147
  @lru_cache(maxsize=1)
161
148
  def _get_identifying_paths(self, ref_lookup_strategy: Optional[Callable] = None) -> Optional[List[str]]:
162
- """
163
- Returns a list of the possible Portal URL paths identifying this Portal object.
164
- """
165
- identifying_paths = []
166
- if not (identifying_properties := self.identifying_properties):
167
- if self.uuid:
168
- if self.type:
169
- identifying_paths.append(f"/{self.type}/{self.uuid}")
170
- identifying_paths.append(f"/{self.uuid}")
171
- return identifying_paths
172
- for identifying_property in identifying_properties:
173
- if identifying_value := self._data.get(identifying_property):
174
- if identifying_property == "uuid":
175
- if self.type:
176
- identifying_paths.append(f"/{self.type}/{identifying_value}")
177
- identifying_paths.append(f"/{identifying_value}")
178
- # For now at least we include the path both with and without the schema type component,
179
- # as for some identifying values, it works (only) with, and some, it works (only) without.
180
- # For example: If we have FileSet with "accession", an identifying property, with value
181
- # SMAFSFXF1RO4 then /SMAFSFXF1RO4 works but /FileSet/SMAFSFXF1RO4 does not; and
182
- # conversely using "submitted_id", also an identifying property, with value
183
- # UW_FILE-SET_COLO-829BL_HI-C_1 then /UW_FILE-SET_COLO-829BL_HI-C_1 does
184
- # not work but /FileSet/UW_FILE-SET_COLO-829BL_HI-C_1 does work.
185
- elif isinstance(identifying_value, list):
186
- for identifying_value_item in identifying_value:
187
- if self.type:
188
- identifying_paths.append(f"/{self.type}/{identifying_value_item}")
189
- identifying_paths.append(f"/{identifying_value_item}")
190
- else:
191
- # TODO: Import from somewhere ...
192
- lookup_options = 0
193
- if schema := self.schema:
194
- # TODO: Hook into the ref_lookup_strategy thing in structured_data to make
195
- # sure we check accession format (since it does not have a pattern).
196
- if callable(ref_lookup_strategy):
197
- lookup_options, ref_validator = ref_lookup_strategy(
198
- self._portal, self.type, schema, identifying_value)
199
- if callable(ref_validator):
200
- if ref_validator(schema, identifying_property, identifying_value) is False:
201
- continue
202
- if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
203
- if not re.match(pattern, identifying_value):
204
- # If this identifying value is for a (identifying) property which has a
205
- # pattern, and the value does NOT match the pattern, then do NOT include
206
- # this value as an identifying path, since it cannot possibly be found.
207
- continue
208
- if not lookup_options:
209
- lookup_options = Portal.LOOKUP_DEFAULT
210
- if Portal.is_lookup_root_first(lookup_options):
211
- identifying_paths.append(f"/{identifying_value}")
212
- if Portal.is_lookup_specified_type(lookup_options) and self.type:
213
- identifying_paths.append(f"/{self.type}/{identifying_value}")
214
- if Portal.is_lookup_root(lookup_options) and not Portal.is_lookup_root_first(lookup_options):
215
- identifying_paths.append(f"/{identifying_value}")
216
- if Portal.is_lookup_subtypes(lookup_options):
217
- for subtype_name in self._portal.get_schema_subtype_names(self.type):
218
- identifying_paths.append(f"/{subtype_name}/{identifying_value}")
219
- return identifying_paths or None
149
+ if not self._portal and (uuid := self.uuid):
150
+ return [f"/{uuid}"]
151
+ # Migrating to and unifying this in portal_utils.Portal.get_identifying_paths (2024-05-26).
152
+ return self._portal.get_identifying_paths(self._data,
153
+ portal_type=self.schema,
154
+ lookup_strategy=ref_lookup_strategy) if self._portal else None
220
155
 
221
156
  def _normalized_refs(self, refs: List[dict]) -> Tuple[PortalObject, int]:
222
157
  """
dcicutils/portal_utils.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from collections import deque
2
2
  from functools import lru_cache
3
+ from dcicutils.function_cache_decorator import function_cache
3
4
  import io
4
5
  import json
5
6
  from pyramid.config import Configurator as PyramidConfigurator
@@ -18,6 +19,7 @@ from wsgiref.simple_server import make_server as wsgi_make_server
18
19
  from dcicutils.common import APP_SMAHT, OrchestratedApp, ORCHESTRATED_APPS
19
20
  from dcicutils.ff_utils import get_metadata, get_schema, patch_metadata, post_metadata
20
21
  from dcicutils.misc_utils import to_camel_case, VirtualApp
22
+ from dcicutils.schema_utils import get_identifying_properties
21
23
  from dcicutils.tmpfile_utils import temporary_file
22
24
 
23
25
  Portal = Type["Portal"] # Forward type reference for type hints.
@@ -48,15 +50,16 @@ class Portal:
48
50
  FILE_TYPE_SCHEMA_NAME = "File"
49
51
 
50
52
  # Object lookup strategies; on a per-reference (type/value) basis, used currently ONLY by
51
- # structured_data.py; controlled by an optional ref_lookup_strategy callable; default is
53
+ # structured_data.py; controlled by an optional lookup_strategy callable; default is
52
54
  # lookup at root path but after the specified type path lookup, and then lookup all subtypes;
53
55
  # can choose to lookup root path first, or not lookup root path at all, or not lookup
54
- # subtypes at all; the ref_lookup_strategy callable if specified should take a type_name
56
+ # subtypes at all; the lookup_strategy callable if specified should take a type_name
55
57
  # and value (string) arguments and return an integer of any of the below ORed together.
56
58
  # The main purpose of this is optimization; to minimize portal lookups; since for example,
57
59
  # currently at least, /{type}/{accession} does not work but /{accession} does; so we
58
60
  # currently (smaht-portal/.../ingestion_processors) use LOOKUP_ROOT_FIRST for this.
59
61
  # And current usage NEVER has LOOKUP_SUBTYPES turned OFF; but support just in case.
62
+ LOOKUP_UNDEFINED = 0
60
63
  LOOKUP_SPECIFIED_TYPE = 0x0001
61
64
  LOOKUP_ROOT = 0x0002
62
65
  LOOKUP_ROOT_FIRST = 0x0004 | LOOKUP_ROOT
@@ -205,23 +208,6 @@ class Portal:
205
208
  def vapp(self) -> Optional[TestApp]:
206
209
  return self._vapp
207
210
 
208
- @staticmethod
209
- def is_lookup_specified_type(lookup_options: int) -> bool:
210
- return (lookup_options &
211
- Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
212
-
213
- @staticmethod
214
- def is_lookup_root(lookup_options: int) -> bool:
215
- return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
216
-
217
- @staticmethod
218
- def is_lookup_root_first(lookup_options: int) -> bool:
219
- return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
220
-
221
- @staticmethod
222
- def is_lookup_subtypes(lookup_options: int) -> bool:
223
- return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES
224
-
225
211
  def get(self, url: str, follow: bool = True,
226
212
  raw: bool = False, database: bool = False, raise_for_status: bool = False, **kwargs) -> OptionalResponse:
227
213
  url = self.url(url, raw, database)
@@ -305,7 +291,10 @@ class Portal:
305
291
 
306
292
  @lru_cache(maxsize=100)
307
293
  def get_schema(self, schema_name: str) -> Optional[dict]:
308
- return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
294
+ try:
295
+ return get_schema(self.schema_name(schema_name), portal_vapp=self.vapp, key=self.key)
296
+ except Exception:
297
+ return None
309
298
 
310
299
  @lru_cache(maxsize=1)
311
300
  def get_schemas(self) -> dict:
@@ -416,6 +405,218 @@ class Portal:
416
405
  return []
417
406
  return schemas_super_type_map.get(type_name, [])
418
407
 
408
    @function_cache(maxsize=100, serialize_key=True)
    def get_identifying_paths(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
                              first_only: bool = False,
                              lookup_strategy: Optional[Union[Callable, bool]] = None) -> List[str]:
        """
        Returns the list of the identifying Portal (URL) paths for the given Portal object. Favors any uuid
        and identifier based paths and defavors aliases based paths (ala self.get_identifying_property_names);
        no other ordering defined. Returns an empty list if no identifying properties or otherwise not found.
        Note that this is a newer version of what was in portal_object_utils and just uses the ref_lookup_strategy
        module directly, as it no longer needs to be exposed (to smaht-portal/ingester and smaht-submitr) and so
        this is a first step toward internalizing it to structured_data/portal_utils/portal_object_utils usages.

        The given portal_type may be a type name or an actual schema dictionary (in which case its title is
        used); if it is neither then the type is taken from the given object itself. If first_only is True
        then returns as soon as any identifying property has yielded at least one path. The lookup_strategy
        may be a callable, or False to use no lookup strategy at all; anything else uses Portal._lookup_strategy.
        """
        # Helpers to test the individual bits of the lookup options returned by the lookup strategy.
        def is_lookup_specified_type(lookup_options: int) -> bool:
            return (lookup_options & Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
        def is_lookup_root(lookup_options: int) -> bool:  # noqa
            return (lookup_options & Portal.LOOKUP_ROOT) == Portal.LOOKUP_ROOT
        def is_lookup_root_first(lookup_options: int) -> bool:  # noqa
            return (lookup_options & Portal.LOOKUP_ROOT_FIRST) == Portal.LOOKUP_ROOT_FIRST
        def is_lookup_subtypes(lookup_options: int) -> bool:  # noqa
            return (lookup_options & Portal.LOOKUP_SUBTYPES) == Portal.LOOKUP_SUBTYPES

        results = []
        if not isinstance(portal_object, dict):
            return results
        if not (isinstance(portal_type, str) and portal_type):
            if isinstance(portal_type, dict):
                # It appears that the given portal_type is an actual schema dictionary.
                portal_type = self.schema_name(portal_type.get("title"))
            if not (isinstance(portal_type, str) and portal_type):
                if not (portal_type := self.get_schema_type(portal_object)):
                    return results
        if not callable(lookup_strategy):
            lookup_strategy = None if lookup_strategy is False else Portal._lookup_strategy
        for identifying_property in self.get_identifying_property_names(portal_type):
            if not (identifying_value := portal_object.get(identifying_property)):
                continue
            # The get_identifying_property_names call above ensures uuid is first if it is in the object.
            # And also note that ALL schemas do in fact have identifyingProperties which do in fact have
            # uuid, except for a couple "Test" ones, and (for some reason) SubmittedItem; otherwise we
            # might have a special case to check the Portal object explicitly for uuid, but no need.
            if identifying_property == "uuid":
                #
                # Note this idiosyncrasy with Portal paths: the only way we do NOT get a (HTTP 301) redirect
                # is if we use the lower-case-dashed-plural based version of the path, e.g. all of these:
                #
                # - /d13d06c1-218e-4f61-aaf0-91f226248b3c
                # - /d13d06c1-218e-4f61-aaf0-91f226248b3c/
                # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c
                # - /FileFormat/d13d06c1-218e-4f61-aaf0-91f226248b3c/
                # - /file-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c
                #
                # Will result in a (HTTP 301) redirect to:
                #
                # - /file-formats/d13d06c1-218e-4f61-aaf0-91f226248b3c/
                #
                # Unfortunately, this code here has no reasonable way of getting that lower-case-dashed-plural
                # based name (e.g. file-formats) from the schema/portal type name (e.g. FileFormat); as the
                # information is contained, for this example, in the snovault.collection decorator for the
                # endpoint definition in smaht-portal/.../types/file_format.py. Unfortunate merely because
                # behind-the-scenes an extra round-trip HTTP request will occur, but happens automatically.
                # And note the distinction of just using /{uuid} here rather than /{type}/{uuid} as in the else
                # statement below is not really necessary; just here for emphasis that this is all that's needed.
                #
                # TODO
                # Consider (from PR-308) writing a portal API for retrieving possible path formats.
                #
                if first_only is True:
                    results.append(f"/{portal_type}/{identifying_value}")
                else:
                    results.append(f"/{identifying_value}")
            elif isinstance(identifying_value, list):
                # A list valued identifying property (e.g. aliases); one type based path per non-empty item.
                for identifying_value_item in identifying_value:
                    if identifying_value_item:
                        results.append(f"/{portal_type}/{identifying_value_item}")
            else:
                lookup_options = Portal.LOOKUP_UNDEFINED
                if schema := self.get_schema(portal_type):
                    if callable(lookup_strategy):
                        lookup_options, validator = lookup_strategy(self, portal_type, schema, identifying_value)
                        if callable(validator):
                            # An explicit False from the validator means this value cannot possibly
                            # identify an object of this type via this property; so skip it entirely.
                            if validator(schema, identifying_property, identifying_value) is False:
                                continue
                    if pattern := schema.get("properties", {}).get(identifying_property, {}).get("pattern"):
                        if not re.match(pattern, identifying_value):
                            # If this identifying value is for a (identifying) property which has a
                            # pattern, and the value does NOT match the pattern, then do NOT include
                            # this value as an identifying path, since it cannot possibly be found.
                            continue
                if lookup_options == Portal.LOOKUP_UNDEFINED:
                    lookup_options = Portal.LOOKUP_DEFAULT
                # The order of the paths added here is: root (if root-first), then the specified
                # type, then root (if root but not root-first), and then each of the subtypes.
                if is_lookup_root_first(lookup_options):
                    results.append(f"/{identifying_value}")
                if is_lookup_specified_type(lookup_options) and portal_type:
                    results.append(f"/{portal_type}/{identifying_value}")
                if is_lookup_root(lookup_options) and not is_lookup_root_first(lookup_options):
                    results.append(f"/{identifying_value}")
                if is_lookup_subtypes(lookup_options):
                    for subtype_name in self.get_schema_subtype_names(portal_type):
                        results.append(f"/{subtype_name}/{identifying_value}")
            if (first_only is True) and results:
                return results
        return results
510
+
511
+ @function_cache(maxsize=100, serialize_key=True)
512
+ def get_identifying_path(self, portal_object: dict, portal_type: Optional[Union[str, dict]] = None,
513
+ lookup_strategy: Optional[Union[Callable, bool]] = None) -> Optional[str]:
514
+ if identifying_paths := self.get_identifying_paths(portal_object, portal_type, first_only=True,
515
+ lookup_strategy=lookup_strategy):
516
+ return identifying_paths[0]
517
+ return None
518
+
519
+ @function_cache(maxsize=100, serialize_key=True)
520
+ def get_identifying_property_names(self, schema: Union[str, dict],
521
+ portal_object: Optional[dict] = None) -> List[str]:
522
+ """
523
+ Returns the list of identifying property names for the given Portal schema, which may be
524
+ either a schema name or a schema object. If a Portal object is also given then restricts this
525
+ set of identifying properties to those which actually have values within this Portal object.
526
+ Favors the uuid and identifier property names and defavors the aliases property name; no other
527
+ ordering imposed. Returns empty list if no identifying properties or otherwise not found.
528
+ """
529
+ results = []
530
+ if isinstance(schema, str):
531
+ if not (schema := self.get_schema(schema)):
532
+ return results
533
+ elif not isinstance(schema, dict):
534
+ return results
535
+ if not (identifying_properties := get_identifying_properties(schema)):
536
+ return results
537
+ identifying_properties = list(set(identifying_properties)) # paranoid dedup
538
+ identifying_properties = [*identifying_properties] # copy so as not to change schema if given
539
+ favored_identifying_properties = ["uuid", "identifier"]
540
+ defavored_identifying_properties = ["aliases"]
541
+ for favored_identifying_property in reversed(favored_identifying_properties):
542
+ if favored_identifying_property in identifying_properties:
543
+ identifying_properties.remove(favored_identifying_property)
544
+ identifying_properties.insert(0, favored_identifying_property)
545
+ for defavored_identifying_property in defavored_identifying_properties:
546
+ if defavored_identifying_property in identifying_properties:
547
+ identifying_properties.remove(defavored_identifying_property)
548
+ identifying_properties.append(defavored_identifying_property)
549
+ if isinstance(portal_object, dict):
550
+ for identifying_property in [*identifying_properties]:
551
+ if portal_object.get(identifying_property) is None:
552
+ identifying_properties.remove(identifying_property)
553
+ return identifying_properties
554
+
555
    @staticmethod
    def _lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
        """
        Default lookup strategy; returns a tuple of the lookup options (the Portal.LOOKUP_xyz values) to use
        when looking up an object of the given type by the given (identifying) value, and a validator function
        (ref_validator, below). If no schema is given it is obtained from the given portal using the given
        type name. The lookup options are Portal.LOOKUP_ROOT if the schema has an accession property and the
        value is a properly formatted accession ID; or Portal.LOOKUP_SPECIFIED_TYPE if the value matches the
        pattern of the submitted_id property of the schema; and Portal.LOOKUP_DEFAULT otherwise.
        """
        #
        # Note this slightly odd situation WRT object lookups by submitted_id and accession:
        # -----------------------------+-----------------------------------------------+---------------+
        # PATH                         | EXAMPLE                                       | LOOKUP RESULT |
        # -----------------------------+-----------------------------------------------+---------------+
        # /submitted_id                | /UW_FILE-SET_COLO-829BL_HI-C_1                | NOT FOUND     |
        # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND         |
        # /SubmittedFile/submitted_id  | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1  | FOUND         |
        # /File/submitted_id           | /File/UW_FILE-SET_COLO-829BL_HI-C_1           | NOT FOUND     |
        # -----------------------------+-----------------------------------------------+---------------+
        # /accession                   | /SMAFSFXF1RO4                                 | FOUND         |
        # /UnalignedReads/accession    | /UnalignedReads/SMAFSFXF1RO4                  | NOT FOUND     |
        # /SubmittedFile/accession     | /SubmittedFile/SMAFSFXF1RO4                   | NOT FOUND     |
        # /File/accession              | /File/SMAFSFXF1RO4                            | FOUND         |
        # -----------------------------+-----------------------------------------------+---------------+
        #
        def ref_validator(schema: Optional[dict],
                          property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
            """
            Returns False iff objects of type represented by the given schema, CANNOT be referenced with
            a Portal path using the given property name and its given property value, otherwise returns None.

            For example, if the schema is for UnalignedReads and the property name is accession, then we will
            return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
            will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
            will continue executing its default behavior, which is to check other ways in which the given type
            CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
            and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.

            The goal (in structured_data) being to detect if a type is being referenced in such a way that
            CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
            if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed
            an identifying property for the given type.
            """
            # NOTE(review): despite the Optional type hint the schema is dereferenced unconditionally here.
            if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
                if (property_format == "accession") and (property_name == "accession"):
                    if not Portal._is_accession_id(property_value):
                        return False
            return None

        DEFAULT_RESULT = (Portal.LOOKUP_DEFAULT, ref_validator)
        if not value:
            return DEFAULT_RESULT
        if not schema:
            if not isinstance(portal, Portal) or not (schema := portal.get_schema(type_name)):
                return DEFAULT_RESULT
        if schema_properties := schema.get("properties"):
            if schema_properties.get("accession") and Portal._is_accession_id(value):
                # Case: lookup by accession (only by root).
                return (Portal.LOOKUP_ROOT, ref_validator)
            elif schema_property_info_submitted_id := schema_properties.get("submitted_id"):
                if schema_property_pattern_submitted_id := schema_property_info_submitted_id.get("pattern"):
                    if re.match(schema_property_pattern_submitted_id, value):
                        # Case: lookup by submitted_id (only by specified type).
                        return (Portal.LOOKUP_SPECIFIED_TYPE, ref_validator)
        return DEFAULT_RESULT
613
+
614
+ @staticmethod
615
+ def _is_accession_id(value: str) -> bool:
616
+ # This is here for now because of problems with circular dependencies.
617
+ # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
618
+ return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
619
+
419
620
  def url(self, url: str, raw: bool = False, database: bool = False) -> str:
420
621
  if not isinstance(url, str) or not url:
421
622
  return "/"
@@ -516,6 +717,22 @@ class Portal:
516
717
  response = TestResponseWrapper(response)
517
718
  return response
518
719
 
720
+ @staticmethod
721
+ def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
722
+ if isinstance(arg, TestApp):
723
+ return arg
724
+ elif isinstance(arg, VirtualApp):
725
+ if not isinstance(arg.wrapped_app, TestApp):
726
+ raise Exception("Portal._create_vapp VirtualApp argument error.")
727
+ return arg.wrapped_app
728
+ if isinstance(arg, PyramidRouter):
729
+ router = arg
730
+ elif isinstance(arg, str) or not arg:
731
+ router = pyramid_get_app(arg or "development.ini", "app")
732
+ else:
733
+ raise Exception("Portal._create_vapp argument error.")
734
+ return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
735
+
519
736
  @staticmethod
520
737
  def create_for_testing(arg: Optional[Union[str, bool, List[dict], dict, Callable]] = None) -> Portal:
521
738
  if isinstance(arg, list) or isinstance(arg, dict) or isinstance(arg, Callable):
@@ -547,22 +764,6 @@ class Portal:
547
764
  with temporary_file(content=minimal_ini_for_testing, suffix=".ini") as ini_file:
548
765
  return Portal(ini_file)
549
766
 
550
- @staticmethod
551
- def _create_vapp(arg: Union[TestApp, VirtualApp, PyramidRouter, str] = None) -> TestApp:
552
- if isinstance(arg, TestApp):
553
- return arg
554
- elif isinstance(arg, VirtualApp):
555
- if not isinstance(arg.wrapped_app, TestApp):
556
- raise Exception("Portal._create_vapp VirtualApp argument error.")
557
- return arg.wrapped_app
558
- if isinstance(arg, PyramidRouter):
559
- router = arg
560
- elif isinstance(arg, str) or not arg:
561
- router = pyramid_get_app(arg or "development.ini", "app")
562
- else:
563
- raise Exception("Portal._create_vapp argument error.")
564
- return TestApp(router, {"HTTP_ACCEPT": Portal.MIME_TYPE_JSON, "REMOTE_USER": "TEST"})
565
-
566
767
  @staticmethod
567
768
  def _create_router_for_testing(endpoints: Optional[List[Dict[str, Union[str, Callable]]]] = None) -> PyramidRouter:
568
769
  if isinstance(endpoints, dict):
dcicutils/schema_utils.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import os
2
2
  from typing import Any, Dict, List, Optional, Tuple
3
-
4
3
  from dcicutils.misc_utils import to_camel_case
5
4
 
6
5
 
@@ -9,7 +8,6 @@ class JsonSchemaConstants:
9
8
  ARRAY = "array"
10
9
  BOOLEAN = "boolean"
11
10
  DEFAULT = "default"
12
- DEPENDENT_REQUIRED = "dependentRequired"
13
11
  ENUM = "enum"
14
12
  FORMAT = "format"
15
13
  INTEGER = "integer"
@@ -31,10 +29,6 @@ class EncodedSchemaConstants:
31
29
  LINK_TO = "linkTo"
32
30
  MERGE_REF = "$merge"
33
31
  MIXIN_PROPERTIES = "mixinProperties"
34
- SUBMISSION_COMMENT = "submissionComment"
35
- SUBMISSION_EXAMPLES = "submissionExamples"
36
- SUBMITTER_REQUIRED = "submitterRequired"
37
- SUGGESTED_ENUM = "suggested_enum"
38
32
  UNIQUE_KEY = "uniqueKey"
39
33
 
40
34
 
@@ -209,50 +203,6 @@ def get_description(schema: Dict[str, Any]) -> str:
209
203
  return schema.get(SchemaConstants.DESCRIPTION, "")
210
204
 
211
205
 
212
- def is_submitter_required(schema: Dict[str, Any]) -> bool:
213
- """Return True if the schema is marked as required for submitters.
214
-
215
- Specifically, required for external (i.e. non-admin) submitters.
216
-
217
- This is typically validated within the context of a oneOf, anyOf,
218
- or allOf schema on an item type which is used within the team and
219
- by external submitters, and is tricky to pick up on automatically.
220
- """
221
- return schema.get(SchemaConstants.SUBMITTER_REQUIRED, False)
222
-
223
-
224
- def get_submission_comment(schema: Dict[str, Any]) -> str:
225
- """Return the submission comment for a property.
226
-
227
- Custom property that can be manually added to a schema to provide
228
- additional context for submitters.
229
- """
230
- return schema.get(SchemaConstants.SUBMISSION_COMMENT, "")
231
-
232
-
233
- def get_submission_examples(schema: Dict[str, Any]) -> List[str]:
234
- """Return the submission example for a property.
235
-
236
- Custom property that can be manually added to a schema to provide
237
- an example for submitters.
238
- """
239
- return schema.get(SchemaConstants.SUBMISSION_EXAMPLES, [])
240
-
241
-
242
- def get_suggested_enum(schema: Dict[str, Any]) -> List[str]:
243
- """Return the suggested enum for a property.
244
-
245
- Custom property that can be manually added to a schema to provide
246
- a suggested list of values for submitters.
247
- """
248
- return schema.get(SchemaConstants.SUGGESTED_ENUM, [])
249
-
250
-
251
- def get_dependent_required(schema: Dict[str, Any]) -> Dict[str, List[str]]:
252
- """Return the dependent required properties of a schema."""
253
- return schema.get(SchemaConstants.DEPENDENT_REQUIRED, {})
254
-
255
-
256
206
  class Schema:
257
207
 
258
208
  def __init__(self, schema: dict, type: Optional[str] = None) -> None:
@@ -11,7 +11,6 @@ from webtest.app import TestApp
11
11
  from dcicutils.common import OrchestratedApp
12
12
  from dcicutils.data_readers import CsvReader, Excel, RowReader
13
13
  from dcicutils.datetime_utils import normalize_date_string, normalize_datetime_string
14
- from dcicutils.file_utils import search_for_file
15
14
  from dcicutils.misc_utils import (create_dict, create_readonly_object, is_uuid, load_json_if,
16
15
  merge_objects, remove_empty_properties, right_trim, split_string,
17
16
  to_boolean, to_enum, to_float, to_integer, VirtualApp)
@@ -56,7 +55,7 @@ class StructuredDataSet:
56
55
  remove_empty_objects_from_lists: bool = True,
57
56
  ref_lookup_strategy: Optional[Callable] = None,
58
57
  ref_lookup_nocache: bool = False,
59
- norefs: bool = False,
58
+ norefs: bool = False, merge: bool = False,
60
59
  progress: Optional[Callable] = None,
61
60
  debug_sleep: Optional[str] = None) -> None:
62
61
  self._progress = progress if callable(progress) else None
@@ -75,6 +74,7 @@ class StructuredDataSet:
75
74
  self._nrows = 0
76
75
  self._autoadd_properties = autoadd if isinstance(autoadd, dict) and autoadd else None
77
76
  self._norefs = True if norefs is True else False
77
+ self._merge = True if merge is True else False # New merge functionality (2024-05-25)
78
78
  self._debug_sleep = None
79
79
  if debug_sleep:
80
80
  try:
@@ -98,13 +98,13 @@ class StructuredDataSet:
98
98
  remove_empty_objects_from_lists: bool = True,
99
99
  ref_lookup_strategy: Optional[Callable] = None,
100
100
  ref_lookup_nocache: bool = False,
101
- norefs: bool = False,
101
+ norefs: bool = False, merge: bool = False,
102
102
  progress: Optional[Callable] = None,
103
103
  debug_sleep: Optional[str] = None) -> StructuredDataSet:
104
104
  return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
105
105
  remove_empty_objects_from_lists=remove_empty_objects_from_lists,
106
106
  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
107
- norefs=norefs, progress=progress, debug_sleep=debug_sleep)
107
+ norefs=norefs, merge=merge, progress=progress, debug_sleep=debug_sleep)
108
108
 
109
109
  def validate(self, force: bool = False) -> None:
110
110
  def data_without_deleted_properties(data: dict) -> dict:
@@ -208,14 +208,6 @@ class StructuredDataSet:
208
208
  result.append({"type": type_name, "file": file_name})
209
209
  return result
210
210
 
211
- def upload_files_located(self,
212
- location: Union[str, Optional[List[str]]] = None, recursive: bool = False) -> List[str]:
213
- upload_files = copy.deepcopy(self.upload_files)
214
- for upload_file in upload_files:
215
- if file_path := search_for_file(upload_file["file"], location, recursive=recursive, single=True):
216
- upload_file["path"] = file_path
217
- return upload_files
218
-
219
211
  @property
220
212
  def nrows(self) -> int:
221
213
  return self._nrows
@@ -350,18 +342,23 @@ class StructuredDataSet:
350
342
 
351
343
  def _load_json_file(self, file: str) -> None:
352
344
  with open(file) as f:
353
- file_json = json.load(f)
354
- schema_inferred_from_file_name = Schema.type_name(file)
355
- if self._portal.get_schema(schema_inferred_from_file_name) is not None:
345
+ data = json.load(f)
346
+ if ((schema_name_inferred_from_file_name := Schema.type_name(file)) and
347
+ (self._portal.get_schema(schema_name_inferred_from_file_name) is not None)): # noqa
356
348
  # If the JSON file name looks like a schema name then assume it
357
349
  # contains an object or an array of object of that schema type.
358
- self._add(Schema.type_name(file), file_json)
359
- elif isinstance(file_json, dict):
350
+ if self._merge: # New merge functionality (2024-05-25)
351
+ data = self._merge_with_existing_portal_object(data, schema_name_inferred_from_file_name)
352
+ self._add(Schema.type_name(file), data)
353
+ elif isinstance(data, dict):
360
354
  # Otherwise if the JSON file name does not look like a schema name then
361
355
  # assume it a dictionary where each property is the name of a schema, and
362
356
  # which (each property) contains a list of object of that schema type.
363
- for schema_name in file_json:
364
- self._add(schema_name, file_json[schema_name])
357
+ for schema_name in data:
358
+ item = data[schema_name]
359
+ if self._merge: # New merge functionality (2024-05-25)
360
+ item = self._merge_with_existing_portal_object(item, schema_name)
361
+ self._add(schema_name, item)
365
362
 
366
363
  def _load_reader(self, reader: RowReader, type_name: str) -> None:
367
364
  schema = None
@@ -383,11 +380,13 @@ class StructuredDataSet:
383
380
  structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
384
381
  if self._autoadd_properties:
385
382
  self._add_properties(structured_row, self._autoadd_properties, schema)
383
+ if self._merge: # New merge functionality (2024-05-25)
384
+ structured_row = self._merge_with_existing_portal_object(structured_row, schema_name)
386
385
  if (prune_error := self._prune_structured_row(structured_row)) is not None:
387
386
  self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
388
387
  "error": prune_error}, "validation")
389
388
  else:
390
- self._add(type_name, structured_row)
389
+ self._add(type_name, structured_row) # TODO: why type_name and not schema_name?
391
390
  if self._progress:
392
391
  self._progress({
393
392
  PROGRESS.LOAD_ITEM: self._nrows,
@@ -428,6 +427,18 @@ class StructuredDataSet:
428
427
  if name not in structured_row and (not schema or schema.data.get("properties", {}).get(name)):
429
428
  structured_row[name] = properties[name]
430
429
 
430
+ def _merge_with_existing_portal_object(self, portal_object: dict, portal_type: str) -> dict:
431
+ """
432
+ Given a Portal object (presumably/in-practice from the given metadata), if there is
433
+ an existing Portal item, identified by the identifying properties for the given object,
434
+ then merges the given object into the existing one and returns the result; otherwise
435
+ just returns the given object. Note that the given object may be CHANGED in place.
436
+ """
437
+ for identifying_path in self._portal.get_identifying_paths(portal_object, portal_type):
438
+ if existing_portal_object := self._portal.get_metadata(identifying_path, raw=True, raise_exception=False):
439
+ return merge_objects(existing_portal_object, portal_object, primitive_lists=True)
440
+ return portal_object
441
+
431
442
  def _is_ref_lookup_specified_type(ref_lookup_flags: int) -> bool:
432
443
  return (ref_lookup_flags &
433
444
  Portal.LOOKUP_SPECIFIED_TYPE) == Portal.LOOKUP_SPECIFIED_TYPE
@@ -2,39 +2,45 @@ import re
2
2
  from typing import Optional
3
3
  from dcicutils.structured_data import Portal
4
4
 
5
+ # This function is exposed (to smaht-portal/ingester and smaht-submitr) only because previously,
6
+ # before it was fully developed, we had differing behaviors; but this has been unified; so this
7
+ # could now be internalized to structured_data, and portal_object_utils (TODO).
8
+
5
9
 
6
10
  def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str) -> (int, Optional[str]):
7
11
  #
8
- # FYI: Note this situation WRT object lookups ...
9
- #
10
- # /{submitted_id} # NOT FOUND
11
- # /UnalignedReads/{submitted_id} # OK
12
- # /SubmittedFile/{submitted_id} # OK
13
- # /File/{submitted_id} # NOT FOUND
14
- #
15
- # /{accession} # OK
16
- # /UnalignedReads/{accession} # NOT FOUND
17
- # /SubmittedFile/{accession} # NOT FOUND
18
- # /File/{accession} # OK
12
+ # Note this slightly odd situation WRT object lookups by submitted_id and accession:
13
+ # -----------------------------+-----------------------------------------------+---------------+
14
+ # PATH | EXAMPLE | LOOKUP RESULT |
15
+ # -----------------------------+-----------------------------------------------+---------------+
16
+ # /submitted_id | //UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
17
+ # /UnalignedReads/submitted_id | /UnalignedReads/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
18
+ # /SubmittedFile/submitted_id | /SubmittedFile/UW_FILE-SET_COLO-829BL_HI-C_1 | FOUND |
19
+ # /File/submitted_id | /File/UW_FILE-SET_COLO-829BL_HI-C_1 | NOT FOUND |
20
+ # -----------------------------+-----------------------------------------------+---------------+
21
+ # /accession | /SMAFSFXF1RO4 | FOUND |
22
+ # /UnalignedReads/accession | /UnalignedReads/SMAFSFXF1RO4 | NOT FOUND |
23
+ # /SubmittedFile/accession | /SubmittedFile/SMAFSFXF1RO4 | NOT FOUND |
24
+ # /File/accession | /File/SMAFSFXF1RO4 | FOUND |
25
+ # -----------------------------+-----------------------------------------------+---------------+
19
26
  #
20
27
  def ref_validator(schema: Optional[dict],
21
28
  property_name: Optional[str], property_value: Optional[str]) -> Optional[bool]:
22
29
  """
23
- Returns False iff the type represented by the given schema, can NOT be referenced by
24
- the given property name with the given property value, otherwise returns None.
30
+ Returns False iff objects of the type represented by the given schema CANNOT be referenced with
31
+ a Portal path using the given property name and its given property value, otherwise returns None.
25
32
 
26
- For example, if the schema is for the UnalignedReads type and the property name
27
- is accession, then we will return False iff the given property value is NOT a properly
28
- formatted accession ID. Otherwise, we will return None, which indicates that the
29
- caller (in dcicutils.structured_data.Portal.ref_exists) will continue executing
30
- its default behavior, which is to check other ways in which the given type can NOT
31
- be referenced by the given value, i.e. it checks other identifying properties for
32
- the type and makes sure any patterns (e.g. for submitted_id or uuid) are ahered to.
33
+ For example, if the schema is for UnalignedReads and the property name is accession, then we will
34
+ return False iff the given property value is NOT a properly formatted accession ID; otherwise, we
35
+ will return None, which indicates that the caller (e.g. dcicutils.structured_data.Portal.ref_exists)
36
+ will continue executing its default behavior, which is to check other ways in which the given type
37
+ CANNOT be referenced by the given value, i.e. it checks other identifying properties for the type
38
+ and makes sure any patterns (e.g. for submitted_id or uuid) are adhered to.
33
39
 
34
- The goal (in structured_data) being to detect if a type is being referenced in such
35
- a way that cannot possibly be allowed, i.e. because none of its identifying types
36
- are in the required form (if indeed there any requirements). Note that it is guaranteed
37
- that the given property name is indeed an identifying property for the given type.
40
+ The goal (in structured_data) being to detect if a type is being referenced in such a way that
41
+ CANNOT possibly be allowed, i.e. because none of its identifying types are in the required form,
42
+ if indeed there are any requirements. It is assumed/guaranteed the given property name is indeed an
43
+ identifying property for the given type.
38
44
  """
39
45
  if property_format := schema.get("properties", {}).get(property_name, {}).get("format"):
40
46
  if (property_format == "accession") and (property_name == "accession"):
@@ -62,6 +68,6 @@ def ref_lookup_strategy(portal: Portal, type_name: str, schema: dict, value: str
62
68
 
63
69
 
64
70
  # This is here for now because of problems with circular dependencies.
65
- # See: smaht-portal/.../schema_formats.py
71
+ # See: smaht-portal/.../schema_formats.py/is_accession(instance) ...
66
72
  def _is_accession_id(value: str) -> bool:
67
73
  return isinstance(value, str) and re.match(r"^SMA[1-9A-Z]{9}$", value) is not None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.10.0.0b0
3
+ Version: 8.10.0.1b1
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -5,7 +5,7 @@ dcicutils/bundle_utils.py,sha256=ZVQcqlt7Yly8-YbL3A-5DW859_hMWpTL6dXtknEYZIw,346
5
5
  dcicutils/captured_output.py,sha256=0hP7sPwleMaYXQAvCfJOxG8Z8T_JJYy8ADp8A5ZoblE,3295
6
6
  dcicutils/cloudformation_utils.py,sha256=MtWJrSTXyiImgbPHgRvfH9bWso20ZPLTFJAfhDQSVj4,13786
7
7
  dcicutils/codebuild_utils.py,sha256=CKpmhJ-Z8gYbkt1I2zyMlKtFdsg7T8lqrx3V5ieta-U,1155
8
- dcicutils/command_utils.py,sha256=JExll5TMqIcmuiGvuS8q4XDUvoEfi2oSH0E2FVF6suU,15285
8
+ dcicutils/command_utils.py,sha256=1_h18LGX86sLAkRkH33HNmBkwMb7v2wAh3jL01hzceU,18487
9
9
  dcicutils/common.py,sha256=YE8Mt5-vaZWWz4uaChSVhqGFbFtW5QKtnIyOr4zG4vM,3955
10
10
  dcicutils/contribution_scripts.py,sha256=0k5Gw1TumcD5SAcXVkDd6-yvuMEw-jUp5Kfb7FJH6XQ,2015
11
11
  dcicutils/contribution_utils.py,sha256=vYLS1JUB3sKd24BUxZ29qUBqYeQBLK9cwo8x3k64uPg,25653
@@ -39,16 +39,16 @@ dcicutils/lang_utils.py,sha256=MI3K6bPHLUqlkx3s_9jYZfbGbahiQFlpq4rBE3OYMbg,28151
39
39
  dcicutils/license_policies/c4-infrastructure.jsonc,sha256=xEQbIN08Y2xh3gSLRtSz9EhAZox1p3kHC4r678hCpss,278
40
40
  dcicutils/license_policies/c4-python-infrastructure.jsonc,sha256=Tkq8P1mKGYlix68I82IFNmasrT4wtSdokOIM-g2B8DQ,296
41
41
  dcicutils/license_policies/park-lab-common-server.jsonc,sha256=aaK-NdFDT8f8z_gBXihZnQJ6g3CAZdGSlHOwUP8HvUQ,5790
42
- dcicutils/license_policies/park-lab-common.jsonc,sha256=0LIFlIQWmdHlt160ku8cTOiB59akgYzRhRN3Xb5G_Aw,18864
42
+ dcicutils/license_policies/park-lab-common.jsonc,sha256=QyzpPveVr87RMpjrLLhnxLSp4VuEWta1gehMAqgKKig,18995
43
43
  dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug3PizRGDLVnDox4CnvDKu5d2oQ,3260
44
44
  dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
45
45
  dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
46
46
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
47
- dcicutils/misc_utils.py,sha256=zHwsxxEn24muLBP7mDvMa8I9VdMejwW8HMuCL5xbhhw,107690
47
+ dcicutils/misc_utils.py,sha256=-syqTAj8DESiiP_KHoyBv9VvfboFYB03QbBlmXnBZXw,109423
48
48
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
49
49
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
50
- dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
51
- dcicutils/portal_utils.py,sha256=DYyE5o15GekDgzpJWas9iS7klAYbjJZUPW0G42McArk,30779
50
+ dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
51
+ dcicutils/portal_utils.py,sha256=R7v4uQUll34mn-NxyU3qoTouAwWrVDzW6W1zBGSU-M4,44762
52
52
  dcicutils/progress_bar.py,sha256=UT7lxb-rVF_gp4yjY2Tg4eun1naaH__hB4_v3O85bcE,19468
53
53
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
54
54
  dcicutils/qa_checkers.py,sha256=cdXjeL0jCDFDLT8VR8Px78aS10hwNISOO5G_Zv2TZ6M,20534
@@ -56,7 +56,7 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
56
56
  dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
57
57
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
58
58
  dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
59
- dcicutils/schema_utils.py,sha256=GmRm-XqZKJ6qine16SQF1txcby9WougDav_sYmKNs9E,12400
59
+ dcicutils/schema_utils.py,sha256=IIteRrg-iOJOFU17n2lvKByVdWdiMfuAQ1kf_QIM96Q,10604
60
60
  dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
61
61
  dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
62
62
  dcicutils/scripts/view_portal_object.py,sha256=HZzM44BDcGycO9XTOTZyP-F7PRMZaZrnFfiqiT7Qvqg,29777
@@ -64,17 +64,17 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
64
64
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
65
65
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
66
66
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
67
- dcicutils/structured_data.py,sha256=XOMxrmkJohdCAyCJU09uI8ivthTKrtSSYReFbC9VYMs,63058
67
+ dcicutils/structured_data.py,sha256=HVe1ruXz0vH4nRBOwq0cNrfR4KtqocC4U940KRXM5zY,64160
68
68
  dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
69
- dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
69
+ dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
70
70
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
71
71
  dcicutils/tmpfile_utils.py,sha256=irmN6Otvtxyum-7qr5h9GIzDs9rtFFyUsGQyqJXd_y4,2997
72
72
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
73
73
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
74
74
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
75
75
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
76
- dcicutils-8.10.0.0b0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
- dcicutils-8.10.0.0b0.dist-info/METADATA,sha256=vcIAVrY7xnIajSS-yHlYgIrl9PGr2EmjUnnG9jDctEQ,3440
78
- dcicutils-8.10.0.0b0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
- dcicutils-8.10.0.0b0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
- dcicutils-8.10.0.0b0.dist-info/RECORD,,
76
+ dcicutils-8.10.0.1b1.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
+ dcicutils-8.10.0.1b1.dist-info/METADATA,sha256=mVkdVaQtLvCiBIwKoT3JL9HYr8fmTgy-TiFlpGyCcZs,3440
78
+ dcicutils-8.10.0.1b1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
+ dcicutils-8.10.0.1b1.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
+ dcicutils-8.10.0.1b1.dist-info/RECORD,,