biocypher-0.7.0-py3-none-any.whl → biocypher-0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic.

biocypher/_translate.py CHANGED
@@ -1,10 +1,11 @@
1
- """
2
- BioCypher 'translation' module. Responsible for translating between the raw
3
- input data and the BioCypherNode and BioCypherEdge objects.
1
+ """BioCypher 'translation' module.
2
+
3
+ Responsible for translating between the raw input data and the
4
+ BioCypherNode and BioCypherEdge objects.
4
5
  """
5
6
 
6
7
  from collections.abc import Generator, Iterable
7
- from typing import Any, Optional, Union
8
+ from typing import Any
8
9
 
9
10
  from more_itertools import peekable
10
11
 
@@ -19,21 +20,23 @@ __all__ = ["Translator"]
19
20
 
20
21
 
21
22
  class Translator:
22
- """
23
- Class responsible for exacting the translation process that is configured in
24
- the schema_config.yaml file. Creates a mapping dictionary from that file,
25
- and, given nodes and edges, translates them into BioCypherNodes and
26
- BioCypherEdges. During this process, can also filter the properties of the
27
- entities if the schema_config.yaml file specifies a property whitelist or
28
- blacklist.
23
+ """Class responsible for exacting the translation process.
24
+
25
+ Translation is configured in the schema_config.yaml file. Creates a mapping
26
+ dictionary from that file, and, given nodes and edges, translates them into
27
+ BioCypherNodes and BioCypherEdges. During this process, can also filter the
28
+ properties of the entities if the schema_config.yaml file specifies a property
29
+ whitelist or blacklist.
29
30
 
30
31
  Provides utility functions for translating between input and output labels
31
32
  and cypher queries.
32
33
  """
33
34
 
34
35
  def __init__(self, ontology: "Ontology", strict_mode: bool = False):
35
- """
36
+ """Initialise the translator.
37
+
36
38
  Args:
39
+ ----
37
40
  leaves:
38
41
  Dictionary detailing the leaves of the hierarchy
39
42
  tree representing the structure of the graph; the leaves are
@@ -43,8 +46,8 @@ class Translator:
43
46
  strict_mode:
44
47
  If True, the translator will raise an error if input data do not
45
48
  carry source, licence, and version information.
46
- """
47
49
 
50
+ """
48
51
  self.ontology = ontology
49
52
  self.strict_mode = strict_mode
50
53
 
@@ -59,11 +62,7 @@ class Translator:
59
62
 
60
63
  def translate_entities(self, entities):
61
64
  entities = peekable(entities)
62
- if (
63
- isinstance(entities.peek(), BioCypherNode)
64
- or isinstance(entities.peek(), BioCypherEdge)
65
- or isinstance(entities.peek(), BioCypherRelAsNode)
66
- ):
65
+ if isinstance(entities.peek(), BioCypherEdge | BioCypherNode | BioCypherRelAsNode):
67
66
  translated_entities = entities
68
67
  elif len(entities.peek()) < 4:
69
68
  translated_entities = self.translate_nodes(entities)
@@ -75,19 +74,20 @@ class Translator:
75
74
  self,
76
75
  node_tuples: Iterable,
77
76
  ) -> Generator[BioCypherNode, None, None]:
78
- """
79
- Translates input node representation to a representation that
80
- conforms to the schema of the given BioCypher graph. For now
81
- requires explicit statement of node type on pass.
77
+ """Translate input node representation.
78
+
79
+ Translate the node tuples to a representation that conforms to the
80
+ schema of the given BioCypher graph. For now requires explicit
81
+ statement of node type on pass.
82
82
 
83
83
  Args:
84
+ ----
84
85
  node_tuples (list of tuples): collection of tuples
85
86
  representing individual nodes by their unique id and a type
86
87
  that is translated from the original database notation to
87
88
  the corresponding BioCypher notation.
88
89
 
89
90
  """
90
-
91
91
  self._log_begin_translate(node_tuples, "nodes")
92
92
 
93
93
  for _id, _type, _props in node_tuples:
@@ -101,10 +101,12 @@ class Translator:
101
101
 
102
102
  for prop in required_props:
103
103
  if prop not in _props:
104
- raise ValueError(
104
+ msg = (
105
105
  f"Property `{prop}` missing from node {_id}. "
106
- "Strict mode is enabled, so this is not allowed."
106
+ "Strict mode is enabled, so this is not allowed.",
107
107
  )
108
+ logger.error(msg)
109
+ raise ValueError(msg)
108
110
 
109
111
  # find the node in leaves that represents ontology node type
110
112
  _ontology_class = self._get_ontology_mapping(_type)
@@ -129,10 +131,11 @@ class Translator:
129
131
  self._log_finish_translate("nodes")
130
132
 
131
133
  def _get_preferred_id(self, _bl_type: str) -> str:
132
- """
133
- Returns the preferred id for the given Biolink type.
134
- """
134
+ """Return the preferred id for the given Biolink type.
135
135
 
136
+ If the preferred id is not specified in the schema_config.yaml file,
137
+ return "id".
138
+ """
136
139
  return (
137
140
  self.ontology.mapping.extended_schema[_bl_type]["preferred_id"]
138
141
  if "preferred_id" in self.ontology.mapping.extended_schema.get(_bl_type, {})
@@ -140,10 +143,11 @@ class Translator:
140
143
  )
141
144
 
142
145
  def _filter_props(self, bl_type: str, props: dict) -> dict:
143
- """
144
- Filters properties for those specified in schema_config if any.
145
- """
146
+ """Filter properties for those specified in schema_config if any.
146
147
 
148
+ If the properties are not specified in the schema_config.yaml file,
149
+ return the original properties.
150
+ """
147
151
  filter_props = self.ontology.mapping.extended_schema[bl_type].get("properties", {})
148
152
 
149
153
  # strict mode: add required properties (only if there is a whitelist)
@@ -179,14 +183,15 @@ class Translator:
179
183
  def translate_edges(
180
184
  self,
181
185
  edge_tuples: Iterable,
182
- ) -> Generator[Union[BioCypherEdge, BioCypherRelAsNode], None, None]:
183
- """
184
- Translates input edge representation to a representation that
185
- conforms to the schema of the given BioCypher graph. For now
186
- requires explicit statement of edge type on pass.
186
+ ) -> Generator[BioCypherEdge | BioCypherRelAsNode, None, None]:
187
+ """Translate input edge representation.
187
188
 
188
- Args:
189
+ Translate the edge tuples to a representation that conforms to the
190
+ schema of the given BioCypher graph. For now requires explicit
191
+ statement of edge type on pass.
189
192
 
193
+ Args:
194
+ ----
190
195
  edge_tuples (list of tuples):
191
196
 
192
197
  collection of tuples representing source and target of
@@ -194,8 +199,8 @@ class Translator:
194
199
  of interaction in the original database notation, which
195
200
  is translated to BioCypher notation using the `leaves`.
196
201
  Can optionally possess its own ID.
197
- """
198
202
 
203
+ """
199
204
  self._log_begin_translate(edge_tuples, "edges")
200
205
 
201
206
  # legacy: deal with 4-tuples (no edge id)
@@ -208,18 +213,22 @@ class Translator:
208
213
  # check for strict mode requirements
209
214
  if self.strict_mode:
210
215
  if "source" not in _props:
211
- raise ValueError(
212
- f"Edge {_id if _id else (_src, _tar)} does not have a `source` property.",
216
+ msg = (
217
+ f"Edge {_id if _id else (_src, _tar)} does not have a `source` property."
213
218
  " This is required in strict mode.",
214
219
  )
220
+ logger.error(msg)
221
+ raise ValueError(msg)
215
222
  if "licence" not in _props:
216
- raise ValueError(
217
- f"Edge {_id if _id else (_src, _tar)} does not have a `licence` property.",
223
+ msg = (
224
+ f"Edge {_id if _id else (_src, _tar)} does not have a `licence` property."
218
225
  " This is required in strict mode.",
219
226
  )
227
+ logger.error(msg)
228
+ raise ValueError(msg)
220
229
 
221
230
  # match the input label (_type) to
222
- # a Biolink label from schema_config
231
+ # an ontology label from schema_config
223
232
  bl_type = self._get_ontology_mapping(_type)
224
233
 
225
234
  if bl_type:
@@ -295,12 +304,12 @@ class Translator:
295
304
  self._log_finish_translate("edges")
296
305
 
297
306
  def _record_no_type(self, _type: Any, what: Any) -> None:
298
- """
299
- Records the type of a node or edge that is not represented in the
300
- schema_config.
301
- """
307
+ """Record the type of a non-represented node or edge.
302
308
 
303
- logger.debug(f"No ontology type defined for `{_type}`: {what}")
309
+ In case of an entity that is not represented in the schema_config,
310
+ record the type and the entity.
311
+ """
312
+ logger.error(f"No ontology type defined for `{_type}`: {what}")
304
313
 
305
314
  if self.notype.get(_type, None):
306
315
  self.notype[_type] += 1
@@ -309,11 +318,11 @@ class Translator:
309
318
  self.notype[_type] = 1
310
319
 
311
320
  def get_missing_biolink_types(self) -> dict:
312
- """
313
- Returns a dictionary of types that were not represented in the
314
- schema_config.
315
- """
321
+ """Return a dictionary of non-represented types.
316
322
 
323
+ The dictionary contains the type as the key and the number of
324
+ occurrences as the value.
325
+ """
317
326
  return self.notype
318
327
 
319
328
  @staticmethod
@@ -327,12 +336,10 @@ class Translator:
327
336
  logger.debug(f"Finished translating {what} to BioCypher.")
328
337
 
329
338
  def _update_ontology_types(self):
330
- """
331
- Creates a dictionary to translate from input labels to ontology labels.
339
+ """Create a dictionary to translate from input to ontology labels.
332
340
 
333
341
  If multiple input labels, creates mapping for each.
334
342
  """
335
-
336
343
  self._ontology_mapping = {}
337
344
 
338
345
  for key, value in self.ontology.mapping.extended_schema.items():
@@ -351,47 +358,45 @@ class Translator:
351
358
  else:
352
359
  self._add_translation_mappings(labels, key)
353
360
 
354
- def _get_ontology_mapping(self, label: str) -> Optional[str]:
355
- """
361
+ def _get_ontology_mapping(self, label: str) -> str | None:
362
+ """Find the ontology class for the given input type.
363
+
356
364
  For each given input type ("input_label" or "label_in_input"), find the
357
365
  corresponding ontology class in the leaves dictionary (from the
358
366
  `schema_config.yaml`).
359
367
 
360
368
  Args:
369
+ ----
361
370
  label:
362
371
  The input type to find (`input_label` or `label_in_input` in
363
372
  `schema_config.yaml`).
364
- """
365
373
 
374
+ """
375
+ # FIXME does not seem like a necessary function.
366
376
  # commented out until behaviour of _update_bl_types is fixed
367
377
  return self._ontology_mapping.get(label, None)
368
378
 
369
379
  def translate_term(self, term):
370
- """
371
- Translate a single term.
372
- """
373
-
380
+ """Translate a single term."""
374
381
  return self.mappings.get(term, None)
375
382
 
376
383
  def reverse_translate_term(self, term):
377
- """
378
- Reverse translate a single term.
379
- """
380
-
384
+ """Reverse translate a single term."""
381
385
  return self.reverse_mappings.get(term, None)
382
386
 
383
387
  def translate(self, query):
384
- """
385
- Translate a cypher query. Only translates labels as of now.
388
+ """Translate a cypher query.
389
+
390
+ Only translates labels as of now.
386
391
  """
387
392
  for key in self.mappings:
388
393
  query = query.replace(":" + key, ":" + self.mappings[key])
389
394
  return query
390
395
 
391
396
  def reverse_translate(self, query):
392
- """
393
- Reverse translate a cypher query. Only translates labels as of
394
- now.
397
+ """Reverse translate a cypher query.
398
+
399
+ Only translates labels as of now.
395
400
  """
396
401
  for key in self.reverse_mappings:
397
402
  a = ":" + key + ")"
@@ -399,12 +404,14 @@ class Translator:
399
404
  # TODO this conditional probably does not cover all cases
400
405
  if a in query or b in query:
401
406
  if isinstance(self.reverse_mappings[key], list):
402
- raise NotImplementedError(
407
+ msg = (
403
408
  "Reverse translation of multiple inputs not "
404
409
  "implemented yet. Many-to-one mappings are "
405
410
  "not reversible. "
406
411
  f"({key} -> {self.reverse_mappings[key]})",
407
412
  )
413
+ logger.error(msg)
414
+ raise NotImplementedError(msg)
408
415
  else:
409
416
  query = query.replace(
410
417
  a,
@@ -413,10 +420,10 @@ class Translator:
413
420
  return query
414
421
 
415
422
  def _add_translation_mappings(self, original_name, biocypher_name):
416
- """
417
- Add translation mappings for a label and name. We use here the
418
- PascalCase version of the BioCypher name, since sentence case is
419
- not useful for Cypher queries.
423
+ """Add translation mappings for a label and name.
424
+
425
+ We use here the PascalCase version of the BioCypher name, since
426
+ sentence case is not useful for Cypher queries.
420
427
  """
421
428
  if isinstance(original_name, list):
422
429
  for on in original_name:
@@ -444,9 +451,7 @@ class Translator:
444
451
 
445
452
  @staticmethod
446
453
  def name_sentence_to_pascal(name: str) -> str:
447
- """
448
- Converts a name in sentence case to pascal case.
449
- """
454
+ """Convert a name in sentence case to pascal case."""
450
455
  # split on dots if dot is present
451
456
  if "." in name:
452
457
  return ".".join(