biocypher 0.5.17__py3-none-any.whl → 0.5.20__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

biocypher/_pandas.py CHANGED
@@ -1,9 +1,10 @@
1
1
  import pandas as pd
2
- from ._create import BioCypherNode, BioCypherEdge
2
+
3
+ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
4
+
3
5
 
4
6
  class Pandas:
5
- def __init__(self, ontology, translator, deduplicator):
6
- self.ontology = ontology
7
+ def __init__(self, translator, deduplicator):
7
8
  self.translator = translator
8
9
  self.deduplicator = deduplicator
9
10
 
@@ -16,18 +17,48 @@ class Pandas:
16
17
  """
17
18
  lists = {}
18
19
  for entity in entities:
19
- if not isinstance(entity, BioCypherNode) and not isinstance(entity, BioCypherEdge):
20
- raise TypeError(f"Expected a BioCypherNode or BioCypherEdge, got {type(entity)}.")
21
-
20
+ if (
21
+ not isinstance(entity, BioCypherNode)
22
+ and not isinstance(entity, BioCypherEdge)
23
+ and not isinstance(entity, BioCypherRelAsNode)
24
+ ):
25
+ raise TypeError(
26
+ "Expected a BioCypherNode / BioCypherEdge / "
27
+ f"BioCypherRelAsNode, got {type(entity)}."
28
+ )
29
+
22
30
  if isinstance(entity, BioCypherNode):
23
31
  seen = self.deduplicator.node_seen(entity)
24
32
  elif isinstance(entity, BioCypherEdge):
25
33
  seen = self.deduplicator.edge_seen(entity)
34
+ elif isinstance(entity, BioCypherRelAsNode):
35
+ seen = self.deduplicator.rel_as_node_seen(entity)
26
36
 
27
37
  if seen:
28
38
  continue
29
-
30
- _type = entity.get_label()
39
+
40
+ if isinstance(entity, BioCypherRelAsNode):
41
+ node = entity.get_node()
42
+ source_edge = entity.get_source_edge()
43
+ target_edge = entity.get_target_edge()
44
+
45
+ _type = node.get_type()
46
+ if not _type in lists:
47
+ lists[_type] = []
48
+ lists[_type].append(node)
49
+
50
+ _source_type = source_edge.get_type()
51
+ if not _source_type in lists:
52
+ lists[_source_type] = []
53
+ lists[_source_type].append(source_edge)
54
+
55
+ _target_type = target_edge.get_type()
56
+ if not _target_type in lists:
57
+ lists[_target_type] = []
58
+ lists[_target_type].append(target_edge)
59
+ continue
60
+
61
+ _type = entity.get_type()
31
62
  if not _type in lists:
32
63
  lists[_type] = []
33
64
  lists[_type].append(entity)
@@ -45,10 +76,14 @@ class Pandas:
45
76
  self._add_entity_df(_type, _entities)
46
77
 
47
78
  def _add_entity_df(self, _type, _entities):
48
- df = pd.DataFrame(pd.json_normalize([node.get_dict() for node in _entities]))
49
- #replace "properties." with "" in column names
79
+ df = pd.DataFrame(
80
+ pd.json_normalize([node.get_dict() for node in _entities])
81
+ )
82
+ # replace "properties." with "" in column names
50
83
  df.columns = [col.replace("properties.", "") for col in df.columns]
51
84
  if _type not in self.dfs:
52
85
  self.dfs[_type] = df
53
86
  else:
54
- self.dfs[_type] = pd.concat([self.dfs[_type], df], ignore_index=True)
87
+ self.dfs[_type] = pd.concat(
88
+ [self.dfs[_type], df], ignore_index=True
89
+ )
biocypher/_translate.py CHANGED
@@ -14,7 +14,7 @@ input data and the BioCypherNode and BioCypherEdge objects.
14
14
  """
15
15
  from ._logger import logger
16
16
 
17
- logger.debug(f'Loading module {__name__}.')
17
+ logger.debug(f"Loading module {__name__}.")
18
18
 
19
19
  from typing import Any, Union, Optional
20
20
  from collections.abc import Iterable, Generator
@@ -23,9 +23,9 @@ from more_itertools import peekable
23
23
 
24
24
  from . import _misc
25
25
  from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
26
- from ._mapping import OntologyMapping
26
+ from ._ontology import Ontology
27
27
 
28
- __all__ = ['BiolinkAdapter', 'Translator']
28
+ __all__ = ["BiolinkAdapter", "Translator"]
29
29
 
30
30
 
31
31
  class Translator:
@@ -40,9 +40,8 @@ class Translator:
40
40
  Provides utility functions for translating between input and output labels
41
41
  and cypher queries.
42
42
  """
43
- def __init__(
44
- self, ontology_mapping: 'OntologyMapping', strict_mode: bool = False
45
- ):
43
+
44
+ def __init__(self, ontology: "Ontology", strict_mode: bool = False):
46
45
  """
47
46
  Args:
48
47
  leaves:
@@ -56,7 +55,7 @@ class Translator:
56
55
  carry source, licence, and version information.
57
56
  """
58
57
 
59
- self.extended_schema = ontology_mapping.extended_schema
58
+ self.ontology = ontology
60
59
  self.strict_mode = strict_mode
61
60
 
62
61
  # record nodes without biolink type configured in schema_config.yaml
@@ -70,7 +69,7 @@ class Translator:
70
69
 
71
70
  def translate_nodes(
72
71
  self,
73
- id_type_prop_tuples: Iterable,
72
+ node_tuples: Iterable,
74
73
  ) -> Generator[BioCypherNode, None, None]:
75
74
  """
76
75
  Translates input node representation to a representation that
@@ -78,37 +77,35 @@ class Translator:
78
77
  requires explicit statement of node type on pass.
79
78
 
80
79
  Args:
81
- id_type_tuples (list of tuples): collection of tuples
80
+ node_tuples (list of tuples): collection of tuples
82
81
  representing individual nodes by their unique id and a type
83
82
  that is translated from the original database notation to
84
83
  the corresponding BioCypher notation.
85
84
 
86
85
  """
87
86
 
88
- self._log_begin_translate(id_type_prop_tuples, 'nodes')
89
-
90
- for _id, _type, _props in id_type_prop_tuples:
87
+ self._log_begin_translate(node_tuples, "nodes")
91
88
 
89
+ for _id, _type, _props in node_tuples:
92
90
  # check for strict mode requirements
93
- required_props = ['source', 'licence', 'version']
91
+ required_props = ["source", "licence", "version"]
94
92
 
95
93
  if self.strict_mode:
96
94
  # rename 'license' to 'licence' in _props
97
- if _props.get('license'):
98
- _props['licence'] = _props.pop('license')
95
+ if _props.get("license"):
96
+ _props["licence"] = _props.pop("license")
99
97
 
100
98
  for prop in required_props:
101
99
  if prop not in _props:
102
100
  raise ValueError(
103
- f'Property `{prop}` missing from node {_id}. '
104
- 'Strict mode is enabled, so this is not allowed.'
101
+ f"Property `{prop}` missing from node {_id}. "
102
+ "Strict mode is enabled, so this is not allowed."
105
103
  )
106
104
 
107
105
  # find the node in leaves that represents biolink node type
108
106
  _ontology_class = self._get_ontology_mapping(_type)
109
107
 
110
108
  if _ontology_class:
111
-
112
109
  # filter properties for those specified in schema_config if any
113
110
  _filtered_props = self._filter_props(_ontology_class, _props)
114
111
 
@@ -123,10 +120,9 @@ class Translator:
123
120
  )
124
121
 
125
122
  else:
126
-
127
123
  self._record_no_type(_type, _id)
128
124
 
129
- self._log_finish_translate('nodes')
125
+ self._log_finish_translate("nodes")
130
126
 
131
127
  def _get_preferred_id(self, _bl_type: str) -> str:
132
128
  """
@@ -134,8 +130,10 @@ class Translator:
134
130
  """
135
131
 
136
132
  return (
137
- self.extended_schema[_bl_type]['preferred_id'] if 'preferred_id'
138
- in self.extended_schema.get(_bl_type, {}) else 'id'
133
+ self.ontology.mapping.extended_schema[_bl_type]["preferred_id"]
134
+ if "preferred_id"
135
+ in self.ontology.mapping.extended_schema.get(_bl_type, {})
136
+ else "id"
139
137
  )
140
138
 
141
139
  def _filter_props(self, bl_type: str, props: dict) -> dict:
@@ -143,27 +141,24 @@ class Translator:
143
141
  Filters properties for those specified in schema_config if any.
144
142
  """
145
143
 
146
- filter_props = self.extended_schema[bl_type].get('properties', {})
144
+ filter_props = self.ontology.mapping.extended_schema[bl_type].get(
145
+ "properties", {}
146
+ )
147
147
 
148
148
  # strict mode: add required properties (only if there is a whitelist)
149
149
  if self.strict_mode and filter_props:
150
150
  filter_props.update(
151
- {
152
- 'source': 'str',
153
- 'licence': 'str',
154
- 'version': 'str'
155
- },
151
+ {"source": "str", "licence": "str", "version": "str"},
156
152
  )
157
153
 
158
- exclude_props = self.extended_schema[bl_type].get(
159
- 'exclude_properties', []
154
+ exclude_props = self.ontology.mapping.extended_schema[bl_type].get(
155
+ "exclude_properties", []
160
156
  )
161
157
 
162
158
  if isinstance(exclude_props, str):
163
159
  exclude_props = [exclude_props]
164
160
 
165
161
  if filter_props and exclude_props:
166
-
167
162
  filtered_props = {
168
163
  k: v
169
164
  for k, v in props.items()
@@ -171,21 +166,16 @@ class Translator:
171
166
  }
172
167
 
173
168
  elif filter_props:
174
-
175
169
  filtered_props = {
176
- k: v
177
- for k, v in props.items() if k in filter_props.keys()
170
+ k: v for k, v in props.items() if k in filter_props.keys()
178
171
  }
179
172
 
180
173
  elif exclude_props:
181
-
182
174
  filtered_props = {
183
- k: v
184
- for k, v in props.items() if k not in exclude_props
175
+ k: v for k, v in props.items() if k not in exclude_props
185
176
  }
186
177
 
187
178
  else:
188
-
189
179
  return props
190
180
 
191
181
  missing_props = [
@@ -193,14 +183,13 @@ class Translator:
193
183
  ]
194
184
  # add missing properties with default values
195
185
  for k in missing_props:
196
-
197
186
  filtered_props[k] = None
198
187
 
199
188
  return filtered_props
200
189
 
201
190
  def translate_edges(
202
191
  self,
203
- id_src_tar_type_prop_tuples: Iterable,
192
+ edge_tuples: Iterable,
204
193
  ) -> Generator[Union[BioCypherEdge, BioCypherRelAsNode], None, None]:
205
194
  """
206
195
  Translates input edge representation to a representation that
@@ -209,7 +198,7 @@ class Translator:
209
198
 
210
199
  Args:
211
200
 
212
- id_src_tar_type_prop_tuples (list of tuples):
201
+ edge_tuples (list of tuples):
213
202
 
214
203
  collection of tuples representing source and target of
215
204
  an interaction via their unique ids as well as the type
@@ -218,30 +207,29 @@ class Translator:
218
207
  Can optionally possess its own ID.
219
208
  """
220
209
 
221
- self._log_begin_translate(id_src_tar_type_prop_tuples, 'edges')
210
+ self._log_begin_translate(edge_tuples, "edges")
222
211
 
223
212
  # legacy: deal with 4-tuples (no edge id)
224
213
  # TODO remove for performance reasons once safe
225
- id_src_tar_type_prop_tuples = peekable(id_src_tar_type_prop_tuples)
226
- if len(id_src_tar_type_prop_tuples.peek()) == 4:
227
- id_src_tar_type_prop_tuples = [
214
+ edge_tuples = peekable(edge_tuples)
215
+ if len(edge_tuples.peek()) == 4:
216
+ edge_tuples = [
228
217
  (None, src, tar, typ, props)
229
- for src, tar, typ, props in id_src_tar_type_prop_tuples
218
+ for src, tar, typ, props in edge_tuples
230
219
  ]
231
220
 
232
- for _id, _src, _tar, _type, _props in id_src_tar_type_prop_tuples:
233
-
221
+ for _id, _src, _tar, _type, _props in edge_tuples:
234
222
  # check for strict mode requirements
235
223
  if self.strict_mode:
236
- if not 'source' in _props:
224
+ if not "source" in _props:
237
225
  raise ValueError(
238
- f'Edge {_id if _id else (_src, _tar)} does not have a `source` property.',
239
- ' This is required in strict mode.',
226
+ f"Edge {_id if _id else (_src, _tar)} does not have a `source` property.",
227
+ " This is required in strict mode.",
240
228
  )
241
- if not 'licence' in _props:
229
+ if not "licence" in _props:
242
230
  raise ValueError(
243
- f'Edge {_id if _id else (_src, _tar)} does not have a `licence` property.',
244
- ' This is required in strict mode.',
231
+ f"Edge {_id if _id else (_src, _tar)} does not have a `licence` property.",
232
+ " This is required in strict mode.",
245
233
  )
246
234
 
247
235
  # match the input label (_type) to
@@ -249,14 +237,14 @@ class Translator:
249
237
  bl_type = self._get_ontology_mapping(_type)
250
238
 
251
239
  if bl_type:
252
-
253
240
  # filter properties for those specified in schema_config if any
254
241
  _filtered_props = self._filter_props(bl_type, _props)
255
242
 
256
- rep = self.extended_schema[bl_type]['represented_as']
257
-
258
- if rep == 'node':
243
+ rep = self.ontology.mapping.extended_schema[bl_type][
244
+ "represented_as"
245
+ ]
259
246
 
247
+ if rep == "node":
260
248
  if _id:
261
249
  # if it brings its own ID, use it
262
250
  node_id = _id
@@ -264,8 +252,11 @@ class Translator:
264
252
  else:
265
253
  # source target concat
266
254
  node_id = (
267
- str(_src) + '_' + str(_tar) + '_' +
268
- '_'.join(str(v) for v in _filtered_props.values())
255
+ str(_src)
256
+ + "_"
257
+ + str(_tar)
258
+ + "_"
259
+ + "_".join(str(v) for v in _filtered_props.values())
269
260
  )
270
261
 
271
262
  n = BioCypherNode(
@@ -277,21 +268,18 @@ class Translator:
277
268
  # directionality check TODO generalise to account for
278
269
  # different descriptions of directionality or find a
279
270
  # more consistent solution for indicating directionality
280
- if _filtered_props.get('directed') == True:
281
-
282
- l1 = 'IS_SOURCE_OF'
283
- l2 = 'IS_TARGET_OF'
271
+ if _filtered_props.get("directed") == True:
272
+ l1 = "IS_SOURCE_OF"
273
+ l2 = "IS_TARGET_OF"
284
274
 
285
275
  elif _filtered_props.get(
286
- 'src_role',
287
- ) and _filtered_props.get('tar_role'):
288
-
289
- l1 = _filtered_props.get('src_role')
290
- l2 = _filtered_props.get('tar_role')
276
+ "src_role",
277
+ ) and _filtered_props.get("tar_role"):
278
+ l1 = _filtered_props.get("src_role")
279
+ l2 = _filtered_props.get("tar_role")
291
280
 
292
281
  else:
293
-
294
- l1 = l2 = 'IS_PART_OF'
282
+ l1 = l2 = "IS_PART_OF"
295
283
 
296
284
  e_s = BioCypherEdge(
297
285
  source_id=_src,
@@ -310,13 +298,11 @@ class Translator:
310
298
  yield BioCypherRelAsNode(n, e_s, e_t)
311
299
 
312
300
  else:
313
-
314
- edge_label = self.extended_schema[bl_type].get(
315
- 'label_as_edge'
316
- )
301
+ edge_label = self.ontology.mapping.extended_schema[
302
+ bl_type
303
+ ].get("label_as_edge")
317
304
 
318
305
  if edge_label is None:
319
-
320
306
  edge_label = bl_type
321
307
 
322
308
  yield BioCypherEdge(
@@ -328,10 +314,9 @@ class Translator:
328
314
  )
329
315
 
330
316
  else:
331
-
332
317
  self._record_no_type(_type, (_src, _tar))
333
318
 
334
- self._log_finish_translate('edges')
319
+ self._log_finish_translate("edges")
335
320
 
336
321
  def _record_no_type(self, _type: Any, what: Any) -> None:
337
322
  """
@@ -339,14 +324,12 @@ class Translator:
339
324
  schema_config.
340
325
  """
341
326
 
342
- logger.debug(f'No Biolink type defined for `{_type}`: {what}')
327
+ logger.debug(f"No Biolink type defined for `{_type}`: {what}")
343
328
 
344
329
  if self.notype.get(_type, None):
345
-
346
330
  self.notype[_type] += 1
347
331
 
348
332
  else:
349
-
350
333
  self.notype[_type] = 1
351
334
 
352
335
  def get_missing_biolink_types(self) -> dict:
@@ -359,15 +342,13 @@ class Translator:
359
342
 
360
343
  @staticmethod
361
344
  def _log_begin_translate(_input: Iterable, what: str):
345
+ n = f"{len(_input)} " if hasattr(_input, "__len__") else ""
362
346
 
363
- n = f'{len(_input)} ' if hasattr(_input, '__len__') else ''
364
-
365
- logger.debug(f'Translating {n}{what} to BioCypher')
347
+ logger.debug(f"Translating {n}{what} to BioCypher")
366
348
 
367
349
  @staticmethod
368
350
  def _log_finish_translate(what: str):
369
-
370
- logger.debug(f'Finished translating {what} to BioCypher.')
351
+ logger.debug(f"Finished translating {what} to BioCypher.")
371
352
 
372
353
  def _update_ontology_types(self):
373
354
  """
@@ -378,25 +359,20 @@ class Translator:
378
359
 
379
360
  self._ontology_mapping = {}
380
361
 
381
- for key, value in self.extended_schema.items():
382
-
383
- labels = value.get('input_label') or value.get('label_in_input')
362
+ for key, value in self.ontology.mapping.extended_schema.items():
363
+ labels = value.get("input_label") or value.get("label_in_input")
384
364
 
385
365
  if isinstance(labels, str):
386
-
387
366
  self._ontology_mapping[labels] = key
388
367
 
389
368
  elif isinstance(labels, list):
390
-
391
369
  for label in labels:
392
370
  self._ontology_mapping[label] = key
393
371
 
394
- if value.get('label_as_edge'):
395
-
396
- self._add_translation_mappings(labels, value['label_as_edge'])
372
+ if value.get("label_as_edge"):
373
+ self._add_translation_mappings(labels, value["label_as_edge"])
397
374
 
398
375
  else:
399
-
400
376
  self._add_translation_mappings(labels, key)
401
377
 
402
378
  def _get_ontology_mapping(self, label: str) -> Optional[str]:
@@ -433,7 +409,7 @@ class Translator:
433
409
  Translate a cypher query. Only translates labels as of now.
434
410
  """
435
411
  for key in self.mappings:
436
- query = query.replace(':' + key, ':' + self.mappings[key])
412
+ query = query.replace(":" + key, ":" + self.mappings[key])
437
413
  return query
438
414
 
439
415
  def reverse_translate(self, query):
@@ -442,23 +418,22 @@ class Translator:
442
418
  now.
443
419
  """
444
420
  for key in self.reverse_mappings:
445
-
446
- a = ':' + key + ')'
447
- b = ':' + key + ']'
421
+ a = ":" + key + ")"
422
+ b = ":" + key + "]"
448
423
  # TODO this conditional probably does not cover all cases
449
424
  if a in query or b in query:
450
425
  if isinstance(self.reverse_mappings[key], list):
451
426
  raise NotImplementedError(
452
- 'Reverse translation of multiple inputs not '
453
- 'implemented yet. Many-to-one mappings are '
454
- 'not reversible. '
455
- f'({key} -> {self.reverse_mappings[key]})',
427
+ "Reverse translation of multiple inputs not "
428
+ "implemented yet. Many-to-one mappings are "
429
+ "not reversible. "
430
+ f"({key} -> {self.reverse_mappings[key]})",
456
431
  )
457
432
  else:
458
433
  query = query.replace(
459
434
  a,
460
- ':' + self.reverse_mappings[key] + ')',
461
- ).replace(b, ':' + self.reverse_mappings[key] + ']')
435
+ ":" + self.reverse_mappings[key] + ")",
436
+ ).replace(b, ":" + self.reverse_mappings[key] + "]")
462
437
  return query
463
438
 
464
439
  def _add_translation_mappings(self, original_name, biocypher_name):
@@ -479,12 +454,17 @@ class Translator:
479
454
 
480
455
  if isinstance(biocypher_name, list):
481
456
  for bn in biocypher_name:
482
- self.reverse_mappings[self.name_sentence_to_pascal(bn, )
483
- ] = original_name
457
+ self.reverse_mappings[
458
+ self.name_sentence_to_pascal(
459
+ bn,
460
+ )
461
+ ] = original_name
484
462
  else:
485
- self.reverse_mappings[self.name_sentence_to_pascal(
486
- biocypher_name,
487
- )] = original_name
463
+ self.reverse_mappings[
464
+ self.name_sentence_to_pascal(
465
+ biocypher_name,
466
+ )
467
+ ] = original_name
488
468
 
489
469
  @staticmethod
490
470
  def name_sentence_to_pascal(name: str) -> str:
@@ -492,9 +472,9 @@ class Translator:
492
472
  Converts a name in sentence case to pascal case.
493
473
  """
494
474
  # split on dots if dot is present
495
- if '.' in name:
496
- return '.'.join(
497
- [_misc.sentencecase_to_pascalcase(n) for n in name.split('.')],
475
+ if "." in name:
476
+ return ".".join(
477
+ [_misc.sentencecase_to_pascalcase(n) for n in name.split(".")],
498
478
  )
499
479
  else:
500
480
  return _misc.sentencecase_to_pascalcase(name)