biocypher 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/_config/biocypher_config.yaml +21 -4
- biocypher/_metadata.py +1 -1
- biocypher/_ontology.py +144 -51
- biocypher/_translate.py +84 -79
- biocypher/output/write/_batch_writer.py +133 -52
- biocypher/output/write/_get_writer.py +28 -11
- biocypher/output/write/_writer.py +32 -14
- biocypher/output/write/graph/_arangodb.py +44 -32
- biocypher/output/write/graph/_neo4j.py +3 -4
- biocypher/output/write/graph/_owl.py +569 -0
- biocypher/output/write/graph/_rdf.py +234 -97
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/METADATA +1 -1
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/RECORD +15 -14
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/LICENSE +0 -0
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/WHEEL +0 -0
biocypher/_translate.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
"""BioCypher 'translation' module.
|
|
2
|
+
|
|
3
|
+
Responsible for translating between the raw input data and the
|
|
4
|
+
BioCypherNode and BioCypherEdge objects.
|
|
4
5
|
"""
|
|
5
6
|
|
|
6
7
|
from collections.abc import Generator, Iterable
|
|
7
|
-
from typing import Any
|
|
8
|
+
from typing import Any
|
|
8
9
|
|
|
9
10
|
from more_itertools import peekable
|
|
10
11
|
|
|
@@ -19,21 +20,23 @@ __all__ = ["Translator"]
|
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class Translator:
|
|
22
|
-
"""
|
|
23
|
-
|
|
24
|
-
the schema_config.yaml file. Creates a mapping
|
|
25
|
-
and, given nodes and edges, translates them into
|
|
26
|
-
BioCypherEdges. During this process, can also filter the
|
|
27
|
-
entities if the schema_config.yaml file specifies a property
|
|
28
|
-
blacklist.
|
|
23
|
+
"""Class responsible for exacting the translation process.
|
|
24
|
+
|
|
25
|
+
Translation is configured in the schema_config.yaml file. Creates a mapping
|
|
26
|
+
dictionary from that file, and, given nodes and edges, translates them into
|
|
27
|
+
BioCypherNodes and BioCypherEdges. During this process, can also filter the
|
|
28
|
+
properties of the entities if the schema_config.yaml file specifies a property
|
|
29
|
+
whitelist or blacklist.
|
|
29
30
|
|
|
30
31
|
Provides utility functions for translating between input and output labels
|
|
31
32
|
and cypher queries.
|
|
32
33
|
"""
|
|
33
34
|
|
|
34
35
|
def __init__(self, ontology: "Ontology", strict_mode: bool = False):
|
|
35
|
-
"""
|
|
36
|
+
"""Initialise the translator.
|
|
37
|
+
|
|
36
38
|
Args:
|
|
39
|
+
----
|
|
37
40
|
leaves:
|
|
38
41
|
Dictionary detailing the leaves of the hierarchy
|
|
39
42
|
tree representing the structure of the graph; the leaves are
|
|
@@ -43,8 +46,8 @@ class Translator:
|
|
|
43
46
|
strict_mode:
|
|
44
47
|
If True, the translator will raise an error if input data do not
|
|
45
48
|
carry source, licence, and version information.
|
|
46
|
-
"""
|
|
47
49
|
|
|
50
|
+
"""
|
|
48
51
|
self.ontology = ontology
|
|
49
52
|
self.strict_mode = strict_mode
|
|
50
53
|
|
|
@@ -59,11 +62,7 @@ class Translator:
|
|
|
59
62
|
|
|
60
63
|
def translate_entities(self, entities):
|
|
61
64
|
entities = peekable(entities)
|
|
62
|
-
if (
|
|
63
|
-
isinstance(entities.peek(), BioCypherNode)
|
|
64
|
-
or isinstance(entities.peek(), BioCypherEdge)
|
|
65
|
-
or isinstance(entities.peek(), BioCypherRelAsNode)
|
|
66
|
-
):
|
|
65
|
+
if isinstance(entities.peek(), BioCypherEdge | BioCypherNode | BioCypherRelAsNode):
|
|
67
66
|
translated_entities = entities
|
|
68
67
|
elif len(entities.peek()) < 4:
|
|
69
68
|
translated_entities = self.translate_nodes(entities)
|
|
@@ -75,19 +74,20 @@ class Translator:
|
|
|
75
74
|
self,
|
|
76
75
|
node_tuples: Iterable,
|
|
77
76
|
) -> Generator[BioCypherNode, None, None]:
|
|
78
|
-
"""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
77
|
+
"""Translate input node representation.
|
|
78
|
+
|
|
79
|
+
Translate the node tuples to a representation that conforms to the
|
|
80
|
+
schema of the given BioCypher graph. For now requires explicit
|
|
81
|
+
statement of node type on pass.
|
|
82
82
|
|
|
83
83
|
Args:
|
|
84
|
+
----
|
|
84
85
|
node_tuples (list of tuples): collection of tuples
|
|
85
86
|
representing individual nodes by their unique id and a type
|
|
86
87
|
that is translated from the original database notation to
|
|
87
88
|
the corresponding BioCypher notation.
|
|
88
89
|
|
|
89
90
|
"""
|
|
90
|
-
|
|
91
91
|
self._log_begin_translate(node_tuples, "nodes")
|
|
92
92
|
|
|
93
93
|
for _id, _type, _props in node_tuples:
|
|
@@ -101,10 +101,12 @@ class Translator:
|
|
|
101
101
|
|
|
102
102
|
for prop in required_props:
|
|
103
103
|
if prop not in _props:
|
|
104
|
-
|
|
104
|
+
msg = (
|
|
105
105
|
f"Property `{prop}` missing from node {_id}. "
|
|
106
|
-
"Strict mode is enabled, so this is not allowed."
|
|
106
|
+
"Strict mode is enabled, so this is not allowed.",
|
|
107
107
|
)
|
|
108
|
+
logger.error(msg)
|
|
109
|
+
raise ValueError(msg)
|
|
108
110
|
|
|
109
111
|
# find the node in leaves that represents ontology node type
|
|
110
112
|
_ontology_class = self._get_ontology_mapping(_type)
|
|
@@ -129,10 +131,11 @@ class Translator:
|
|
|
129
131
|
self._log_finish_translate("nodes")
|
|
130
132
|
|
|
131
133
|
def _get_preferred_id(self, _bl_type: str) -> str:
|
|
132
|
-
"""
|
|
133
|
-
Returns the preferred id for the given Biolink type.
|
|
134
|
-
"""
|
|
134
|
+
"""Return the preferred id for the given Biolink type.
|
|
135
135
|
|
|
136
|
+
If the preferred id is not specified in the schema_config.yaml file,
|
|
137
|
+
return "id".
|
|
138
|
+
"""
|
|
136
139
|
return (
|
|
137
140
|
self.ontology.mapping.extended_schema[_bl_type]["preferred_id"]
|
|
138
141
|
if "preferred_id" in self.ontology.mapping.extended_schema.get(_bl_type, {})
|
|
@@ -140,10 +143,11 @@ class Translator:
|
|
|
140
143
|
)
|
|
141
144
|
|
|
142
145
|
def _filter_props(self, bl_type: str, props: dict) -> dict:
|
|
143
|
-
"""
|
|
144
|
-
Filters properties for those specified in schema_config if any.
|
|
145
|
-
"""
|
|
146
|
+
"""Filter properties for those specified in schema_config if any.
|
|
146
147
|
|
|
148
|
+
If the properties are not specified in the schema_config.yaml file,
|
|
149
|
+
return the original properties.
|
|
150
|
+
"""
|
|
147
151
|
filter_props = self.ontology.mapping.extended_schema[bl_type].get("properties", {})
|
|
148
152
|
|
|
149
153
|
# strict mode: add required properties (only if there is a whitelist)
|
|
@@ -179,14 +183,15 @@ class Translator:
|
|
|
179
183
|
def translate_edges(
|
|
180
184
|
self,
|
|
181
185
|
edge_tuples: Iterable,
|
|
182
|
-
) -> Generator[
|
|
183
|
-
"""
|
|
184
|
-
Translates input edge representation to a representation that
|
|
185
|
-
conforms to the schema of the given BioCypher graph. For now
|
|
186
|
-
requires explicit statement of edge type on pass.
|
|
186
|
+
) -> Generator[BioCypherEdge | BioCypherRelAsNode, None, None]:
|
|
187
|
+
"""Translate input edge representation.
|
|
187
188
|
|
|
188
|
-
|
|
189
|
+
Translate the edge tuples to a representation that conforms to the
|
|
190
|
+
schema of the given BioCypher graph. For now requires explicit
|
|
191
|
+
statement of edge type on pass.
|
|
189
192
|
|
|
193
|
+
Args:
|
|
194
|
+
----
|
|
190
195
|
edge_tuples (list of tuples):
|
|
191
196
|
|
|
192
197
|
collection of tuples representing source and target of
|
|
@@ -194,8 +199,8 @@ class Translator:
|
|
|
194
199
|
of interaction in the original database notation, which
|
|
195
200
|
is translated to BioCypher notation using the `leaves`.
|
|
196
201
|
Can optionally possess its own ID.
|
|
197
|
-
"""
|
|
198
202
|
|
|
203
|
+
"""
|
|
199
204
|
self._log_begin_translate(edge_tuples, "edges")
|
|
200
205
|
|
|
201
206
|
# legacy: deal with 4-tuples (no edge id)
|
|
@@ -208,18 +213,22 @@ class Translator:
|
|
|
208
213
|
# check for strict mode requirements
|
|
209
214
|
if self.strict_mode:
|
|
210
215
|
if "source" not in _props:
|
|
211
|
-
|
|
212
|
-
f"Edge {_id if _id else (_src, _tar)} does not have a `source` property."
|
|
216
|
+
msg = (
|
|
217
|
+
f"Edge {_id if _id else (_src, _tar)} does not have a `source` property."
|
|
213
218
|
" This is required in strict mode.",
|
|
214
219
|
)
|
|
220
|
+
logger.error(msg)
|
|
221
|
+
raise ValueError(msg)
|
|
215
222
|
if "licence" not in _props:
|
|
216
|
-
|
|
217
|
-
f"Edge {_id if _id else (_src, _tar)} does not have a `licence` property."
|
|
223
|
+
msg = (
|
|
224
|
+
f"Edge {_id if _id else (_src, _tar)} does not have a `licence` property."
|
|
218
225
|
" This is required in strict mode.",
|
|
219
226
|
)
|
|
227
|
+
logger.error(msg)
|
|
228
|
+
raise ValueError(msg)
|
|
220
229
|
|
|
221
230
|
# match the input label (_type) to
|
|
222
|
-
#
|
|
231
|
+
# an ontology label from schema_config
|
|
223
232
|
bl_type = self._get_ontology_mapping(_type)
|
|
224
233
|
|
|
225
234
|
if bl_type:
|
|
@@ -295,12 +304,12 @@ class Translator:
|
|
|
295
304
|
self._log_finish_translate("edges")
|
|
296
305
|
|
|
297
306
|
def _record_no_type(self, _type: Any, what: Any) -> None:
|
|
298
|
-
"""
|
|
299
|
-
Records the type of a node or edge that is not represented in the
|
|
300
|
-
schema_config.
|
|
301
|
-
"""
|
|
307
|
+
"""Record the type of a non-represented node or edge.
|
|
302
308
|
|
|
303
|
-
|
|
309
|
+
In case of an entity that is not represented in the schema_config,
|
|
310
|
+
record the type and the entity.
|
|
311
|
+
"""
|
|
312
|
+
logger.error(f"No ontology type defined for `{_type}`: {what}")
|
|
304
313
|
|
|
305
314
|
if self.notype.get(_type, None):
|
|
306
315
|
self.notype[_type] += 1
|
|
@@ -309,11 +318,11 @@ class Translator:
|
|
|
309
318
|
self.notype[_type] = 1
|
|
310
319
|
|
|
311
320
|
def get_missing_biolink_types(self) -> dict:
|
|
312
|
-
"""
|
|
313
|
-
Returns a dictionary of types that were not represented in the
|
|
314
|
-
schema_config.
|
|
315
|
-
"""
|
|
321
|
+
"""Return a dictionary of non-represented types.
|
|
316
322
|
|
|
323
|
+
The dictionary contains the type as the key and the number of
|
|
324
|
+
occurrences as the value.
|
|
325
|
+
"""
|
|
317
326
|
return self.notype
|
|
318
327
|
|
|
319
328
|
@staticmethod
|
|
@@ -327,12 +336,10 @@ class Translator:
|
|
|
327
336
|
logger.debug(f"Finished translating {what} to BioCypher.")
|
|
328
337
|
|
|
329
338
|
def _update_ontology_types(self):
|
|
330
|
-
"""
|
|
331
|
-
Creates a dictionary to translate from input labels to ontology labels.
|
|
339
|
+
"""Create a dictionary to translate from input to ontology labels.
|
|
332
340
|
|
|
333
341
|
If multiple input labels, creates mapping for each.
|
|
334
342
|
"""
|
|
335
|
-
|
|
336
343
|
self._ontology_mapping = {}
|
|
337
344
|
|
|
338
345
|
for key, value in self.ontology.mapping.extended_schema.items():
|
|
@@ -351,47 +358,45 @@ class Translator:
|
|
|
351
358
|
else:
|
|
352
359
|
self._add_translation_mappings(labels, key)
|
|
353
360
|
|
|
354
|
-
def _get_ontology_mapping(self, label: str) ->
|
|
355
|
-
"""
|
|
361
|
+
def _get_ontology_mapping(self, label: str) -> str | None:
|
|
362
|
+
"""Find the ontology class for the given input type.
|
|
363
|
+
|
|
356
364
|
For each given input type ("input_label" or "label_in_input"), find the
|
|
357
365
|
corresponding ontology class in the leaves dictionary (from the
|
|
358
366
|
`schema_config.yam`).
|
|
359
367
|
|
|
360
368
|
Args:
|
|
369
|
+
----
|
|
361
370
|
label:
|
|
362
371
|
The input type to find (`input_label` or `label_in_input` in
|
|
363
372
|
`schema_config.yaml`).
|
|
364
|
-
"""
|
|
365
373
|
|
|
374
|
+
"""
|
|
375
|
+
# FIXME does not seem like a necessary function.
|
|
366
376
|
# commented out until behaviour of _update_bl_types is fixed
|
|
367
377
|
return self._ontology_mapping.get(label, None)
|
|
368
378
|
|
|
369
379
|
def translate_term(self, term):
|
|
370
|
-
"""
|
|
371
|
-
Translate a single term.
|
|
372
|
-
"""
|
|
373
|
-
|
|
380
|
+
"""Translate a single term."""
|
|
374
381
|
return self.mappings.get(term, None)
|
|
375
382
|
|
|
376
383
|
def reverse_translate_term(self, term):
|
|
377
|
-
"""
|
|
378
|
-
Reverse translate a single term.
|
|
379
|
-
"""
|
|
380
|
-
|
|
384
|
+
"""Reverse translate a single term."""
|
|
381
385
|
return self.reverse_mappings.get(term, None)
|
|
382
386
|
|
|
383
387
|
def translate(self, query):
|
|
384
|
-
"""
|
|
385
|
-
|
|
388
|
+
"""Translate a cypher query.
|
|
389
|
+
|
|
390
|
+
Only translates labels as of now.
|
|
386
391
|
"""
|
|
387
392
|
for key in self.mappings:
|
|
388
393
|
query = query.replace(":" + key, ":" + self.mappings[key])
|
|
389
394
|
return query
|
|
390
395
|
|
|
391
396
|
def reverse_translate(self, query):
|
|
392
|
-
"""
|
|
393
|
-
|
|
394
|
-
now.
|
|
397
|
+
"""Reverse translate a cypher query.
|
|
398
|
+
|
|
399
|
+
Only translates labels as of now.
|
|
395
400
|
"""
|
|
396
401
|
for key in self.reverse_mappings:
|
|
397
402
|
a = ":" + key + ")"
|
|
@@ -399,12 +404,14 @@ class Translator:
|
|
|
399
404
|
# TODO this conditional probably does not cover all cases
|
|
400
405
|
if a in query or b in query:
|
|
401
406
|
if isinstance(self.reverse_mappings[key], list):
|
|
402
|
-
|
|
407
|
+
msg = (
|
|
403
408
|
"Reverse translation of multiple inputs not "
|
|
404
409
|
"implemented yet. Many-to-one mappings are "
|
|
405
410
|
"not reversible. "
|
|
406
411
|
f"({key} -> {self.reverse_mappings[key]})",
|
|
407
412
|
)
|
|
413
|
+
logger.error(msg)
|
|
414
|
+
raise NotImplementedError(msg)
|
|
408
415
|
else:
|
|
409
416
|
query = query.replace(
|
|
410
417
|
a,
|
|
@@ -413,10 +420,10 @@ class Translator:
|
|
|
413
420
|
return query
|
|
414
421
|
|
|
415
422
|
def _add_translation_mappings(self, original_name, biocypher_name):
|
|
416
|
-
"""
|
|
417
|
-
|
|
418
|
-
PascalCase version of the BioCypher name, since
|
|
419
|
-
not useful for Cypher queries.
|
|
423
|
+
"""Add translation mappings for a label and name.
|
|
424
|
+
|
|
425
|
+
We use here the PascalCase version of the BioCypher name, since
|
|
426
|
+
sentence case is not useful for Cypher queries.
|
|
420
427
|
"""
|
|
421
428
|
if isinstance(original_name, list):
|
|
422
429
|
for on in original_name:
|
|
@@ -444,9 +451,7 @@ class Translator:
|
|
|
444
451
|
|
|
445
452
|
@staticmethod
|
|
446
453
|
def name_sentence_to_pascal(name: str) -> str:
|
|
447
|
-
"""
|
|
448
|
-
Converts a name in sentence case to pascal case.
|
|
449
|
-
"""
|
|
454
|
+
"""Convert a name in sentence case to pascal case."""
|
|
450
455
|
# split on dots if dot is present
|
|
451
456
|
if "." in name:
|
|
452
457
|
return ".".join(
|