biocypher 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/__init__.py +3 -13
- biocypher/_config/__init__.py +6 -23
- biocypher/_core.py +360 -262
- biocypher/_create.py +13 -27
- biocypher/_deduplicate.py +4 -11
- biocypher/_get.py +21 -60
- biocypher/_logger.py +4 -16
- biocypher/_mapping.py +4 -17
- biocypher/_metadata.py +3 -15
- biocypher/_misc.py +14 -28
- biocypher/_ontology.py +127 -212
- biocypher/_translate.py +34 -58
- biocypher/output/connect/_get_connector.py +40 -0
- biocypher/output/connect/_neo4j_driver.py +9 -65
- biocypher/output/in_memory/_get_in_memory_kg.py +34 -0
- biocypher/output/in_memory/_in_memory_kg.py +40 -0
- biocypher/output/in_memory/_networkx.py +44 -0
- biocypher/output/in_memory/_pandas.py +20 -15
- biocypher/output/write/_batch_writer.py +132 -177
- biocypher/output/write/_get_writer.py +11 -24
- biocypher/output/write/_writer.py +14 -33
- biocypher/output/write/graph/_arangodb.py +7 -24
- biocypher/output/write/graph/_neo4j.py +51 -56
- biocypher/output/write/graph/_networkx.py +36 -43
- biocypher/output/write/graph/_rdf.py +107 -95
- biocypher/output/write/relational/_csv.py +6 -11
- biocypher/output/write/relational/_postgresql.py +5 -13
- biocypher/output/write/relational/_sqlite.py +3 -1
- {biocypher-0.6.2.dist-info → biocypher-0.7.0.dist-info}/LICENSE +1 -1
- {biocypher-0.6.2.dist-info → biocypher-0.7.0.dist-info}/METADATA +3 -3
- biocypher-0.7.0.dist-info/RECORD +43 -0
- {biocypher-0.6.2.dist-info → biocypher-0.7.0.dist-info}/WHEEL +1 -1
- biocypher-0.6.2.dist-info/RECORD +0 -39
biocypher/_ontology.py
CHANGED
|
@@ -1,44 +1,33 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
# Copyright 2021, Heidelberg University Clinic
|
|
5
|
-
#
|
|
6
|
-
# File author(s): Sebastian Lobentanzer
|
|
7
|
-
# ...
|
|
8
|
-
#
|
|
9
|
-
# Distributed under MIT licence, see the file `LICENSE`.
|
|
10
|
-
#
|
|
11
|
-
"""
|
|
12
|
-
BioCypher 'ontology' module. Contains classes and functions to handle parsing
|
|
1
|
+
"""BioCypher 'ontology' module. Contains classes and functions to handle parsing
|
|
13
2
|
and representation of single ontologies as well as their hybridisation and
|
|
14
3
|
other advanced operations.
|
|
15
4
|
"""
|
|
16
|
-
import os
|
|
17
|
-
|
|
18
|
-
from ._logger import logger
|
|
19
5
|
|
|
20
|
-
|
|
6
|
+
import os
|
|
21
7
|
|
|
22
|
-
from typing import Optional
|
|
23
8
|
from datetime import datetime
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
import networkx as nx
|
|
12
|
+
import rdflib
|
|
24
13
|
|
|
25
14
|
from rdflib import Graph
|
|
26
15
|
from rdflib.extras.external_graph_libs import rdflib_to_networkx_digraph
|
|
27
|
-
import rdflib
|
|
28
|
-
import networkx as nx
|
|
29
16
|
|
|
17
|
+
from ._logger import logger
|
|
18
|
+
from ._mapping import OntologyMapping
|
|
30
19
|
from ._misc import (
|
|
31
|
-
to_list,
|
|
32
|
-
to_lower_sentence_case,
|
|
33
20
|
create_tree_visualisation,
|
|
34
21
|
sentencecase_to_pascalcase,
|
|
22
|
+
to_list,
|
|
23
|
+
to_lower_sentence_case,
|
|
35
24
|
)
|
|
36
|
-
|
|
25
|
+
|
|
26
|
+
logger.debug(f"Loading module {__name__}.")
|
|
37
27
|
|
|
38
28
|
|
|
39
29
|
class OntologyAdapter:
|
|
40
|
-
"""
|
|
41
|
-
Class that represents an ontology to be used in the Biocypher framework. Can
|
|
30
|
+
"""Class that represents an ontology to be used in the Biocypher framework. Can
|
|
42
31
|
read from a variety of formats, including OWL, OBO, and RDF/XML. The
|
|
43
32
|
ontology is represented by a networkx.DiGraph object; an RDFlib graph is
|
|
44
33
|
also kept. By default, the DiGraph reverses the label and identifier of the
|
|
@@ -52,16 +41,16 @@ class OntologyAdapter:
|
|
|
52
41
|
self,
|
|
53
42
|
ontology_file: str,
|
|
54
43
|
root_label: str,
|
|
55
|
-
ontology_file_format:
|
|
56
|
-
head_join_node_label:
|
|
57
|
-
merge_nodes:
|
|
44
|
+
ontology_file_format: str | None = None,
|
|
45
|
+
head_join_node_label: str | None = None,
|
|
46
|
+
merge_nodes: bool | None = True,
|
|
58
47
|
switch_label_and_id: bool = True,
|
|
59
48
|
remove_prefixes: bool = True,
|
|
60
49
|
):
|
|
61
|
-
"""
|
|
62
|
-
Initialize the OntologyAdapter class.
|
|
50
|
+
"""Initialize the OntologyAdapter class.
|
|
63
51
|
|
|
64
52
|
Args:
|
|
53
|
+
----
|
|
65
54
|
ontology_file (str): Path to the ontology file. Can be local or
|
|
66
55
|
remote.
|
|
67
56
|
|
|
@@ -86,8 +75,8 @@ class OntologyAdapter:
|
|
|
86
75
|
|
|
87
76
|
remove_prefixes (bool): If True, the prefixes of the identifiers will
|
|
88
77
|
be removed. Defaults to True.
|
|
89
|
-
"""
|
|
90
78
|
|
|
79
|
+
"""
|
|
91
80
|
logger.info(f"Instantiating OntologyAdapter class for {ontology_file}.")
|
|
92
81
|
|
|
93
82
|
self._ontology_file = ontology_file
|
|
@@ -100,9 +89,7 @@ class OntologyAdapter:
|
|
|
100
89
|
|
|
101
90
|
self._rdf_graph = self._load_rdf_graph(ontology_file)
|
|
102
91
|
|
|
103
|
-
self._nx_graph = self._rdf_to_nx(
|
|
104
|
-
self._rdf_graph, root_label, switch_label_and_id
|
|
105
|
-
)
|
|
92
|
+
self._nx_graph = self._rdf_to_nx(self._rdf_graph, root_label, switch_label_and_id)
|
|
106
93
|
|
|
107
94
|
def _rdf_to_nx(
|
|
108
95
|
self,
|
|
@@ -111,36 +98,29 @@ class OntologyAdapter:
|
|
|
111
98
|
switch_label_and_id: bool,
|
|
112
99
|
rename_nodes: bool = True,
|
|
113
100
|
) -> nx.DiGraph:
|
|
114
|
-
one_to_one_triples, one_to_many_dict = self._get_relevant_rdf_triples(
|
|
115
|
-
_rdf_graph
|
|
116
|
-
)
|
|
101
|
+
one_to_one_triples, one_to_many_dict = self._get_relevant_rdf_triples(_rdf_graph)
|
|
117
102
|
nx_graph = self._convert_to_nx(one_to_one_triples, one_to_many_dict)
|
|
118
103
|
nx_graph = self._add_labels_to_nodes(nx_graph, switch_label_and_id)
|
|
119
|
-
nx_graph = self._change_nodes_to_biocypher_format(
|
|
120
|
-
|
|
121
|
-
)
|
|
122
|
-
nx_graph = self._get_all_ancestors(
|
|
123
|
-
nx_graph, root_label, switch_label_and_id, rename_nodes
|
|
124
|
-
)
|
|
104
|
+
nx_graph = self._change_nodes_to_biocypher_format(nx_graph, switch_label_and_id, rename_nodes)
|
|
105
|
+
nx_graph = self._get_all_ancestors(nx_graph, root_label, switch_label_and_id, rename_nodes)
|
|
125
106
|
return nx.DiGraph(nx_graph)
|
|
126
107
|
|
|
127
108
|
def _get_relevant_rdf_triples(self, g: rdflib.Graph) -> tuple:
|
|
128
|
-
one_to_one_inheritance_graph = self._get_one_to_one_inheritance_triples(
|
|
129
|
-
g
|
|
130
|
-
)
|
|
109
|
+
one_to_one_inheritance_graph = self._get_one_to_one_inheritance_triples(g)
|
|
131
110
|
intersection = self._get_multiple_inheritance_dict(g)
|
|
132
111
|
return one_to_one_inheritance_graph, intersection
|
|
133
112
|
|
|
134
|
-
def _get_one_to_one_inheritance_triples(
|
|
135
|
-
self, g: rdflib.Graph
|
|
136
|
-
) -> rdflib.Graph:
|
|
113
|
+
def _get_one_to_one_inheritance_triples(self, g: rdflib.Graph) -> rdflib.Graph:
|
|
137
114
|
"""Get the one to one inheritance triples from the RDF graph.
|
|
138
115
|
|
|
139
116
|
Args:
|
|
117
|
+
----
|
|
140
118
|
g (rdflib.Graph): The RDF graph
|
|
141
119
|
|
|
142
120
|
Returns:
|
|
121
|
+
-------
|
|
143
122
|
rdflib.Graph: The one to one inheritance graph
|
|
123
|
+
|
|
144
124
|
"""
|
|
145
125
|
one_to_one_inheritance_graph = Graph()
|
|
146
126
|
for s, p, o in g.triples((None, rdflib.RDFS.subClassOf, None)):
|
|
@@ -152,32 +132,29 @@ class OntologyAdapter:
|
|
|
152
132
|
"""Get the multiple inheritance dictionary from the RDF graph.
|
|
153
133
|
|
|
154
134
|
Args:
|
|
135
|
+
----
|
|
155
136
|
g (rdflib.Graph): The RDF graph
|
|
156
137
|
|
|
157
138
|
Returns:
|
|
139
|
+
-------
|
|
158
140
|
dict: The multiple inheritance dictionary
|
|
141
|
+
|
|
159
142
|
"""
|
|
160
|
-
multiple_inheritance = g.triples(
|
|
161
|
-
(None, rdflib.OWL.intersectionOf, None)
|
|
162
|
-
)
|
|
143
|
+
multiple_inheritance = g.triples((None, rdflib.OWL.intersectionOf, None))
|
|
163
144
|
intersection = {}
|
|
164
145
|
for (
|
|
165
146
|
node,
|
|
166
147
|
has_multiple_parents,
|
|
167
148
|
first_node_of_intersection_list,
|
|
168
149
|
) in multiple_inheritance:
|
|
169
|
-
parents = self._retrieve_rdf_linked_list(
|
|
170
|
-
first_node_of_intersection_list
|
|
171
|
-
)
|
|
150
|
+
parents = self._retrieve_rdf_linked_list(first_node_of_intersection_list)
|
|
172
151
|
child_name = None
|
|
173
152
|
for s_, _, _ in g.triples((None, rdflib.RDFS.subClassOf, node)):
|
|
174
153
|
child_name = s_
|
|
175
154
|
|
|
176
155
|
# Handle Snomed CT post coordinated expressions
|
|
177
156
|
if not child_name:
|
|
178
|
-
for s_, _, _ in g.triples(
|
|
179
|
-
(None, rdflib.OWL.equivalentClass, node)
|
|
180
|
-
):
|
|
157
|
+
for s_, _, _ in g.triples((None, rdflib.OWL.equivalentClass, node)):
|
|
181
158
|
child_name = s_
|
|
182
159
|
|
|
183
160
|
if child_name:
|
|
@@ -191,10 +168,12 @@ class OntologyAdapter:
|
|
|
191
168
|
"""Does the node have a label in g?
|
|
192
169
|
|
|
193
170
|
Args:
|
|
171
|
+
----
|
|
194
172
|
node (rdflib.URIRef): The node to check
|
|
195
173
|
g (rdflib.Graph): The graph to check in
|
|
196
174
|
Returns:
|
|
197
175
|
bool: True if the node has a label, False otherwise
|
|
176
|
+
|
|
198
177
|
"""
|
|
199
178
|
return (node, rdflib.RDFS.label, None) in g
|
|
200
179
|
|
|
@@ -219,48 +198,41 @@ class OntologyAdapter:
|
|
|
219
198
|
rdf_list.extend(self._retrieve_rdf_linked_list(o))
|
|
220
199
|
return rdf_list
|
|
221
200
|
|
|
222
|
-
def _convert_to_nx(
|
|
223
|
-
self, one_to_one: rdflib.Graph, one_to_many: dict
|
|
224
|
-
) -> nx.DiGraph:
|
|
201
|
+
def _convert_to_nx(self, one_to_one: rdflib.Graph, one_to_many: dict) -> nx.DiGraph:
|
|
225
202
|
"""Convert the one to one and one to many inheritance graphs to networkx.
|
|
226
203
|
|
|
227
204
|
Args:
|
|
205
|
+
----
|
|
228
206
|
one_to_one (rdflib.Graph): The one to one inheritance graph
|
|
229
207
|
one_to_many (dict): The one to many inheritance dictionary
|
|
230
208
|
|
|
231
209
|
Returns:
|
|
210
|
+
-------
|
|
232
211
|
nx.DiGraph: The networkx graph
|
|
212
|
+
|
|
233
213
|
"""
|
|
234
|
-
nx_graph = rdflib_to_networkx_digraph(
|
|
235
|
-
one_to_one, edge_attrs=lambda s, p, o: {}, calc_weights=False
|
|
236
|
-
)
|
|
214
|
+
nx_graph = rdflib_to_networkx_digraph(one_to_one, edge_attrs=lambda s, p, o: {}, calc_weights=False)
|
|
237
215
|
for key, value in one_to_many.items():
|
|
238
|
-
nx_graph.add_edges_from(
|
|
239
|
-
[
|
|
240
|
-
(value["child_name"], parent)
|
|
241
|
-
for parent in value["parent_node_names"]
|
|
242
|
-
]
|
|
243
|
-
)
|
|
216
|
+
nx_graph.add_edges_from([(value["child_name"], parent) for parent in value["parent_node_names"]])
|
|
244
217
|
if key in nx_graph.nodes:
|
|
245
218
|
nx_graph.remove_node(key)
|
|
246
219
|
return nx_graph
|
|
247
220
|
|
|
248
|
-
def _add_labels_to_nodes(
|
|
249
|
-
self, nx_graph: nx.DiGraph, switch_label_and_id: bool
|
|
250
|
-
) -> nx.DiGraph:
|
|
221
|
+
def _add_labels_to_nodes(self, nx_graph: nx.DiGraph, switch_label_and_id: bool) -> nx.DiGraph:
|
|
251
222
|
"""Add labels to the nodes in the networkx graph.
|
|
252
223
|
|
|
253
224
|
Args:
|
|
225
|
+
----
|
|
254
226
|
nx_graph (nx.DiGraph): The networkx graph
|
|
255
227
|
switch_label_and_id (bool): If True, id and label are switched
|
|
256
228
|
|
|
257
229
|
Returns:
|
|
230
|
+
-------
|
|
258
231
|
nx.DiGraph: The networkx graph with labels
|
|
232
|
+
|
|
259
233
|
"""
|
|
260
234
|
for node in list(nx_graph.nodes):
|
|
261
|
-
nx_id, nx_label = self._get_nx_id_and_label(
|
|
262
|
-
node, switch_label_and_id
|
|
263
|
-
)
|
|
235
|
+
nx_id, nx_label = self._get_nx_id_and_label(node, switch_label_and_id)
|
|
264
236
|
if nx_id == "none":
|
|
265
237
|
# remove node if it has no id
|
|
266
238
|
nx_graph.remove_node(node)
|
|
@@ -281,18 +253,18 @@ class OntologyAdapter:
|
|
|
281
253
|
- adapt the labels (replace _ with space and convert to lower sentence case)
|
|
282
254
|
|
|
283
255
|
Args:
|
|
256
|
+
----
|
|
284
257
|
nx_graph (nx.DiGraph): The networkx graph
|
|
285
258
|
switch_label_and_id (bool): If True, id and label are switched
|
|
286
259
|
rename_nodes (bool): If True, the nodes are renamed
|
|
287
260
|
|
|
288
261
|
Returns:
|
|
262
|
+
-------
|
|
289
263
|
nx.DiGraph: The networkx ontology graph in BioCypher format
|
|
264
|
+
|
|
290
265
|
"""
|
|
291
266
|
mapping = {
|
|
292
|
-
node: self._get_nx_id_and_label(
|
|
293
|
-
node, switch_label_and_id, rename_nodes
|
|
294
|
-
)[0]
|
|
295
|
-
for node in nx_graph.nodes
|
|
267
|
+
node: self._get_nx_id_and_label(node, switch_label_and_id, rename_nodes)[0] for node in nx_graph.nodes
|
|
296
268
|
}
|
|
297
269
|
renamed = nx.relabel_nodes(nx_graph, mapping, copy=False)
|
|
298
270
|
return renamed
|
|
@@ -307,13 +279,16 @@ class OntologyAdapter:
|
|
|
307
279
|
"""Get all ancestors of the root node in the networkx graph.
|
|
308
280
|
|
|
309
281
|
Args:
|
|
282
|
+
----
|
|
310
283
|
renamed (nx.DiGraph): The renamed networkx graph
|
|
311
284
|
root_label (str): The label of the root node in the ontology
|
|
312
285
|
switch_label_and_id (bool): If True, id and label are switched
|
|
313
286
|
rename_nodes (bool): If True, the nodes are renamed
|
|
314
287
|
|
|
315
288
|
Returns:
|
|
289
|
+
-------
|
|
316
290
|
nx.DiGraph: The filtered networkx graph
|
|
291
|
+
|
|
317
292
|
"""
|
|
318
293
|
root = self._get_nx_id_and_label(
|
|
319
294
|
self._find_root_label(self._rdf_graph, root_label),
|
|
@@ -325,17 +300,18 @@ class OntologyAdapter:
|
|
|
325
300
|
filtered_graph = renamed.subgraph(ancestors)
|
|
326
301
|
return filtered_graph
|
|
327
302
|
|
|
328
|
-
def _get_nx_id_and_label(
|
|
329
|
-
self, node, switch_id_and_label: bool, rename_nodes: bool = True
|
|
330
|
-
) -> tuple[str, str]:
|
|
303
|
+
def _get_nx_id_and_label(self, node, switch_id_and_label: bool, rename_nodes: bool = True) -> tuple[str, str]:
|
|
331
304
|
"""Rename node id and label for nx graph.
|
|
332
305
|
|
|
333
306
|
Args:
|
|
307
|
+
----
|
|
334
308
|
node (str): The node to rename
|
|
335
309
|
switch_id_and_label (bool): If True, switch id and label
|
|
336
310
|
|
|
337
311
|
Returns:
|
|
312
|
+
-------
|
|
338
313
|
tuple[str, str]: The renamed node id and label
|
|
314
|
+
|
|
339
315
|
"""
|
|
340
316
|
node_id_str = self._remove_prefix(str(node))
|
|
341
317
|
node_label_str = str(self._rdf_graph.value(node, rdflib.RDFS.label))
|
|
@@ -348,28 +324,23 @@ class OntologyAdapter:
|
|
|
348
324
|
|
|
349
325
|
def _find_root_label(self, g, root_label):
|
|
350
326
|
# Loop through all labels in the ontology
|
|
351
|
-
for label_subject, _, label_in_ontology in g.triples(
|
|
352
|
-
(None, rdflib.RDFS.label, None)
|
|
353
|
-
):
|
|
327
|
+
for label_subject, _, label_in_ontology in g.triples((None, rdflib.RDFS.label, None)):
|
|
354
328
|
# If the label is the root label, set the root node to the label's subject
|
|
355
329
|
if str(label_in_ontology) == root_label:
|
|
356
330
|
root = label_subject
|
|
357
331
|
break
|
|
358
332
|
else:
|
|
359
333
|
labels_in_ontology = []
|
|
360
|
-
for label_subject, _, label_in_ontology in g.triples(
|
|
361
|
-
(None, rdflib.RDFS.label, None)
|
|
362
|
-
):
|
|
334
|
+
for label_subject, _, label_in_ontology in g.triples((None, rdflib.RDFS.label, None)):
|
|
363
335
|
labels_in_ontology.append(str(label_in_ontology))
|
|
364
336
|
raise ValueError(
|
|
365
337
|
f"Could not find root node with label '{root_label}'. "
|
|
366
|
-
f"The ontology contains the following labels: {labels_in_ontology}"
|
|
338
|
+
f"The ontology contains the following labels: {labels_in_ontology}",
|
|
367
339
|
)
|
|
368
340
|
return root
|
|
369
341
|
|
|
370
342
|
def _remove_prefix(self, uri: str) -> str:
|
|
371
|
-
"""
|
|
372
|
-
Remove the prefix of a URI. URIs can contain either "#" or "/" as a
|
|
343
|
+
"""Remove the prefix of a URI. URIs can contain either "#" or "/" as a
|
|
373
344
|
separator between the prefix and the local name. The prefix is
|
|
374
345
|
everything before the last separator.
|
|
375
346
|
"""
|
|
@@ -379,8 +350,7 @@ class OntologyAdapter:
|
|
|
379
350
|
return uri
|
|
380
351
|
|
|
381
352
|
def _load_rdf_graph(self, ontology_file):
|
|
382
|
-
"""
|
|
383
|
-
Load the ontology into an RDFlib graph. The ontology file can be in
|
|
353
|
+
"""Load the ontology into an RDFlib graph. The ontology file can be in
|
|
384
354
|
OWL, OBO, or RDF/XML format.
|
|
385
355
|
"""
|
|
386
356
|
g = rdflib.Graph()
|
|
@@ -388,9 +358,7 @@ class OntologyAdapter:
|
|
|
388
358
|
return g
|
|
389
359
|
|
|
390
360
|
def _get_format(self, ontology_file):
|
|
391
|
-
"""
|
|
392
|
-
Get the format of the ontology file.
|
|
393
|
-
"""
|
|
361
|
+
"""Get the format of the ontology file."""
|
|
394
362
|
if self._format:
|
|
395
363
|
if self._format == "owl":
|
|
396
364
|
return "application/rdf+xml"
|
|
@@ -401,9 +369,7 @@ class OntologyAdapter:
|
|
|
401
369
|
elif self._format == "ttl":
|
|
402
370
|
return self._format
|
|
403
371
|
else:
|
|
404
|
-
raise ValueError(
|
|
405
|
-
f"Could not determine format of ontology file {ontology_file}"
|
|
406
|
-
)
|
|
372
|
+
raise ValueError(f"Could not determine format of ontology file {ontology_file}")
|
|
407
373
|
|
|
408
374
|
if ontology_file.endswith(".owl"):
|
|
409
375
|
return "application/rdf+xml"
|
|
@@ -414,31 +380,25 @@ class OntologyAdapter:
|
|
|
414
380
|
elif ontology_file.endswith(".ttl"):
|
|
415
381
|
return "ttl"
|
|
416
382
|
else:
|
|
417
|
-
raise ValueError(
|
|
418
|
-
f"Could not determine format of ontology file {ontology_file}"
|
|
419
|
-
)
|
|
383
|
+
raise ValueError(f"Could not determine format of ontology file {ontology_file}")
|
|
420
384
|
|
|
421
385
|
def get_nx_graph(self):
|
|
422
|
-
"""
|
|
423
|
-
Get the networkx graph representing the ontology.
|
|
424
|
-
"""
|
|
386
|
+
"""Get the networkx graph representing the ontology."""
|
|
425
387
|
return self._nx_graph
|
|
426
388
|
|
|
427
389
|
def get_rdf_graph(self):
|
|
428
|
-
"""
|
|
429
|
-
Get the RDFlib graph representing the ontology.
|
|
430
|
-
"""
|
|
390
|
+
"""Get the RDFlib graph representing the ontology."""
|
|
431
391
|
return self._rdf_graph
|
|
432
392
|
|
|
433
393
|
def get_root_node(self):
|
|
434
|
-
"""
|
|
435
|
-
Get root node in the ontology.
|
|
394
|
+
"""Get root node in the ontology.
|
|
436
395
|
|
|
437
|
-
Returns
|
|
396
|
+
Returns
|
|
397
|
+
-------
|
|
438
398
|
root_node: If _switch_label_and_id is True, the root node label is returned,
|
|
439
399
|
otherwise the root node id is returned.
|
|
440
|
-
"""
|
|
441
400
|
|
|
401
|
+
"""
|
|
442
402
|
root_node = None
|
|
443
403
|
root_label = self._root_label.replace("_", " ")
|
|
444
404
|
|
|
@@ -446,30 +406,23 @@ class OntologyAdapter:
|
|
|
446
406
|
root_node = to_lower_sentence_case(root_label)
|
|
447
407
|
elif not self._switch_label_and_id:
|
|
448
408
|
for node, data in self.get_nx_graph().nodes(data=True):
|
|
449
|
-
if "label" in data and data["label"] == to_lower_sentence_case(
|
|
450
|
-
root_label
|
|
451
|
-
):
|
|
409
|
+
if "label" in data and data["label"] == to_lower_sentence_case(root_label):
|
|
452
410
|
root_node = node
|
|
453
411
|
break
|
|
454
412
|
|
|
455
413
|
return root_node
|
|
456
414
|
|
|
457
415
|
def get_ancestors(self, node_label):
|
|
458
|
-
"""
|
|
459
|
-
Get the ancestors of a node in the ontology.
|
|
460
|
-
"""
|
|
416
|
+
"""Get the ancestors of a node in the ontology."""
|
|
461
417
|
return nx.dfs_preorder_nodes(self._nx_graph, node_label)
|
|
462
418
|
|
|
463
419
|
def get_head_join_node(self):
|
|
464
|
-
"""
|
|
465
|
-
Get the head join node of the ontology.
|
|
466
|
-
"""
|
|
420
|
+
"""Get the head join node of the ontology."""
|
|
467
421
|
return self._head_join_node
|
|
468
422
|
|
|
469
423
|
|
|
470
424
|
class Ontology:
|
|
471
|
-
"""
|
|
472
|
-
A class that represents the ontological "backbone" of a BioCypher knowledge
|
|
425
|
+
"""A class that represents the ontological "backbone" of a BioCypher knowledge
|
|
473
426
|
graph. The ontology can be built from a single resource, or hybridised from
|
|
474
427
|
a combination of resources, with one resource being the "head" ontology,
|
|
475
428
|
while an arbitrary number of other resources can become "tail" ontologies at
|
|
@@ -480,18 +433,18 @@ class Ontology:
|
|
|
480
433
|
self,
|
|
481
434
|
head_ontology: dict,
|
|
482
435
|
ontology_mapping: Optional["OntologyMapping"] = None,
|
|
483
|
-
tail_ontologies:
|
|
436
|
+
tail_ontologies: dict | None = None,
|
|
484
437
|
):
|
|
485
|
-
"""
|
|
486
|
-
Initialize the Ontology class.
|
|
438
|
+
"""Initialize the Ontology class.
|
|
487
439
|
|
|
488
440
|
Args:
|
|
441
|
+
----
|
|
489
442
|
head_ontology (OntologyAdapter): The head ontology.
|
|
490
443
|
|
|
491
444
|
tail_ontologies (list): A list of OntologyAdapters that will be
|
|
492
445
|
added to the head ontology. Defaults to None.
|
|
493
|
-
"""
|
|
494
446
|
|
|
447
|
+
"""
|
|
495
448
|
self._head_ontology_meta = head_ontology
|
|
496
449
|
self.mapping = ontology_mapping
|
|
497
450
|
self._tail_ontology_meta = tail_ontologies
|
|
@@ -505,8 +458,7 @@ class Ontology:
|
|
|
505
458
|
self._main()
|
|
506
459
|
|
|
507
460
|
def _main(self) -> None:
|
|
508
|
-
"""
|
|
509
|
-
Main method to be run on instantiation. Loads the ontologies, joins
|
|
461
|
+
"""Main method to be run on instantiation. Loads the ontologies, joins
|
|
510
462
|
them, and returns the hybrid ontology. Loads only the head ontology
|
|
511
463
|
if nothing else is given. Adds user extensions and properties from
|
|
512
464
|
the mapping.
|
|
@@ -529,20 +481,16 @@ class Ontology:
|
|
|
529
481
|
self._add_properties()
|
|
530
482
|
|
|
531
483
|
def _load_ontologies(self) -> None:
|
|
532
|
-
"""
|
|
533
|
-
For each ontology, load the OntologyAdapter object and store it as an
|
|
484
|
+
"""For each ontology, load the OntologyAdapter object and store it as an
|
|
534
485
|
instance variable (head) or a dictionary (tail).
|
|
535
486
|
"""
|
|
536
|
-
|
|
537
487
|
logger.info("Loading ontologies...")
|
|
538
488
|
|
|
539
489
|
self._head_ontology = OntologyAdapter(
|
|
540
490
|
ontology_file=self._head_ontology_meta["url"],
|
|
541
491
|
root_label=self._head_ontology_meta["root_node"],
|
|
542
492
|
ontology_file_format=self._head_ontology_meta.get("format", None),
|
|
543
|
-
switch_label_and_id=self._head_ontology_meta.get(
|
|
544
|
-
"switch_label_and_id", True
|
|
545
|
-
),
|
|
493
|
+
switch_label_and_id=self._head_ontology_meta.get("switch_label_and_id", True),
|
|
546
494
|
)
|
|
547
495
|
|
|
548
496
|
if self._tail_ontology_meta:
|
|
@@ -558,32 +506,25 @@ class Ontology:
|
|
|
558
506
|
)
|
|
559
507
|
|
|
560
508
|
def _get_head_join_node(self, adapter: OntologyAdapter) -> str:
|
|
561
|
-
"""
|
|
562
|
-
Tries to find the head join node of the given ontology adapter in the
|
|
509
|
+
"""Tries to find the head join node of the given ontology adapter in the
|
|
563
510
|
head ontology. If the join node is not found, the method will raise an
|
|
564
511
|
error.
|
|
565
512
|
|
|
566
513
|
Args:
|
|
514
|
+
----
|
|
567
515
|
adapter (OntologyAdapter): The ontology adapter of which to find the
|
|
568
516
|
join node in the head ontology.
|
|
569
|
-
"""
|
|
570
517
|
|
|
518
|
+
"""
|
|
571
519
|
head_join_node = None
|
|
572
520
|
user_defined_head_join_node_label = adapter.get_head_join_node()
|
|
573
|
-
head_join_node_label_in_bc_format = to_lower_sentence_case(
|
|
574
|
-
user_defined_head_join_node_label.replace("_", " ")
|
|
575
|
-
)
|
|
521
|
+
head_join_node_label_in_bc_format = to_lower_sentence_case(user_defined_head_join_node_label.replace("_", " "))
|
|
576
522
|
|
|
577
523
|
if self._head_ontology._switch_label_and_id:
|
|
578
524
|
head_join_node = head_join_node_label_in_bc_format
|
|
579
525
|
elif not self._head_ontology._switch_label_and_id:
|
|
580
|
-
for node_id, data in self._head_ontology.get_nx_graph().nodes(
|
|
581
|
-
data
|
|
582
|
-
):
|
|
583
|
-
if (
|
|
584
|
-
"label" in data
|
|
585
|
-
and data["label"] == head_join_node_label_in_bc_format
|
|
586
|
-
):
|
|
526
|
+
for node_id, data in self._head_ontology.get_nx_graph().nodes(data=True):
|
|
527
|
+
if "label" in data and data["label"] == head_join_node_label_in_bc_format:
|
|
587
528
|
head_join_node = node_id
|
|
588
529
|
break
|
|
589
530
|
|
|
@@ -596,22 +537,20 @@ class Ontology:
|
|
|
596
537
|
)
|
|
597
538
|
raise ValueError(
|
|
598
539
|
f"Head join node '{head_join_node}' not found in head ontology. "
|
|
599
|
-
f"The head ontology contains the following nodes: {head_ontology.nodes}."
|
|
540
|
+
f"The head ontology contains the following nodes: {head_ontology.nodes}.",
|
|
600
541
|
)
|
|
601
542
|
return head_join_node
|
|
602
543
|
|
|
603
|
-
def _join_ontologies(
|
|
604
|
-
|
|
605
|
-
) -> None:
|
|
606
|
-
"""
|
|
607
|
-
Joins the ontologies by adding the tail ontology as a subgraph to the
|
|
544
|
+
def _join_ontologies(self, adapter: OntologyAdapter, head_join_node) -> None:
|
|
545
|
+
"""Joins the ontologies by adding the tail ontology as a subgraph to the
|
|
608
546
|
head ontology at the specified join nodes.
|
|
609
547
|
|
|
610
548
|
Args:
|
|
549
|
+
----
|
|
611
550
|
adapter (OntologyAdapter): The ontology adapter of the tail ontology
|
|
612
551
|
to be added to the head ontology.
|
|
613
|
-
"""
|
|
614
552
|
|
|
553
|
+
"""
|
|
615
554
|
if not self._nx_graph:
|
|
616
555
|
self._nx_graph = self._head_ontology.get_nx_graph().copy()
|
|
617
556
|
|
|
@@ -619,9 +558,7 @@ class Ontology:
|
|
|
619
558
|
tail_ontology = adapter.get_nx_graph()
|
|
620
559
|
|
|
621
560
|
# subtree of tail ontology at join node
|
|
622
|
-
tail_ontology_subtree = nx.dfs_tree(
|
|
623
|
-
tail_ontology.reverse(), tail_join_node
|
|
624
|
-
).reverse()
|
|
561
|
+
tail_ontology_subtree = nx.dfs_tree(tail_ontology.reverse(), tail_join_node).reverse()
|
|
625
562
|
|
|
626
563
|
# transfer node attributes from tail ontology to subtree
|
|
627
564
|
for node in tail_ontology_subtree.nodes:
|
|
@@ -639,21 +576,19 @@ class Ontology:
|
|
|
639
576
|
tail_ontology_subtree.add_edge(tail_join_node, head_join_node)
|
|
640
577
|
|
|
641
578
|
# else rename tail join node to match head join node if necessary
|
|
642
|
-
elif
|
|
643
|
-
tail_ontology_subtree = nx.relabel_nodes(
|
|
644
|
-
tail_ontology_subtree, {tail_join_node: head_join_node}
|
|
645
|
-
)
|
|
579
|
+
elif tail_join_node != head_join_node:
|
|
580
|
+
tail_ontology_subtree = nx.relabel_nodes(tail_ontology_subtree, {tail_join_node: head_join_node})
|
|
646
581
|
|
|
647
582
|
# combine head ontology and tail subtree
|
|
648
583
|
self._nx_graph = nx.compose(self._nx_graph, tail_ontology_subtree)
|
|
649
584
|
|
|
650
585
|
def _extend_ontology(self) -> None:
|
|
651
|
-
"""
|
|
652
|
-
Adds the user extensions to the ontology. Tries to find the parent in
|
|
653
|
-
the ontology, adds it if necessary, and adds the child and a directed
|
|
654
|
-
edge from child to parent. Can handle multiple parents.
|
|
655
|
-
"""
|
|
586
|
+
"""Add the user extensions to the ontology.
|
|
656
587
|
|
|
588
|
+
Tries to find the parent in the ontology, adds it if necessary, and adds
|
|
589
|
+
the child and a directed edge from child to parent. Can handle multiple
|
|
590
|
+
parents.
|
|
591
|
+
"""
|
|
657
592
|
if not self._nx_graph:
|
|
658
593
|
self._nx_graph = self._head_ontology.get_nx_graph().copy()
|
|
659
594
|
|
|
@@ -663,11 +598,13 @@ class Ontology:
|
|
|
663
598
|
continue
|
|
664
599
|
|
|
665
600
|
if not self._nx_graph.has_node(key):
|
|
666
|
-
|
|
667
|
-
f"Node {key} not found in ontology, but also has no "
|
|
668
|
-
"
|
|
669
|
-
"
|
|
601
|
+
msg = (
|
|
602
|
+
f"Node {key} not found in ontology, but also has no inheritance definition. Please check your "
|
|
603
|
+
"schema for spelling errors, first letter not in lower case, use of underscores, a missing "
|
|
604
|
+
"`is_a` definition (SubClassOf a root node), or missing labels in class or super-classes."
|
|
670
605
|
)
|
|
606
|
+
logger.error(msg)
|
|
607
|
+
raise ValueError(msg)
|
|
671
608
|
|
|
672
609
|
continue
|
|
673
610
|
|
|
@@ -679,9 +616,7 @@ class Ontology:
|
|
|
679
616
|
|
|
680
617
|
if parent not in self._nx_graph.nodes:
|
|
681
618
|
self._nx_graph.add_node(parent)
|
|
682
|
-
self._nx_graph.nodes[parent][
|
|
683
|
-
"label"
|
|
684
|
-
] = sentencecase_to_pascalcase(parent)
|
|
619
|
+
self._nx_graph.nodes[parent]["label"] = sentencecase_to_pascalcase(parent)
|
|
685
620
|
|
|
686
621
|
# mark parent as user extension
|
|
687
622
|
self._nx_graph.nodes[parent]["user_extension"] = True
|
|
@@ -689,9 +624,7 @@ class Ontology:
|
|
|
689
624
|
|
|
690
625
|
if child not in self._nx_graph.nodes:
|
|
691
626
|
self._nx_graph.add_node(child)
|
|
692
|
-
self._nx_graph.nodes[child][
|
|
693
|
-
"label"
|
|
694
|
-
] = sentencecase_to_pascalcase(child)
|
|
627
|
+
self._nx_graph.nodes[child]["label"] = sentencecase_to_pascalcase(child)
|
|
695
628
|
|
|
696
629
|
# mark child as user extension
|
|
697
630
|
self._nx_graph.nodes[child]["user_extension"] = True
|
|
@@ -702,10 +635,7 @@ class Ontology:
|
|
|
702
635
|
child = parent
|
|
703
636
|
|
|
704
637
|
def _connect_biolink_classes(self) -> None:
|
|
705
|
-
"""
|
|
706
|
-
Experimental: Adds edges from disjoint classes to the entity node.
|
|
707
|
-
"""
|
|
708
|
-
|
|
638
|
+
"""Experimental: Adds edges from disjoint classes to the entity node."""
|
|
709
639
|
if not self._nx_graph:
|
|
710
640
|
self._nx_graph = self._head_ontology.get_nx_graph().copy()
|
|
711
641
|
|
|
@@ -726,19 +656,15 @@ class Ontology:
|
|
|
726
656
|
for node in disjoint_classes:
|
|
727
657
|
if not self._nx_graph.nodes.get(node):
|
|
728
658
|
self._nx_graph.add_node(node)
|
|
729
|
-
self._nx_graph.nodes[node][
|
|
730
|
-
"label"
|
|
731
|
-
] = sentencecase_to_pascalcase(node)
|
|
659
|
+
self._nx_graph.nodes[node]["label"] = sentencecase_to_pascalcase(node)
|
|
732
660
|
|
|
733
661
|
self._nx_graph.add_edge(node, "entity")
|
|
734
662
|
|
|
735
663
|
def _add_properties(self) -> None:
|
|
736
|
-
"""
|
|
737
|
-
For each entity in the mapping, update the ontology with the properties
|
|
664
|
+
"""For each entity in the mapping, update the ontology with the properties
|
|
738
665
|
specified in the mapping. Updates synonym information in the graph,
|
|
739
666
|
setting the synonym as the primary node label.
|
|
740
667
|
"""
|
|
741
|
-
|
|
742
668
|
for key, value in self.mapping.extended_schema.items():
|
|
743
669
|
if key in self._nx_graph.nodes:
|
|
744
670
|
self._nx_graph.nodes[key].update(value)
|
|
@@ -746,33 +672,29 @@ class Ontology:
|
|
|
746
672
|
if value.get("synonym_for"):
|
|
747
673
|
# change node label to synonym
|
|
748
674
|
if value["synonym_for"] not in self._nx_graph.nodes:
|
|
749
|
-
raise ValueError(
|
|
750
|
-
f'Node {value["synonym_for"]} not found in ontology.'
|
|
751
|
-
)
|
|
675
|
+
raise ValueError(f"Node {value['synonym_for']} not found in ontology.")
|
|
752
676
|
|
|
753
|
-
self._nx_graph = nx.relabel_nodes(
|
|
754
|
-
self._nx_graph, {value["synonym_for"]: key}
|
|
755
|
-
)
|
|
677
|
+
self._nx_graph = nx.relabel_nodes(self._nx_graph, {value["synonym_for"]: key})
|
|
756
678
|
|
|
757
679
|
def get_ancestors(self, node_label: str) -> list:
|
|
758
|
-
"""
|
|
759
|
-
Get the ancestors of a node in the ontology.
|
|
680
|
+
"""Get the ancestors of a node in the ontology.
|
|
760
681
|
|
|
761
682
|
Args:
|
|
683
|
+
----
|
|
762
684
|
node_label (str): The label of the node in the ontology.
|
|
763
685
|
|
|
764
686
|
Returns:
|
|
687
|
+
-------
|
|
765
688
|
list: A list of the ancestors of the node.
|
|
766
|
-
"""
|
|
767
689
|
|
|
690
|
+
"""
|
|
768
691
|
return nx.dfs_tree(self._nx_graph, node_label)
|
|
769
692
|
|
|
770
693
|
def show_ontology_structure(self, to_disk: str = None, full: bool = False):
|
|
771
|
-
"""
|
|
772
|
-
Show the ontology structure using treelib or write to GRAPHML file.
|
|
694
|
+
"""Show the ontology structure using treelib or write to GRAPHML file.
|
|
773
695
|
|
|
774
696
|
Args:
|
|
775
|
-
|
|
697
|
+
----
|
|
776
698
|
to_disk (str): If specified, the ontology structure will be saved
|
|
777
699
|
to disk as a GRAPHML file at the location (directory) specified
|
|
778
700
|
by the `to_disk` string, to be opened in your favourite graph
|
|
@@ -781,15 +703,15 @@ class Ontology:
|
|
|
781
703
|
full (bool): If True, the full ontology structure will be shown,
|
|
782
704
|
including all nodes and edges. If False, only the nodes and
|
|
783
705
|
edges that are relevant to the extended schema will be shown.
|
|
784
|
-
"""
|
|
785
706
|
|
|
707
|
+
"""
|
|
786
708
|
if not full and not self.mapping.extended_schema:
|
|
787
709
|
raise ValueError(
|
|
788
710
|
"You are attempting to visualise a subset of the loaded"
|
|
789
711
|
"ontology, but have not provided a schema configuration. "
|
|
790
712
|
"To display a partial ontology graph, please provide a schema "
|
|
791
713
|
"configuration file; to visualise the full graph, please use "
|
|
792
|
-
"the parameter `full = True`."
|
|
714
|
+
"the parameter `full = True`.",
|
|
793
715
|
)
|
|
794
716
|
|
|
795
717
|
if not self._nx_graph:
|
|
@@ -799,7 +721,7 @@ class Ontology:
|
|
|
799
721
|
msg = f"Showing ontology structure based on {self._head_ontology._ontology_file}"
|
|
800
722
|
|
|
801
723
|
else:
|
|
802
|
-
msg = f"Showing ontology structure based on {len(self._tail_ontology_meta)+1} ontologies: "
|
|
724
|
+
msg = f"Showing ontology structure based on {len(self._tail_ontology_meta) + 1} ontologies: "
|
|
803
725
|
|
|
804
726
|
logger.info(msg)
|
|
805
727
|
|
|
@@ -825,10 +747,7 @@ class Ontology:
|
|
|
825
747
|
if not isinstance(self.mapping.extended_schema[node], dict):
|
|
826
748
|
continue
|
|
827
749
|
if self.mapping.extended_schema[node].get("synonym_for"):
|
|
828
|
-
tree.nodes[node].tag = (
|
|
829
|
-
f"{node} = "
|
|
830
|
-
f"{self.mapping.extended_schema[node].get('synonym_for')}"
|
|
831
|
-
)
|
|
750
|
+
tree.nodes[node].tag = f"{node} = {self.mapping.extended_schema[node].get('synonym_for')}"
|
|
832
751
|
|
|
833
752
|
logger.info(f"\n{tree}")
|
|
834
753
|
|
|
@@ -859,11 +778,9 @@ class Ontology:
|
|
|
859
778
|
return True
|
|
860
779
|
|
|
861
780
|
def get_dict(self) -> dict:
|
|
862
|
-
"""
|
|
863
|
-
Returns a dictionary compatible with a BioCypher node for compatibility
|
|
781
|
+
"""Returns a dictionary compatible with a BioCypher node for compatibility
|
|
864
782
|
with the Neo4j driver.
|
|
865
783
|
"""
|
|
866
|
-
|
|
867
784
|
d = {
|
|
868
785
|
"node_id": self._get_current_id(),
|
|
869
786
|
"node_label": "BioCypher",
|
|
@@ -875,13 +792,11 @@ class Ontology:
|
|
|
875
792
|
return d
|
|
876
793
|
|
|
877
794
|
def _get_current_id(self):
|
|
878
|
-
"""
|
|
879
|
-
Instantiate a version ID for the current session. For now does simple
|
|
795
|
+
"""Instantiate a version ID for the current session. For now does simple
|
|
880
796
|
versioning using datetime.
|
|
881
797
|
|
|
882
798
|
Can later implement incremental versioning, versioning from
|
|
883
799
|
config file, or manual specification via argument.
|
|
884
800
|
"""
|
|
885
|
-
|
|
886
801
|
now = datetime.now()
|
|
887
802
|
return now.strftime("v%Y%m%d-%H%M%S")
|