biocypher 0.5.41__py3-none-any.whl → 0.5.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/_config/biocypher_config.yaml +15 -0
- biocypher/_core.py +3 -3
- biocypher/_metadata.py +1 -1
- biocypher/_misc.py +6 -1
- biocypher/_ontology.py +133 -53
- biocypher/{_connect.py → output/connect/_neo4j_driver.py} +5 -5
- biocypher/{_pandas.py → output/in_memory/_pandas.py} +2 -1
- biocypher/output/write/__init__.py +0 -0
- biocypher/{write → output/write}/_batch_writer.py +26 -22
- biocypher/{write/_write.py → output/write/_get_writer.py} +19 -11
- biocypher/output/write/_writer.py +200 -0
- biocypher/output/write/graph/__init__.py +0 -0
- biocypher/{write → output/write}/graph/_arangodb.py +1 -1
- biocypher/{write → output/write}/graph/_neo4j.py +2 -4
- biocypher/output/write/graph/_networkx.py +76 -0
- biocypher/output/write/graph/_rdf.py +515 -0
- biocypher/output/write/relational/__init__.py +0 -0
- biocypher/output/write/relational/_csv.py +76 -0
- biocypher/{write → output/write}/relational/_postgresql.py +2 -2
- biocypher/{write → output/write}/relational/_sqlite.py +1 -1
- {biocypher-0.5.41.dist-info → biocypher-0.5.43.dist-info}/METADATA +1 -1
- biocypher-0.5.43.dist-info/RECORD +39 -0
- biocypher-0.5.41.dist-info/RECORD +0 -32
- /biocypher/{write → output}/__init__.py +0 -0
- /biocypher/{write/graph → output/connect}/__init__.py +0 -0
- /biocypher/{write/relational → output/in_memory}/__init__.py +0 -0
- {biocypher-0.5.41.dist-info → biocypher-0.5.43.dist-info}/LICENSE +0 -0
- {biocypher-0.5.41.dist-info → biocypher-0.5.43.dist-info}/WHEEL +0 -0
|
@@ -27,6 +27,7 @@ biocypher:
|
|
|
27
27
|
head_ontology:
|
|
28
28
|
url: https://github.com/biolink/biolink-model/raw/v3.2.1/biolink-model.owl.ttl
|
|
29
29
|
root_node: entity
|
|
30
|
+
# switch_label_and_id: true
|
|
30
31
|
|
|
31
32
|
### Optional parameters ###
|
|
32
33
|
|
|
@@ -53,10 +54,12 @@ biocypher:
|
|
|
53
54
|
# url: test/ontologies/so.owl
|
|
54
55
|
# head_join_node: sequence variant
|
|
55
56
|
# tail_join_node: sequence_variant
|
|
57
|
+
# switch_label_and_id: true
|
|
56
58
|
# mondo:
|
|
57
59
|
# url: test/ontologies/mondo.owl
|
|
58
60
|
# head_join_node: disease
|
|
59
61
|
# tail_join_node: disease
|
|
62
|
+
# switch_label_and_id: true
|
|
60
63
|
|
|
61
64
|
### DBMS configuration ###
|
|
62
65
|
|
|
@@ -113,6 +116,10 @@ postgresql:
|
|
|
113
116
|
# import_call_bin_prefix: '' # path to "psql"
|
|
114
117
|
# import_call_file_prefix: '/path/to/files'
|
|
115
118
|
|
|
119
|
+
rdf:
|
|
120
|
+
### RDF configuration ###
|
|
121
|
+
rdf_format: turtle
|
|
122
|
+
|
|
116
123
|
sqlite:
|
|
117
124
|
### SQLite configuration ###
|
|
118
125
|
|
|
@@ -124,3 +131,11 @@ sqlite:
|
|
|
124
131
|
delimiter: '\t'
|
|
125
132
|
# import_call_bin_prefix: '' # path to "sqlite3"
|
|
126
133
|
# import_call_file_prefix: '/path/to/files'
|
|
134
|
+
|
|
135
|
+
csv:
|
|
136
|
+
### CSV/Pandas configuration ###
|
|
137
|
+
delimiter: ","
|
|
138
|
+
|
|
139
|
+
networkx:
|
|
140
|
+
### NetworkX configuration ###
|
|
141
|
+
some_config: some_value # placeholder for technical reasons TODO
|
biocypher/_core.py
CHANGED
|
@@ -26,17 +26,17 @@ from ._logger import logger
|
|
|
26
26
|
|
|
27
27
|
logger.debug(f"Loading module {__name__}.")
|
|
28
28
|
|
|
29
|
-
from biocypher.write._write import DBMS_TO_CLASS, get_writer
|
|
30
29
|
from ._get import Downloader
|
|
31
30
|
from ._config import config as _config
|
|
32
31
|
from ._config import update_from_file as _file_update
|
|
33
32
|
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
34
|
-
from ._pandas import Pandas
|
|
35
|
-
from ._connect import get_driver
|
|
36
33
|
from ._mapping import OntologyMapping
|
|
37
34
|
from ._ontology import Ontology
|
|
38
35
|
from ._translate import Translator
|
|
39
36
|
from ._deduplicate import Deduplicator
|
|
37
|
+
from .output.in_memory._pandas import Pandas
|
|
38
|
+
from .output.write._get_writer import DBMS_TO_CLASS, get_writer
|
|
39
|
+
from .output.connect._neo4j_driver import get_driver
|
|
40
40
|
|
|
41
41
|
__all__ = ["BioCypher"]
|
|
42
42
|
|
biocypher/_metadata.py
CHANGED
biocypher/_misc.py
CHANGED
|
@@ -115,7 +115,12 @@ def _get_inheritance_tree(inheritance_graph: Union[dict, nx.Graph]) -> dict:
|
|
|
115
115
|
)
|
|
116
116
|
if multiple_parents_present:
|
|
117
117
|
logger.warning(
|
|
118
|
-
"The ontology contains multiple inheritance (one child node
|
|
118
|
+
"The ontology contains multiple inheritance (one child node "
|
|
119
|
+
"has multiple parent nodes). This is not visualized in the "
|
|
120
|
+
"following hierarchy tree (the child node is only added once). "
|
|
121
|
+
"If you wish to browse all relationships of the parsed "
|
|
122
|
+
"ontologies, write a graphml file to disk using "
|
|
123
|
+
"`to_disk = <directory>` and view this file."
|
|
119
124
|
)
|
|
120
125
|
|
|
121
126
|
# unlist values
|
biocypher/_ontology.py
CHANGED
|
@@ -43,19 +43,19 @@ class OntologyAdapter:
|
|
|
43
43
|
ontology is represented by a networkx.DiGraph object; an RDFlib graph is
|
|
44
44
|
also kept. By default, the DiGraph reverses the label and identifier of the
|
|
45
45
|
nodes, such that the node name in the graph is the human-readable label. The
|
|
46
|
-
edges are oriented from child to parent.
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
edges are oriented from child to parent.
|
|
47
|
+
Labels are formatted in lower sentence case and underscores are replaced by spaces.
|
|
48
|
+
Identifiers are taken as defined and the prefixes are removed by default.
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
51
|
def __init__(
|
|
52
52
|
self,
|
|
53
53
|
ontology_file: str,
|
|
54
54
|
root_label: str,
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
ontology_file_format: Optional[str] = None,
|
|
56
|
+
head_join_node_label: Optional[str] = None,
|
|
57
57
|
merge_nodes: Optional[bool] = True,
|
|
58
|
-
|
|
58
|
+
switch_label_and_id: bool = True,
|
|
59
59
|
remove_prefixes: bool = True,
|
|
60
60
|
):
|
|
61
61
|
"""
|
|
@@ -68,7 +68,10 @@ class OntologyAdapter:
|
|
|
68
68
|
root_label (str): The label of the root node in the ontology. In
|
|
69
69
|
case of a tail ontology, this is the tail join node.
|
|
70
70
|
|
|
71
|
-
|
|
71
|
+
ontology_file_format (str): The format of the ontology file (e.g. "application/rdf+xml").
|
|
72
|
+
If format is not passed, it is determined automatically.
|
|
73
|
+
|
|
74
|
+
head_join_node_label (str): Optional variable to store the label of the
|
|
72
75
|
node in the head ontology that should be used to join to the
|
|
73
76
|
root node of the tail ontology. Defaults to None.
|
|
74
77
|
|
|
@@ -77,7 +80,7 @@ class OntologyAdapter:
|
|
|
77
80
|
tail join node will be attached as a child of the head join
|
|
78
81
|
node.
|
|
79
82
|
|
|
80
|
-
|
|
83
|
+
switch_label_and_id (bool): If True, the node names in the graph will be
|
|
81
84
|
the human-readable labels. If False, the node names will be the
|
|
82
85
|
identifiers. Defaults to True.
|
|
83
86
|
|
|
@@ -89,33 +92,37 @@ class OntologyAdapter:
|
|
|
89
92
|
|
|
90
93
|
self._ontology_file = ontology_file
|
|
91
94
|
self._root_label = root_label
|
|
92
|
-
self._format =
|
|
95
|
+
self._format = ontology_file_format
|
|
93
96
|
self._merge_nodes = merge_nodes
|
|
94
|
-
self._head_join_node =
|
|
95
|
-
self.
|
|
97
|
+
self._head_join_node = head_join_node_label
|
|
98
|
+
self._switch_label_and_id = switch_label_and_id
|
|
96
99
|
self._remove_prefixes = remove_prefixes
|
|
97
100
|
|
|
98
101
|
self._rdf_graph = self._load_rdf_graph(ontology_file)
|
|
99
102
|
|
|
100
103
|
self._nx_graph = self._rdf_to_nx(
|
|
101
|
-
self._rdf_graph, root_label,
|
|
104
|
+
self._rdf_graph, root_label, switch_label_and_id
|
|
102
105
|
)
|
|
103
106
|
|
|
104
107
|
def _rdf_to_nx(
|
|
105
|
-
self,
|
|
108
|
+
self,
|
|
109
|
+
_rdf_graph: rdflib.Graph,
|
|
110
|
+
root_label: str,
|
|
111
|
+
switch_label_and_id: bool,
|
|
112
|
+
rename_nodes: bool = True,
|
|
106
113
|
) -> nx.DiGraph:
|
|
107
114
|
one_to_one_triples, one_to_many_dict = self._get_relevant_rdf_triples(
|
|
108
115
|
_rdf_graph
|
|
109
116
|
)
|
|
110
117
|
nx_graph = self._convert_to_nx(one_to_one_triples, one_to_many_dict)
|
|
111
|
-
|
|
112
|
-
|
|
118
|
+
nx_graph = self._add_labels_to_nodes(nx_graph, switch_label_and_id)
|
|
119
|
+
nx_graph = self._change_nodes_to_biocypher_format(
|
|
120
|
+
nx_graph, switch_label_and_id, rename_nodes
|
|
113
121
|
)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
renamed_graph, root_label, reverse_labels
|
|
122
|
+
nx_graph = self._get_all_ancestors(
|
|
123
|
+
nx_graph, root_label, switch_label_and_id, rename_nodes
|
|
117
124
|
)
|
|
118
|
-
return nx.DiGraph(
|
|
125
|
+
return nx.DiGraph(nx_graph)
|
|
119
126
|
|
|
120
127
|
def _get_relevant_rdf_triples(self, g: rdflib.Graph) -> tuple:
|
|
121
128
|
one_to_one_inheritance_graph = self._get_one_to_one_inheritance_triples(
|
|
@@ -239,19 +246,21 @@ class OntologyAdapter:
|
|
|
239
246
|
return nx_graph
|
|
240
247
|
|
|
241
248
|
def _add_labels_to_nodes(
|
|
242
|
-
self, nx_graph: nx.DiGraph,
|
|
249
|
+
self, nx_graph: nx.DiGraph, switch_label_and_id: bool
|
|
243
250
|
) -> nx.DiGraph:
|
|
244
251
|
"""Add labels to the nodes in the networkx graph.
|
|
245
252
|
|
|
246
253
|
Args:
|
|
247
254
|
nx_graph (nx.DiGraph): The networkx graph
|
|
248
|
-
|
|
255
|
+
switch_label_and_id (bool): If True, id and label are switched
|
|
249
256
|
|
|
250
257
|
Returns:
|
|
251
258
|
nx.DiGraph: The networkx graph with labels
|
|
252
259
|
"""
|
|
253
260
|
for node in list(nx_graph.nodes):
|
|
254
|
-
nx_id, nx_label = self._get_nx_id_and_label(
|
|
261
|
+
nx_id, nx_label = self._get_nx_id_and_label(
|
|
262
|
+
node, switch_label_and_id
|
|
263
|
+
)
|
|
255
264
|
if nx_id == "none":
|
|
256
265
|
# remove node if it has no id
|
|
257
266
|
nx_graph.remove_node(node)
|
|
@@ -260,39 +269,56 @@ class OntologyAdapter:
|
|
|
260
269
|
nx_graph.nodes[node]["label"] = nx_label
|
|
261
270
|
return nx_graph
|
|
262
271
|
|
|
263
|
-
def
|
|
264
|
-
self,
|
|
272
|
+
def _change_nodes_to_biocypher_format(
|
|
273
|
+
self,
|
|
274
|
+
nx_graph: nx.DiGraph,
|
|
275
|
+
switch_label_and_id: bool,
|
|
276
|
+
rename_nodes: bool = True,
|
|
265
277
|
) -> nx.DiGraph:
|
|
266
|
-
"""
|
|
278
|
+
"""Change the nodes in the networkx graph to BioCypher format:
|
|
279
|
+
- remove the prefix of the identifier
|
|
280
|
+
- switch id and label
|
|
281
|
+
- adapt the labels (replace _ with space and convert to lower sentence case)
|
|
267
282
|
|
|
268
283
|
Args:
|
|
269
284
|
nx_graph (nx.DiGraph): The networkx graph
|
|
270
|
-
|
|
285
|
+
switch_label_and_id (bool): If True, id and label are switched
|
|
286
|
+
rename_nodes (bool): If True, the nodes are renamed
|
|
271
287
|
|
|
272
288
|
Returns:
|
|
273
|
-
nx.DiGraph: The
|
|
289
|
+
nx.DiGraph: The networkx ontology graph in BioCypher format
|
|
274
290
|
"""
|
|
275
291
|
mapping = {
|
|
276
|
-
node: self._get_nx_id_and_label(
|
|
292
|
+
node: self._get_nx_id_and_label(
|
|
293
|
+
node, switch_label_and_id, rename_nodes
|
|
294
|
+
)[0]
|
|
277
295
|
for node in nx_graph.nodes
|
|
278
296
|
}
|
|
279
297
|
renamed = nx.relabel_nodes(nx_graph, mapping, copy=False)
|
|
280
298
|
return renamed
|
|
281
299
|
|
|
282
300
|
def _get_all_ancestors(
|
|
283
|
-
self,
|
|
301
|
+
self,
|
|
302
|
+
renamed: nx.DiGraph,
|
|
303
|
+
root_label: str,
|
|
304
|
+
switch_label_and_id: bool,
|
|
305
|
+
rename_nodes: bool = True,
|
|
284
306
|
) -> nx.DiGraph:
|
|
285
307
|
"""Get all ancestors of the root node in the networkx graph.
|
|
286
308
|
|
|
287
309
|
Args:
|
|
288
310
|
renamed (nx.DiGraph): The renamed networkx graph
|
|
289
311
|
root_label (str): The label of the root node in the ontology
|
|
312
|
+
switch_label_and_id (bool): If True, id and label are switched
|
|
313
|
+
rename_nodes (bool): If True, the nodes are renamed
|
|
290
314
|
|
|
291
315
|
Returns:
|
|
292
316
|
nx.DiGraph: The filtered networkx graph
|
|
293
317
|
"""
|
|
294
318
|
root = self._get_nx_id_and_label(
|
|
295
|
-
self._find_root_label(self._rdf_graph, root_label),
|
|
319
|
+
self._find_root_label(self._rdf_graph, root_label),
|
|
320
|
+
switch_label_and_id,
|
|
321
|
+
rename_nodes,
|
|
296
322
|
)[0]
|
|
297
323
|
ancestors = nx.ancestors(renamed, root)
|
|
298
324
|
ancestors.add(root)
|
|
@@ -300,7 +326,7 @@ class OntologyAdapter:
|
|
|
300
326
|
return filtered_graph
|
|
301
327
|
|
|
302
328
|
def _get_nx_id_and_label(
|
|
303
|
-
self, node, switch_id_and_label: bool
|
|
329
|
+
self, node, switch_id_and_label: bool, rename_nodes: bool = True
|
|
304
330
|
) -> tuple[str, str]:
|
|
305
331
|
"""Rename node id and label for nx graph.
|
|
306
332
|
|
|
@@ -312,10 +338,10 @@ class OntologyAdapter:
|
|
|
312
338
|
tuple[str, str]: The renamed node id and label
|
|
313
339
|
"""
|
|
314
340
|
node_id_str = self._remove_prefix(str(node))
|
|
315
|
-
node_label_str = str(
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
341
|
+
node_label_str = str(self._rdf_graph.value(node, rdflib.RDFS.label))
|
|
342
|
+
if rename_nodes:
|
|
343
|
+
node_label_str = node_label_str.replace("_", " ")
|
|
344
|
+
node_label_str = to_lower_sentence_case(node_label_str)
|
|
319
345
|
nx_id = node_label_str if switch_id_and_label else node_id_str
|
|
320
346
|
nx_label = node_id_str if switch_id_and_label else node_label_str
|
|
321
347
|
return nx_id, nx_label
|
|
@@ -330,8 +356,14 @@ class OntologyAdapter:
|
|
|
330
356
|
root = label_subject
|
|
331
357
|
break
|
|
332
358
|
else:
|
|
359
|
+
labels_in_ontology = []
|
|
360
|
+
for label_subject, _, label_in_ontology in g.triples(
|
|
361
|
+
(None, rdflib.RDFS.label, None)
|
|
362
|
+
):
|
|
363
|
+
labels_in_ontology.append(str(label_in_ontology))
|
|
333
364
|
raise ValueError(
|
|
334
|
-
f"Could not find root node with label {root_label}"
|
|
365
|
+
f"Could not find root node with label '{root_label}'. "
|
|
366
|
+
f"The ontology contains the following labels: {labels_in_ontology}"
|
|
335
367
|
)
|
|
336
368
|
return root
|
|
337
369
|
|
|
@@ -398,11 +430,29 @@ class OntologyAdapter:
|
|
|
398
430
|
"""
|
|
399
431
|
return self._rdf_graph
|
|
400
432
|
|
|
401
|
-
def
|
|
433
|
+
def get_root_node(self):
|
|
402
434
|
"""
|
|
403
|
-
Get
|
|
435
|
+
Get root node in the ontology.
|
|
436
|
+
|
|
437
|
+
Returns:
|
|
438
|
+
root_node: If _switch_label_and_id is True, the root node label is returned,
|
|
439
|
+
otherwise the root node id is returned.
|
|
404
440
|
"""
|
|
405
|
-
|
|
441
|
+
|
|
442
|
+
root_node = None
|
|
443
|
+
root_label = self._root_label.replace("_", " ")
|
|
444
|
+
|
|
445
|
+
if self._switch_label_and_id:
|
|
446
|
+
root_node = to_lower_sentence_case(root_label)
|
|
447
|
+
elif not self._switch_label_and_id:
|
|
448
|
+
for node, data in self.get_nx_graph().nodes(data=True):
|
|
449
|
+
if "label" in data and data["label"] == to_lower_sentence_case(
|
|
450
|
+
root_label
|
|
451
|
+
):
|
|
452
|
+
root_node = node
|
|
453
|
+
break
|
|
454
|
+
|
|
455
|
+
return root_node
|
|
406
456
|
|
|
407
457
|
def get_ancestors(self, node_label):
|
|
408
458
|
"""
|
|
@@ -465,8 +515,8 @@ class Ontology:
|
|
|
465
515
|
|
|
466
516
|
if self._tail_ontologies:
|
|
467
517
|
for adapter in self._tail_ontologies.values():
|
|
468
|
-
self.
|
|
469
|
-
self._join_ontologies(adapter)
|
|
518
|
+
head_join_node = self._get_head_join_node(adapter)
|
|
519
|
+
self._join_ontologies(adapter, head_join_node)
|
|
470
520
|
else:
|
|
471
521
|
self._nx_graph = self._head_ontology.get_nx_graph()
|
|
472
522
|
|
|
@@ -489,7 +539,10 @@ class Ontology:
|
|
|
489
539
|
self._head_ontology = OntologyAdapter(
|
|
490
540
|
ontology_file=self._head_ontology_meta["url"],
|
|
491
541
|
root_label=self._head_ontology_meta["root_node"],
|
|
492
|
-
|
|
542
|
+
ontology_file_format=self._head_ontology_meta.get("format", None),
|
|
543
|
+
switch_label_and_id=self._head_ontology_meta.get(
|
|
544
|
+
"switch_label_and_id", True
|
|
545
|
+
),
|
|
493
546
|
)
|
|
494
547
|
|
|
495
548
|
if self._tail_ontology_meta:
|
|
@@ -498,12 +551,13 @@ class Ontology:
|
|
|
498
551
|
self._tail_ontologies[key] = OntologyAdapter(
|
|
499
552
|
ontology_file=value["url"],
|
|
500
553
|
root_label=value["tail_join_node"],
|
|
501
|
-
|
|
502
|
-
|
|
554
|
+
head_join_node_label=value["head_join_node"],
|
|
555
|
+
ontology_file_format=value.get("format", None),
|
|
503
556
|
merge_nodes=value.get("merge_nodes", True),
|
|
557
|
+
switch_label_and_id=value.get("switch_label_and_id", True),
|
|
504
558
|
)
|
|
505
559
|
|
|
506
|
-
def
|
|
560
|
+
def _get_head_join_node(self, adapter: OntologyAdapter) -> str:
|
|
507
561
|
"""
|
|
508
562
|
Tries to find the head join node of the given ontology adapter in the
|
|
509
563
|
head ontology. If the join node is not found, the method will raise an
|
|
@@ -514,15 +568,41 @@ class Ontology:
|
|
|
514
568
|
join node in the head ontology.
|
|
515
569
|
"""
|
|
516
570
|
|
|
517
|
-
head_join_node =
|
|
571
|
+
head_join_node = None
|
|
572
|
+
user_defined_head_join_node_label = adapter.get_head_join_node()
|
|
573
|
+
head_join_node_label_in_bc_format = to_lower_sentence_case(
|
|
574
|
+
user_defined_head_join_node_label.replace("_", " ")
|
|
575
|
+
)
|
|
576
|
+
|
|
577
|
+
if self._head_ontology._switch_label_and_id:
|
|
578
|
+
head_join_node = head_join_node_label_in_bc_format
|
|
579
|
+
elif not self._head_ontology._switch_label_and_id:
|
|
580
|
+
for node_id, data in self._head_ontology.get_nx_graph().nodes(
|
|
581
|
+
data=True
|
|
582
|
+
):
|
|
583
|
+
if (
|
|
584
|
+
"label" in data
|
|
585
|
+
and data["label"] == head_join_node_label_in_bc_format
|
|
586
|
+
):
|
|
587
|
+
head_join_node = node_id
|
|
588
|
+
break
|
|
518
589
|
|
|
519
590
|
if head_join_node not in self._head_ontology.get_nx_graph().nodes:
|
|
591
|
+
head_ontology = self._head_ontology._rdf_to_nx(
|
|
592
|
+
self._head_ontology.get_rdf_graph(),
|
|
593
|
+
self._head_ontology._root_label,
|
|
594
|
+
self._head_ontology._switch_label_and_id,
|
|
595
|
+
rename_nodes=False,
|
|
596
|
+
)
|
|
520
597
|
raise ValueError(
|
|
521
|
-
f"Head join node {head_join_node} not found in "
|
|
522
|
-
f"head ontology."
|
|
598
|
+
f"Head join node '{head_join_node}' not found in head ontology. "
|
|
599
|
+
f"The head ontology contains the following nodes: {head_ontology.nodes}."
|
|
523
600
|
)
|
|
601
|
+
return head_join_node
|
|
524
602
|
|
|
525
|
-
def _join_ontologies(
|
|
603
|
+
def _join_ontologies(
|
|
604
|
+
self, adapter: OntologyAdapter, head_join_node
|
|
605
|
+
) -> None:
|
|
526
606
|
"""
|
|
527
607
|
Joins the ontologies by adding the tail ontology as a subgraph to the
|
|
528
608
|
head ontology at the specified join nodes.
|
|
@@ -535,8 +615,7 @@ class Ontology:
|
|
|
535
615
|
if not self._nx_graph:
|
|
536
616
|
self._nx_graph = self._head_ontology.get_nx_graph().copy()
|
|
537
617
|
|
|
538
|
-
|
|
539
|
-
tail_join_node = to_lower_sentence_case(adapter.get_root_label())
|
|
618
|
+
tail_join_node = adapter.get_root_node()
|
|
540
619
|
tail_ontology = adapter.get_nx_graph()
|
|
541
620
|
|
|
542
621
|
# subtree of tail ontology at join node
|
|
@@ -695,8 +774,9 @@ class Ontology:
|
|
|
695
774
|
Args:
|
|
696
775
|
|
|
697
776
|
to_disk (str): If specified, the ontology structure will be saved
|
|
698
|
-
to disk as a GRAPHML file
|
|
699
|
-
|
|
777
|
+
to disk as a GRAPHML file at the location (directory) specified
|
|
778
|
+
by the `to_disk` string, to be opened in your favourite graph
|
|
779
|
+
visualisation tool.
|
|
700
780
|
|
|
701
781
|
full (bool): If True, the full ontology structure will be shown,
|
|
702
782
|
including all nodes and edges. If False, only the nodes and
|
|
@@ -13,7 +13,7 @@ BioCypher 'online' mode. Handles connection and manipulation of a running DBMS.
|
|
|
13
13
|
"""
|
|
14
14
|
import subprocess
|
|
15
15
|
|
|
16
|
-
from ._logger import logger
|
|
16
|
+
from biocypher._logger import logger
|
|
17
17
|
|
|
18
18
|
logger.debug(f"Loading module {__name__}.")
|
|
19
19
|
|
|
@@ -22,10 +22,10 @@ import itertools
|
|
|
22
22
|
|
|
23
23
|
import neo4j_utils
|
|
24
24
|
|
|
25
|
-
from
|
|
26
|
-
from ._config import config as _config
|
|
27
|
-
from ._create import BioCypherEdge, BioCypherNode
|
|
28
|
-
from ._translate import Translator
|
|
25
|
+
from biocypher import _misc
|
|
26
|
+
from biocypher._config import config as _config
|
|
27
|
+
from biocypher._create import BioCypherEdge, BioCypherNode
|
|
28
|
+
from biocypher._translate import Translator
|
|
29
29
|
|
|
30
30
|
__all__ = ["_Neo4jDriver"]
|
|
31
31
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
|
|
3
|
-
from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
3
|
+
from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Pandas:
|
|
@@ -87,3 +87,4 @@ class Pandas:
|
|
|
87
87
|
self.dfs[_type] = pd.concat(
|
|
88
88
|
[self.dfs[_type], df], ignore_index=True
|
|
89
89
|
)
|
|
90
|
+
return self.dfs[_type]
|
|
File without changes
|
|
@@ -12,9 +12,12 @@ from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
|
12
12
|
from biocypher._logger import logger
|
|
13
13
|
from biocypher._translate import Translator
|
|
14
14
|
from biocypher._deduplicate import Deduplicator
|
|
15
|
+
from biocypher.output.write._writer import _Writer
|
|
15
16
|
|
|
16
17
|
|
|
17
|
-
class _BatchWriter(ABC):
|
|
18
|
+
class _BatchWriter(_Writer, ABC):
|
|
19
|
+
"""Abstract batch writer class"""
|
|
20
|
+
|
|
18
21
|
@abstractmethod
|
|
19
22
|
def _get_default_import_call_bin_prefix(self):
|
|
20
23
|
"""
|
|
@@ -40,7 +43,7 @@ class _BatchWriter(ABC):
|
|
|
40
43
|
str: The database-specific string representation of an array
|
|
41
44
|
"""
|
|
42
45
|
raise NotImplementedError(
|
|
43
|
-
"Database writer must override '
|
|
46
|
+
"Database writer must override '_write_array_string'"
|
|
44
47
|
)
|
|
45
48
|
|
|
46
49
|
@abstractmethod
|
|
@@ -117,6 +120,8 @@ class _BatchWriter(ABC):
|
|
|
117
120
|
db_password: str = None,
|
|
118
121
|
db_host: str = None,
|
|
119
122
|
db_port: str = None,
|
|
123
|
+
rdf_format: str = None,
|
|
124
|
+
rdf_namespaces: dict = {},
|
|
120
125
|
):
|
|
121
126
|
"""
|
|
122
127
|
|
|
@@ -196,12 +201,26 @@ class _BatchWriter(ABC):
|
|
|
196
201
|
|
|
197
202
|
db_port:
|
|
198
203
|
The database port.
|
|
204
|
+
|
|
205
|
+
rdf_format:
|
|
206
|
+
The format of RDF.
|
|
207
|
+
|
|
208
|
+
rdf_namespaces:
|
|
209
|
+
The namespaces for RDF.
|
|
199
210
|
"""
|
|
211
|
+
super().__init__(
|
|
212
|
+
translator=translator,
|
|
213
|
+
deduplicator=deduplicator,
|
|
214
|
+
output_directory=output_directory,
|
|
215
|
+
strict_mode=strict_mode,
|
|
216
|
+
)
|
|
200
217
|
self.db_name = db_name
|
|
201
218
|
self.db_user = db_user
|
|
202
219
|
self.db_password = db_password
|
|
203
220
|
self.db_host = db_host or "localhost"
|
|
204
221
|
self.db_port = db_port
|
|
222
|
+
self.rdf_format = rdf_format
|
|
223
|
+
self.rdf_namespaces = rdf_namespaces
|
|
205
224
|
|
|
206
225
|
self.delim, self.escaped_delim = self._process_delimiter(delimiter)
|
|
207
226
|
self.adelim, self.escaped_adelim = self._process_delimiter(
|
|
@@ -228,32 +247,15 @@ class _BatchWriter(ABC):
|
|
|
228
247
|
self.import_call_nodes = set()
|
|
229
248
|
self.import_call_edges = set()
|
|
230
249
|
|
|
231
|
-
self.
|
|
250
|
+
self.outdir = output_directory
|
|
232
251
|
|
|
233
252
|
self._import_call_file_prefix = import_call_file_prefix
|
|
234
253
|
|
|
235
|
-
if os.path.exists(self.outdir):
|
|
236
|
-
logger.warning(
|
|
237
|
-
f"Output directory `{self.outdir}` already exists. "
|
|
238
|
-
"If this is not planned, file consistency may be compromised."
|
|
239
|
-
)
|
|
240
|
-
else:
|
|
241
|
-
logger.info(f"Creating output directory `{self.outdir}`.")
|
|
242
|
-
os.makedirs(self.outdir)
|
|
243
|
-
|
|
244
254
|
self.parts = {} # dict to store the paths of part files for each label
|
|
245
255
|
|
|
246
256
|
# TODO not memory efficient, but should be fine for most cases; is
|
|
247
257
|
# there a more elegant solution?
|
|
248
258
|
|
|
249
|
-
@property
|
|
250
|
-
def outdir(self):
|
|
251
|
-
"""
|
|
252
|
-
Property for output directory path.
|
|
253
|
-
"""
|
|
254
|
-
|
|
255
|
-
return self._outdir
|
|
256
|
-
|
|
257
259
|
@property
|
|
258
260
|
def import_call_file_prefix(self):
|
|
259
261
|
"""
|
|
@@ -261,7 +263,7 @@ class _BatchWriter(ABC):
|
|
|
261
263
|
"""
|
|
262
264
|
|
|
263
265
|
if self._import_call_file_prefix is None:
|
|
264
|
-
return self.
|
|
266
|
+
return self.outdir
|
|
265
267
|
else:
|
|
266
268
|
return self._import_call_file_prefix
|
|
267
269
|
|
|
@@ -994,7 +996,9 @@ class _BatchWriter(ABC):
|
|
|
994
996
|
"""
|
|
995
997
|
|
|
996
998
|
file_path = os.path.join(self.outdir, self._get_import_script_name())
|
|
997
|
-
logger.info(
|
|
999
|
+
logger.info(
|
|
1000
|
+
f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`."
|
|
1001
|
+
)
|
|
998
1002
|
|
|
999
1003
|
with open(file_path, "w", encoding="utf-8") as f:
|
|
1000
1004
|
f.write(self._construct_import_call())
|
|
@@ -14,10 +14,13 @@ suitable for import into a DBMS.
|
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
16
|
from biocypher._logger import logger
|
|
17
|
-
from biocypher.write.graph.
|
|
18
|
-
from biocypher.write.graph.
|
|
19
|
-
from biocypher.write.
|
|
20
|
-
from biocypher.write.
|
|
17
|
+
from biocypher.output.write.graph._rdf import _RDFWriter
|
|
18
|
+
from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
|
|
19
|
+
from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
|
|
20
|
+
from biocypher.output.write.graph._networkx import _NetworkXWriter
|
|
21
|
+
from biocypher.output.write.relational._csv import _PandasCSVWriter
|
|
22
|
+
from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
|
|
23
|
+
from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
|
|
21
24
|
|
|
22
25
|
logger.debug(f"Loading module {__name__}.")
|
|
23
26
|
|
|
@@ -43,6 +46,14 @@ DBMS_TO_CLASS = {
|
|
|
43
46
|
"ArangoDB": _ArangoDBBatchWriter,
|
|
44
47
|
"sqlite": _SQLiteBatchWriter,
|
|
45
48
|
"sqlite3": _SQLiteBatchWriter,
|
|
49
|
+
"rdf": _RDFWriter,
|
|
50
|
+
"RDF": _RDFWriter,
|
|
51
|
+
"csv": _PandasCSVWriter,
|
|
52
|
+
"CSV": _PandasCSVWriter,
|
|
53
|
+
"pandas": _PandasCSVWriter,
|
|
54
|
+
"Pandas": _PandasCSVWriter,
|
|
55
|
+
"networkx": _NetworkXWriter,
|
|
56
|
+
"NetworkX": _NetworkXWriter,
|
|
46
57
|
}
|
|
47
58
|
|
|
48
59
|
|
|
@@ -58,19 +69,14 @@ def get_writer(
|
|
|
58
69
|
file.
|
|
59
70
|
|
|
60
71
|
Args:
|
|
61
|
-
|
|
62
72
|
dbms: the database management system; for options, see DBMS_TO_CLASS.
|
|
63
|
-
|
|
64
73
|
translator: the Translator object.
|
|
65
|
-
|
|
66
|
-
output_directory: the directory to write the output files to.
|
|
67
|
-
|
|
74
|
+
deduplicator: the Deduplicator object.
|
|
75
|
+
output_directory: the directory to write the output files to.
|
|
68
76
|
strict_mode: whether to use strict mode.
|
|
69
77
|
|
|
70
78
|
Returns:
|
|
71
|
-
|
|
72
79
|
instance: an instance of the selected writer class.
|
|
73
|
-
|
|
74
80
|
"""
|
|
75
81
|
|
|
76
82
|
dbms_config = _config(dbms)
|
|
@@ -102,4 +108,6 @@ def get_writer(
|
|
|
102
108
|
db_user=dbms_config.get("user"), # psql
|
|
103
109
|
db_password=dbms_config.get("password"), # psql
|
|
104
110
|
db_port=dbms_config.get("port"), # psql
|
|
111
|
+
rdf_format=dbms_config.get("rdf_format"), # rdf
|
|
112
|
+
rdf_namespaces=dbms_config.get("rdf_namespaces"), # rdf
|
|
105
113
|
)
|