biocypher 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- biocypher/__init__.py +3 -13
- biocypher/_config/__init__.py +6 -23
- biocypher/_core.py +360 -262
- biocypher/_create.py +13 -27
- biocypher/_deduplicate.py +4 -11
- biocypher/_get.py +21 -60
- biocypher/_logger.py +4 -16
- biocypher/_mapping.py +4 -17
- biocypher/_metadata.py +3 -15
- biocypher/_misc.py +14 -28
- biocypher/_ontology.py +127 -212
- biocypher/_translate.py +34 -58
- biocypher/output/connect/_get_connector.py +40 -0
- biocypher/output/connect/_neo4j_driver.py +9 -65
- biocypher/output/in_memory/_get_in_memory_kg.py +34 -0
- biocypher/output/in_memory/_in_memory_kg.py +40 -0
- biocypher/output/in_memory/_networkx.py +44 -0
- biocypher/output/in_memory/_pandas.py +20 -15
- biocypher/output/write/_batch_writer.py +132 -177
- biocypher/output/write/_get_writer.py +11 -24
- biocypher/output/write/_writer.py +14 -33
- biocypher/output/write/graph/_arangodb.py +7 -24
- biocypher/output/write/graph/_neo4j.py +51 -56
- biocypher/output/write/graph/_networkx.py +36 -43
- biocypher/output/write/graph/_rdf.py +107 -95
- biocypher/output/write/relational/_csv.py +6 -11
- biocypher/output/write/relational/_postgresql.py +5 -13
- biocypher/output/write/relational/_sqlite.py +3 -1
- {biocypher-0.6.2.dist-info → biocypher-0.7.0.dist-info}/LICENSE +1 -1
- {biocypher-0.6.2.dist-info → biocypher-0.7.0.dist-info}/METADATA +3 -3
- biocypher-0.7.0.dist-info/RECORD +43 -0
- {biocypher-0.6.2.dist-info → biocypher-0.7.0.dist-info}/WHEEL +1 -1
- biocypher-0.6.2.dist-info/RECORD +0 -39
--- a/biocypher/output/write/_get_writer.py
+++ b/biocypher/output/write/_get_writer.py
@@ -1,38 +1,27 @@
-#!/usr/bin/env python
-
-#
-# Copyright 2021, Heidelberg University Clinic
-#
-# File author(s): Sebastian Lobentanzer
-#                 Michael Hartung
-#
-# Distributed under MIT licence, see the file `LICENSE`.
-#
 """
 BioCypher 'offline' module. Handles the writing of node and edge representations
 suitable for import into a DBMS.
 """
 
+from typing import TYPE_CHECKING
+
+from biocypher._config import config as _config
 from biocypher._logger import logger
-from biocypher.output.write.graph._rdf import _RDFWriter
-from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
 from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
+from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
 from biocypher.output.write.graph._networkx import _NetworkXWriter
+from biocypher.output.write.graph._rdf import _RDFWriter
 from biocypher.output.write.relational._csv import _PandasCSVWriter
-from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
 from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
+from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
 
 logger.debug(f"Loading module {__name__}.")
 
-from typing import TYPE_CHECKING
-
-from biocypher._config import config as _config
-
 __all__ = ["get_writer", "DBMS_TO_CLASS"]
 
 if TYPE_CHECKING:
-    from biocypher._translate import Translator
     from biocypher._deduplicate import Deduplicator
+    from biocypher._translate import Translator
 
 DBMS_TO_CLASS = {
     "neo": _Neo4jBatchWriter,
@@ -52,6 +41,8 @@ DBMS_TO_CLASS = {
     "CSV": _PandasCSVWriter,
     "pandas": _PandasCSVWriter,
     "Pandas": _PandasCSVWriter,
+    "tabular": _PandasCSVWriter,
+    "Tabular": _PandasCSVWriter,
     "networkx": _NetworkXWriter,
     "NetworkX": _NetworkXWriter,
 }
@@ -99,12 +90,8 @@ def get_writer(
         import_call_file_prefix=dbms_config.get("import_call_file_prefix"),
         wipe=dbms_config.get("wipe"),
         strict_mode=strict_mode,
-        skip_bad_relationships=dbms_config.get(
-            "skip_bad_relationships"
-        ),  # neo4j
-        skip_duplicate_nodes=dbms_config.get(
-            "skip_duplicate_nodes"
-        ),  # neo4j
+        skip_bad_relationships=dbms_config.get("skip_bad_relationships"),  # neo4j
+        skip_duplicate_nodes=dbms_config.get("skip_duplicate_nodes"),  # neo4j
         db_user=dbms_config.get("user"),  # psql
         db_password=dbms_config.get("password"),  # psql
         db_port=dbms_config.get("port"),  # psql
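The one functional change in this file is the pair of new `"tabular"`/`"Tabular"` aliases in `DBMS_TO_CLASS`, which resolve to the same `_PandasCSVWriter` as the existing `"CSV"`/`"pandas"` keys; the rest is import reordering and line rewrapping. A minimal sketch of how that dictionary dispatch behaves (the `select_writer_class` helper is hypothetical, not part of the module; the real `get_writer()` also reads the BioCypher config and forwards DBMS-specific keyword arguments):

```python
# Hypothetical helper illustrating the DBMS_TO_CLASS lookup.
from biocypher.output.write._get_writer import DBMS_TO_CLASS


def select_writer_class(dbms: str):
    writer_class = DBMS_TO_CLASS.get(dbms)
    if writer_class is None:
        raise ValueError(f"Unknown DBMS flavour: {dbms}")
    return writer_class


# After 0.7.0, the new alias resolves to the same class as the pandas/CSV keys:
assert select_writer_class("tabular") is select_writer_class("Pandas")
```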
--- a/biocypher/output/write/_writer.py
+++ b/biocypher/output/write/_writer.py
@@ -1,12 +1,13 @@
+import os
+
 from abc import ABC, abstractmethod
-from typing import Union, Optional
 from collections.abc import Iterable
-import os
+from typing import Optional, Union
 
 from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
+from biocypher._deduplicate import Deduplicator
 from biocypher._logger import logger
 from biocypher._translate import Translator
-from biocypher._deduplicate import Deduplicator
 
 __all__ = ["_Writer"]
 
@@ -75,9 +76,7 @@ class _Writer(ABC):
     @abstractmethod
     def _write_node_data(
         self,
-        nodes: Iterable[
-            Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
-        ],
+        nodes: Iterable[Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]],
     ) -> bool:
         """Implement how to output.write nodes to disk.
 
@@ -87,16 +86,12 @@ class _Writer(ABC):
         Returns:
             bool: The return value. True for success, False otherwise.
         """
-        raise NotImplementedError(
-            "Writer implementation must override 'write_nodes'"
-        )
+        raise NotImplementedError("Writer implementation must override 'write_nodes'")
 
     @abstractmethod
     def _write_edge_data(
         self,
-        edges: Iterable[
-            Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
-        ],
+        edges: Iterable[Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]],
     ) -> bool:
         """Implement how to output.write edges to disk.
 
@@ -106,9 +101,7 @@ class _Writer(ABC):
         Returns:
             bool: The return value. True for success, False otherwise.
         """
-        raise NotImplementedError(
-            "Writer implementation must override 'write_edges'"
-        )
+        raise NotImplementedError("Writer implementation must override 'write_edges'")
 
     @abstractmethod
     def _construct_import_call(self) -> str:
@@ -121,9 +114,7 @@ class _Writer(ABC):
         Returns:
             str: command for importing the output files into a DBMS.
         """
-        raise NotImplementedError(
-            "Writer implementation must override '_construct_import_call'"
-        )
+        raise NotImplementedError("Writer implementation must override '_construct_import_call'")
 
     @abstractmethod
     def _get_import_script_name(self) -> str:
@@ -132,13 +123,9 @@ class _Writer(ABC):
         Returns:
             str: The name of the import script (ending in .sh)
         """
-        raise NotImplementedError(
-            "Writer implementation must override '_get_import_script_name'"
-        )
+        raise NotImplementedError("Writer implementation must override '_get_import_script_name'")
 
-    def write_nodes(
-        self, nodes, batch_size: int = int(1e6), force: bool = False
-    ):
+    def write_nodes(self, nodes, batch_size: int = int(1e6), force: bool = False):
         """Wrapper for writing nodes.
 
         Args:
@@ -157,9 +144,7 @@ class _Writer(ABC):
             return False
         return True
 
-    def write_edges(
-        self, edges, batch_size: int = int(1e6), force: bool = False
-    ):
+    def write_edges(self, edges, batch_size: int = int(1e6), force: bool = False):
         """Wrapper for writing edges.
 
         Args:
@@ -187,12 +172,8 @@ class _Writer(ABC):
         Returns:
             str: The path of the file holding the import call.
         """
-        file_path = os.path.join(
-            self.output_directory, self._get_import_script_name()
-        )
-        logger.info(
-            f"Writing {self.__class__.__name__} import call to `{file_path}`."
-        )
+        file_path = os.path.join(self.output_directory, self._get_import_script_name())
+        logger.info(f"Writing {self.__class__.__name__} import call to `{file_path}`.")
 
         with open(file_path, "w", encoding="utf-8") as f:
             f.write(self._construct_import_call())
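As the hunks above show, `_Writer` remains an abstract base class: subclasses must provide `_write_node_data`, `_write_edge_data`, `_construct_import_call`, and `_get_import_script_name`, while the public `write_nodes`/`write_edges` wrappers and the import-call writing are inherited. A minimal sketch of a conforming subclass (the class and its behaviour are hypothetical; the real writers also handle batching, translation, and the deduplicator state passed to the constructor):

```python
from biocypher.output.write._writer import _Writer


class _NoOpWriter(_Writer):
    """Hypothetical writer that consumes entities without persisting them."""

    def _write_node_data(self, nodes) -> bool:
        for _node in nodes:  # a real writer would serialize each node here
            pass
        return True

    def _write_edge_data(self, edges) -> bool:
        for _edge in edges:
            pass
        return True

    def _construct_import_call(self) -> str:
        return "echo 'nothing to import'"

    def _get_import_script_name(self) -> str:
        return "noop-import-call.sh"
```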
--- a/biocypher/output/write/graph/_arangodb.py
+++ b/biocypher/output/write/graph/_arangodb.py
@@ -61,9 +61,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
         # check if file already exists
         if os.path.exists(header_path):
-            logger.warning(
-                f"File {header_path} already exists. Overwriting."
-            )
+            logger.warning(f"File {header_path} already exists. Overwriting.")
 
         # concatenate key:value in props
         props_list = []
@@ -81,9 +79,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
             f.write(row)
 
         # add collection from schema config
-        collection = self.translator.ontology.mapping.extended_schema[
-            label
-        ].get("db_collection_name", None)
+        collection = self.translator.ontology.mapping.extended_schema[label].get("db_collection_name", None)
 
         # add file path to neo4 admin import statement
         # do once for each part file
@@ -91,8 +87,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
         if not parts:
             raise ValueError(
-                f"No parts found for node label {label}. "
-                f"Check that the data was parsed first.",
+                f"No parts found for node label {label}. " f"Check that the data was parsed first.",
             )
 
         for part in parts:
@@ -145,9 +140,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
         # check for file exists
         if os.path.exists(header_path):
-            logger.warning(
-                f"Header file {header_path} already exists. Overwriting."
-            )
+            logger.warning(f"Header file {header_path} already exists. Overwriting.")
 
         # concatenate key:value in props
         props_list = []
@@ -172,9 +165,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
                 break
 
         else:
-            collection = self.translator.ontology.mapping.extended_schema[
-                label
-            ].get("db_collection_name", None)
+            collection = self.translator.ontology.mapping.extended_schema[label].get("db_collection_name", None)
 
         # add file path to neo4 admin import statement (import call path
         # may be different from actual output path)
@@ -206,11 +197,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
         Returns:
             str: a bash command for neo4j-admin import
         """
-        import_call = (
-            f"{self.import_call_bin_prefix}arangoimp "
-            f"--type csv "
-            f'--separator="{self.escaped_delim}" '
-        )
+        import_call = f"{self.import_call_bin_prefix}arangoimp " f"--type csv " f'--separator="{self.escaped_delim}" '
 
         if self.quote == "'":
             import_call += f'--quote="{self.quote}" '
@@ -221,11 +208,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
         # node import calls: one line per node type
         for header_path, parts_path, collection in self.import_call_nodes:
-            line = (
-                f"{import_call} "
-                f"--headers-file {header_path} "
-                f"--file= {parts_path} "
-            )
+            line = f"{import_call} --headers-file {header_path} --file= {parts_path} "
 
             if collection:
                 line += f"--create-collection --collection {collection} "
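All of the ArangoDB changes are cosmetic rewraps, but they make the shape of the generated `arangoimp` command easier to read in one piece. Expanded with illustrative values (the file names, delimiter, and collection below are made up, not read from a real schema):

```python
# Illustrative assembly of one arangoimp line, mirroring the f-strings above.
import_call_bin_prefix = ""          # prefix to the arango binaries, often empty
escaped_delim = ";"                  # hypothetical CSV separator
header_path = "Protein-header.csv"   # hypothetical header/part files
parts_path = "Protein-part000.csv"
collection = "proteins"

import_call = f"{import_call_bin_prefix}arangoimp --type csv " f'--separator="{escaped_delim}" '
line = f"{import_call} --headers-file {header_path} --file= {parts_path} "
if collection:
    line += f"--create-collection --collection {collection} "
print(line)
```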
--- a/biocypher/output/write/graph/_neo4j.py
+++ b/biocypher/output/write/graph/_neo4j.py
@@ -1,12 +1,11 @@
 import os
 
 from biocypher._logger import logger
-from biocypher.output.write._batch_writer import parse_label, _BatchWriter
+from biocypher.output.write._batch_writer import _BatchWriter, parse_label
 
 
 class _Neo4jBatchWriter(_BatchWriter):
-    """
-    Class for writing node and edge representations to disk using the
+    """Class for writing node and edge representations to disk using the
     format specified by Neo4j for the use of admin import. Each batch
     writer instance has a fixed representation that needs to be passed
     at instantiation via the :py:attr:`schema` argument. The instance
@@ -23,26 +22,26 @@ class _Neo4jBatchWriter(_BatchWriter):
     """
 
     def __init__(self, *args, **kwargs):
-        """
-        Constructor.
+        """Constructor.
 
         Check the version of Neo4j and adds a command scope if version >= 5.
 
-        Returns
+        Returns
+        -------
             _Neo4jBatchWriter: An instance of the writer.
-        """
 
+        """
         # Should read the configuration and setup import_call_bin_prefix.
         super().__init__(*args, **kwargs)
 
     def _get_default_import_call_bin_prefix(self):
-        """
-        Method to provide the default string for the import call bin prefix.
+        """Method to provide the default string for the import call bin prefix.
 
-        Returns
+        Returns
+        -------
             str: The default location for the neo4j admin import location
-        """
 
+        """
         return "bin/"
 
     def _quote_string(self, value: str) -> str:
@@ -53,27 +52,30 @@ class _Neo4jBatchWriter(_BatchWriter):
         return f"{self.quote}{value.replace(self.quote, self.quote * 2)}{self.quote}"
 
     def _write_array_string(self, string_list):
-        """
-        Abstract method to output.write the string representation of an array into a .csv file
+        """Abstract method to output.write the string representation of an array into a .csv file
         as required by the neo4j admin-import.
 
         Args:
+        ----
            string_list (list): list of ontology strings
 
         Returns:
+        -------
             str: The string representation of an array for the neo4j admin import
+
         """
         string = self.adelim.join(string_list)
         return self._quote_string(string)
 
     def _write_node_headers(self):
-        """
-        Writes single CSV file for a graph entity that is represented
+        """Writes single CSV file for a graph entity that is represented
         as a node as per the definition in the `schema_config.yaml`,
         containing only the header for this type of node.
 
-        Returns
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # load headers from data parse
         if not self.node_property_dict:
@@ -86,9 +88,7 @@ class _Neo4jBatchWriter(_BatchWriter):
         _id = ":ID"
 
         # translate label to PascalCase
-        pascal_label = self.translator.name_sentence_to_pascal(
-            parse_label(label)
-        )
+        pascal_label = self.translator.name_sentence_to_pascal(parse_label(label))
 
         header = f"{pascal_label}-header.csv"
         header_path = os.path.join(
@@ -143,20 +143,19 @@ class _Neo4jBatchWriter(_BatchWriter):
             self.import_call_file_prefix,
             parts,
         )
-        self.import_call_nodes.add(
-            (import_call_header_path, import_call_parts_path)
-        )
+        self.import_call_nodes.add((import_call_header_path, import_call_parts_path))
 
         return True
 
     def _write_edge_headers(self):
-        """
-        Writes single CSV file for a graph entity that is represented
+        """Writes single CSV file for a graph entity that is represented
         as an edge as per the definition in the `schema_config.yaml`,
         containing only the header for this type of edge.
 
-        Returns
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # load headers from data parse
         if not self.edge_property_dict:
@@ -167,9 +166,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
        for label, props in self.edge_property_dict.items():
             # translate label to PascalCase
-            pascal_label = self.translator.name_sentence_to_pascal(
-                parse_label(label)
-            )
+            pascal_label = self.translator.name_sentence_to_pascal(parse_label(label))
 
             # paths
             header = f"{pascal_label}-header.csv"
@@ -181,9 +178,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
             # check for file exists
             if os.path.exists(header_path):
-                logger.warning(
-                    f"File {header_path} already exists. Overwriting."
-                )
+                logger.warning(f"File {header_path} already exists. Overwriting.")
 
             # concatenate key:value in props
             props_list = []
@@ -213,9 +208,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
             if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
                 skip_id = True
-            elif not self.translator.ontology.mapping.extended_schema.get(
-                label
-            ):
+            elif not self.translator.ontology.mapping.extended_schema.get(label):
                 # find label in schema by label_as_edge
                 for (
                     k,
@@ -231,10 +224,10 @@ class _Neo4jBatchWriter(_BatchWriter):
 
                 if schema_label:
                     if (
-                        self.translator.ontology.mapping.extended_schema.get(
-                            schema_label
+                        self.translator.ontology.mapping.extended_schema.get(  # (seems to not work with 'not')
+                            schema_label,
                         ).get("use_id")
-                        == False
+                        == False  # noqa: E712 (seems to not work with 'not')
                     ):
                         skip_id = True
 
@@ -259,54 +252,56 @@ class _Neo4jBatchWriter(_BatchWriter):
             self.import_call_file_prefix,
             parts,
         )
-        self.import_call_edges.add(
-            (import_call_header_path, import_call_parts_path)
-        )
+        self.import_call_edges.add((import_call_header_path, import_call_parts_path))
 
         return True
 
     def _get_import_script_name(self) -> str:
-        """
-        Returns the name of the neo4j admin import script
+        """Returns the name of the neo4j admin import script
 
-        Returns
+        Returns
+        -------
             str: The name of the import script (ending in .sh)
+
         """
         return "neo4j-admin-import-call.sh"
 
     def _construct_import_call(self) -> str:
-        """
-        Function to construct the import call detailing folder and
+        """Function to construct the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name. Built after all data has been
         processed to ensure that nodes are called before any edges.
 
-        Returns
+        Returns
+        -------
             str: a bash command for neo4j-admin import
+
         """
-        import_call_neo4j_v4 = self._get_import_call(
-            "import", "--database=", "--force="
-        )
-        import_call_neo4j_v5 = self._get_import_call(
-            "database import full", "", "--overwrite-destination="
+        import_call_neo4j_v4 = self._get_import_call("import", "--database=", "--force=")
+        import_call_neo4j_v5 = self._get_import_call("database import full", "", "--overwrite-destination=")
+        neo4j_version_check = (
+            f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
         )
-        neo4j_version_check = f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
 
-        import_script = f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
+        import_script = (
+            f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; "
+            f"then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
+        )
         return import_script
 
-    def _get_import_call(
-        self, import_cmd: str, database_cmd: str, wipe_cmd: str
-    ) -> str:
+    def _get_import_call(self, import_cmd: str, database_cmd: str, wipe_cmd: str) -> str:
         """Get parametrized import call for Neo4j 4 or 5+.
 
         Args:
+        ----
             import_cmd (str): The import command to use.
             database_cmd (str): The database command to use.
             wipe_cmd (str): The wipe command to use.
 
         Returns:
+        -------
             str: The import call.
+
         """
         import_call = f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
 
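Beyond the docstring reflow to numpydoc-style `Returns`/`-------` underlines, the substance of this file is unchanged: the emitted import script probes the installed Neo4j major version at run time and branches between the v4 syntax (`neo4j-admin import --database=... --force=...`) and the v5 syntax (`neo4j-admin database import full ... --overwrite-destination=...`). Filled in with illustrative values (the database name and the elided per-type file arguments are made up), the generated `neo4j-admin-import-call.sh` is assembled roughly like this:

```python
# Illustrative reassembly of the version-switching shell script; "..." stands
# for the per-label header/data file arguments the writer accumulates.
import_call_v4 = "bin/neo4j-admin import --database=neo4j --force=true ..."
import_call_v5 = "bin/neo4j-admin database import full neo4j --overwrite-destination=true ..."
version_check = "version=$(bin/neo4j-admin --version | cut -d '.' -f 1)"

import_script = (
    f"#!/bin/bash\n{version_check}\nif [[ $version -ge 5 ]]; "
    f"then\n\t{import_call_v5}\nelse\n\t{import_call_v4}\nfi"
)
print(import_script)
```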
--- a/biocypher/output/write/graph/_networkx.py
+++ b/biocypher/output/write/graph/_networkx.py
@@ -1,31 +1,34 @@
 import pickle
 
-import networkx as nx
-
 from biocypher._logger import logger
+from biocypher.output.in_memory._networkx import NetworkxKG
 from biocypher.output.write._writer import _Writer
-from biocypher.output.write.relational._csv import _PandasCSVWriter
 
 
 class _NetworkXWriter(_Writer):
     """
-    Class for writing […]
+    Class for writing the in-memory networkx DiGraph to file.
+
+    Call `_construct_import_call` to write the networkx DiGraph to a pickle
+    file and return the Python call to load it.
+
+    TODO: this is a non-intuitive name, should be adjusted.
     """
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.[…]
-        […]
+        self.in_memory_networkx_kg = NetworkxKG(
+            deduplicator=self.deduplicator,
+        )
 
     def _construct_import_call(self) -> str:
-        """[…]
+        """Dump networkx graph to a pickle file and return Python call.
 
         Returns:
-            str: Python code to load the […]
+            str: Python code to load the networkx graph from a pickle file.
         """
-        […]
-        […]
-        )
+        self.G = self.in_memory_networkx_kg._create_networkx_kg()
+        logger.info(f"Writing networkx {self.G} to pickle file networkx_graph.pkl.")
         with open(f"{self.output_directory}/networkx_graph.pkl", "wb") as f:
             pickle.dump(self.G, f)
 
@@ -38,39 +41,29 @@ class _NetworkXWriter(_Writer):
         return "import_networkx.py"
 
     def _write_node_data(self, nodes) -> bool:
-        […]
-        […]
+        """Add nodes to the networkx graph.
+
+        TODO: this is not strictly writing, should be refactored.
+
+        Args:
+            nodes (list): List of nodes to add to the networkx graph.
+
+        Returns:
+            bool: True if the nodes were added successfully, False otherwise.
+        """
+        passed = self.in_memory_networkx_kg.add_nodes(nodes)
         return passed
 
     def _write_edge_data(self, edges) -> bool:
-        […]
-        […]
-        […]
+        """Add edges to the networkx graph.
+
+        TODO: this is not strictly writing, should be refactored.
 
-        […]
-                df
-                for df in all_dfs.values()
-                if df.columns.str.contains("source_id").any()
-                and df.columns.str.contains("target_id").any()
-            ]
-            for df in node_dfs:
-                nodes = df.set_index("node_id").to_dict(orient="index")
-                self.G.add_nodes_from(nodes.items())
-            for df in edge_dfs:
-                edges = df.set_index(["source_id", "target_id"]).to_dict(
-                    orient="index"
-                )
-                self.G.add_edges_from(
-                    (
-                        (source, target, attrs)
-                        for (source, target), attrs in edges.items()
-                    )
-                )
-        return True
+        Args:
+            edges (list): List of edges to add to the networkx graph.
+
+        Returns:
+            bool: True if the edges were added successfully, False otherwise.
+        """
+        passed = self.in_memory_networkx_kg.add_edges(edges)
+        return passed