biocypher 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
- biocypher/__init__.py +3 -13
- biocypher/_config/__init__.py +6 -23
- biocypher/_core.py +360 -262
- biocypher/_create.py +13 -27
- biocypher/_deduplicate.py +4 -11
- biocypher/_get.py +21 -60
- biocypher/_logger.py +4 -16
- biocypher/_mapping.py +4 -17
- biocypher/_metadata.py +3 -15
- biocypher/_misc.py +14 -28
- biocypher/_ontology.py +127 -212
- biocypher/_translate.py +34 -58
- biocypher/output/connect/_get_connector.py +40 -0
- biocypher/output/connect/_neo4j_driver.py +9 -65
- biocypher/output/in_memory/_get_in_memory_kg.py +34 -0
- biocypher/output/in_memory/_in_memory_kg.py +40 -0
- biocypher/output/in_memory/_networkx.py +44 -0
- biocypher/output/in_memory/_pandas.py +20 -15
- biocypher/output/write/_batch_writer.py +137 -172
- biocypher/output/write/_get_writer.py +11 -24
- biocypher/output/write/_writer.py +14 -33
- biocypher/output/write/graph/_arangodb.py +7 -24
- biocypher/output/write/graph/_neo4j.py +59 -57
- biocypher/output/write/graph/_networkx.py +36 -43
- biocypher/output/write/graph/_rdf.py +114 -95
- biocypher/output/write/relational/_csv.py +6 -11
- biocypher/output/write/relational/_postgresql.py +12 -13
- biocypher/output/write/relational/_sqlite.py +3 -1
- {biocypher-0.6.1.dist-info → biocypher-0.7.0.dist-info}/LICENSE +1 -1
- {biocypher-0.6.1.dist-info → biocypher-0.7.0.dist-info}/METADATA +3 -3
- biocypher-0.7.0.dist-info/RECORD +43 -0
- {biocypher-0.6.1.dist-info → biocypher-0.7.0.dist-info}/WHEEL +1 -1
- biocypher-0.6.1.dist-info/RECORD +0 -39
biocypher/output/write/_get_writer.py

@@ -1,38 +1,27 @@
-#!/usr/bin/env python
-
-#
-# Copyright 2021, Heidelberg University Clinic
-#
-# File author(s): Sebastian Lobentanzer
-#                 Michael Hartung
-#
-# Distributed under MIT licence, see the file `LICENSE`.
-#
 """
 BioCypher 'offline' module. Handles the writing of node and edge representations
 suitable for import into a DBMS.
 """
 
+from typing import TYPE_CHECKING
+
+from biocypher._config import config as _config
 from biocypher._logger import logger
-from biocypher.output.write.graph._rdf import _RDFWriter
-from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
 from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
+from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
 from biocypher.output.write.graph._networkx import _NetworkXWriter
+from biocypher.output.write.graph._rdf import _RDFWriter
 from biocypher.output.write.relational._csv import _PandasCSVWriter
-from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
 from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
+from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
 
 logger.debug(f"Loading module {__name__}.")
 
-from typing import TYPE_CHECKING
-
-from biocypher._config import config as _config
-
 __all__ = ["get_writer", "DBMS_TO_CLASS"]
 
 if TYPE_CHECKING:
-    from biocypher._translate import Translator
     from biocypher._deduplicate import Deduplicator
+    from biocypher._translate import Translator
 
 DBMS_TO_CLASS = {
     "neo": _Neo4jBatchWriter,
@@ -52,6 +41,8 @@ DBMS_TO_CLASS = {
     "CSV": _PandasCSVWriter,
     "pandas": _PandasCSVWriter,
     "Pandas": _PandasCSVWriter,
+    "tabular": _PandasCSVWriter,
+    "Tabular": _PandasCSVWriter,
     "networkx": _NetworkXWriter,
     "NetworkX": _NetworkXWriter,
 }
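For context, the two new keys are pure aliases. A quick sketch (reading the module's own `DBMS_TO_CLASS` dict, a private module, so for illustration only) shows they resolve to the same writer class as the existing spellings:

from biocypher.output.write._get_writer import DBMS_TO_CLASS

# "tabular"/"Tabular" map to the Pandas CSV writer, just like "CSV"/"pandas".
assert DBMS_TO_CLASS["tabular"] is DBMS_TO_CLASS["CSV"]
assert DBMS_TO_CLASS["Tabular"] is DBMS_TO_CLASS["pandas"]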
@@ -99,12 +90,8 @@ def get_writer(
         import_call_file_prefix=dbms_config.get("import_call_file_prefix"),
         wipe=dbms_config.get("wipe"),
         strict_mode=strict_mode,
-        skip_bad_relationships=dbms_config.get(
-            "skip_bad_relationships"
-        ),  # neo4j
-        skip_duplicate_nodes=dbms_config.get(
-            "skip_duplicate_nodes"
-        ),  # neo4j
+        skip_bad_relationships=dbms_config.get("skip_bad_relationships"),  # neo4j
+        skip_duplicate_nodes=dbms_config.get("skip_duplicate_nodes"),  # neo4j
         db_user=dbms_config.get("user"),  # psql
         db_password=dbms_config.get("password"),  # psql
         db_port=dbms_config.get("port"),  # psql
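The keys consumed here come from the per-DBMS configuration block. A hypothetical `dbms_config` fragment (values are illustrative placeholders, not documented defaults) covering the options this hunk forwards:

dbms_config = {
    "import_call_file_prefix": "/data/",  # hypothetical path
    "wipe": True,
    "skip_bad_relationships": True,  # neo4j
    "skip_duplicate_nodes": True,  # neo4j
    "user": "postgres",  # psql
    "password": "secret",  # psql, placeholder
    "port": 5432,  # psql
}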
biocypher/output/write/_writer.py

@@ -1,12 +1,13 @@
+import os
+
 from abc import ABC, abstractmethod
-from typing import Union, Optional
 from collections.abc import Iterable
-import os
+from typing import Optional, Union
 
 from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
+from biocypher._deduplicate import Deduplicator
 from biocypher._logger import logger
 from biocypher._translate import Translator
-from biocypher._deduplicate import Deduplicator
 
 __all__ = ["_Writer"]
 
@@ -75,9 +76,7 @@ class _Writer(ABC):
     @abstractmethod
     def _write_node_data(
         self,
-        nodes: Iterable[
-            Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
-        ],
+        nodes: Iterable[Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]],
     ) -> bool:
         """Implement how to output.write nodes to disk.
 
@@ -87,16 +86,12 @@ class _Writer(ABC):
         Returns:
             bool: The return value. True for success, False otherwise.
         """
-        raise NotImplementedError(
-            "Writer implementation must override 'write_nodes'"
-        )
+        raise NotImplementedError("Writer implementation must override 'write_nodes'")
 
     @abstractmethod
     def _write_edge_data(
         self,
-        edges: Iterable[
-            Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
-        ],
+        edges: Iterable[Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]],
     ) -> bool:
         """Implement how to output.write edges to disk.
 
@@ -106,9 +101,7 @@ class _Writer(ABC):
         Returns:
             bool: The return value. True for success, False otherwise.
         """
-        raise NotImplementedError(
-            "Writer implementation must override 'write_edges'"
-        )
+        raise NotImplementedError("Writer implementation must override 'write_edges'")
 
     @abstractmethod
     def _construct_import_call(self) -> str:
@@ -121,9 +114,7 @@ class _Writer(ABC):
         Returns:
             str: command for importing the output files into a DBMS.
         """
-        raise NotImplementedError(
-            "Writer implementation must override '_construct_import_call'"
-        )
+        raise NotImplementedError("Writer implementation must override '_construct_import_call'")
 
     @abstractmethod
     def _get_import_script_name(self) -> str:
@@ -132,13 +123,9 @@ class _Writer(ABC):
         Returns:
             str: The name of the import script (ending in .sh)
         """
-        raise NotImplementedError(
-            "Writer implementation must override '_get_import_script_name'"
-        )
+        raise NotImplementedError("Writer implementation must override '_get_import_script_name'")
 
-    def write_nodes(
-        self, nodes, batch_size: int = int(1e6), force: bool = False
-    ):
+    def write_nodes(self, nodes, batch_size: int = int(1e6), force: bool = False):
         """Wrapper for writing nodes.
 
         Args:
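Taken together, these hunks expose _Writer's contract plainly: four abstract methods that every backend must override, with the public write_nodes/write_edges wrappers batching on top. A self-contained sketch of that contract (a local stand-in mirroring the abstract surface shown above, not the real base class, whose constructor arguments are out of scope here):

from abc import ABC, abstractmethod


class MiniWriter(ABC):
    # Local mirror of the four abstract methods of _Writer.
    @abstractmethod
    def _write_node_data(self, nodes) -> bool: ...

    @abstractmethod
    def _write_edge_data(self, edges) -> bool: ...

    @abstractmethod
    def _construct_import_call(self) -> str: ...

    @abstractmethod
    def _get_import_script_name(self) -> str: ...


class EchoWriter(MiniWriter):
    # Toy subclass: satisfies the contract by printing instead of writing.
    def _write_node_data(self, nodes) -> bool:
        print(f"nodes: {list(nodes)}")
        return True

    def _write_edge_data(self, edges) -> bool:
        print(f"edges: {list(edges)}")
        return True

    def _construct_import_call(self) -> str:
        return "echo 'nothing to import'"

    def _get_import_script_name(self) -> str:
        return "echo-import-call.sh"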
@@ -157,9 +144,7 @@ class _Writer(ABC):
             return False
         return True
 
-    def write_edges(
-        self, edges, batch_size: int = int(1e6), force: bool = False
-    ):
+    def write_edges(self, edges, batch_size: int = int(1e6), force: bool = False):
         """Wrapper for writing edges.
 
         Args:
@@ -187,12 +172,8 @@ class _Writer(ABC):
         Returns:
             str: The path of the file holding the import call.
         """
-        file_path = os.path.join(
-            self.output_directory, self._get_import_script_name()
-        )
-        logger.info(
-            f"Writing {self.__class__.__name__} import call to `{file_path}`."
-        )
+        file_path = os.path.join(self.output_directory, self._get_import_script_name())
+        logger.info(f"Writing {self.__class__.__name__} import call to `{file_path}`.")
 
         with open(file_path, "w", encoding="utf-8") as f:
             f.write(self._construct_import_call())
biocypher/output/write/graph/_arangodb.py

@@ -61,9 +61,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
         # check if file already exists
         if os.path.exists(header_path):
-            logger.warning(
-                f"File {header_path} already exists. Overwriting."
-            )
+            logger.warning(f"File {header_path} already exists. Overwriting.")
 
         # concatenate key:value in props
         props_list = []
@@ -81,9 +79,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
                 f.write(row)
 
             # add collection from schema config
-            collection = self.translator.ontology.mapping.extended_schema[
-                label
-            ].get("db_collection_name", None)
+            collection = self.translator.ontology.mapping.extended_schema[label].get("db_collection_name", None)
 
             # add file path to neo4 admin import statement
             # do once for each part file
@@ -91,8 +87,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
             if not parts:
                 raise ValueError(
-                    f"No parts found for node label {label}. "
-                    f"Check that the data was parsed first.",
+                    f"No parts found for node label {label}. " f"Check that the data was parsed first.",
                 )
 
             for part in parts:
@@ -145,9 +140,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
         # check for file exists
        if os.path.exists(header_path):
-            logger.warning(
-                f"Header file {header_path} already exists. Overwriting."
-            )
+            logger.warning(f"Header file {header_path} already exists. Overwriting.")
 
         # concatenate key:value in props
         props_list = []
@@ -172,9 +165,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
                 break
 
         else:
-            collection = self.translator.ontology.mapping.extended_schema[
-                label
-            ].get("db_collection_name", None)
+            collection = self.translator.ontology.mapping.extended_schema[label].get("db_collection_name", None)
 
         # add file path to neo4 admin import statement (import call path
         # may be different from actual output path)
@@ -206,11 +197,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
         Returns:
             str: a bash command for neo4j-admin import
         """
-        import_call = (
-            f"{self.import_call_bin_prefix}arangoimp "
-            f"--type csv "
-            f'--separator="{self.escaped_delim}" '
-        )
+        import_call = f"{self.import_call_bin_prefix}arangoimp " f"--type csv " f'--separator="{self.escaped_delim}" '
 
         if self.quote == "'":
             import_call += f'--quote="{self.quote}" '
@@ -221,11 +208,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
         # node import calls: one line per node type
         for header_path, parts_path, collection in self.import_call_nodes:
-            line = (
-                f"{import_call} "
-                f"--headers-file {header_path} "
-                f"--file= {parts_path} "
-            )
+            line = f"{import_call} --headers-file {header_path} --file= {parts_path} "
 
             if collection:
                 line += f"--create-collection --collection {collection} "
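Putting the collapsed f-strings together, the assembled arangoimp line looks roughly like this (paths and collection name are hypothetical; the space after `--file=` is present in the source as shown above):

import_call_bin_prefix = ""  # assumption: empty prefix
escaped_delim = ","
import_call = f"{import_call_bin_prefix}arangoimp " f"--type csv " f'--separator="{escaped_delim}" '

header_path = "Protein-header.csv"  # hypothetical
parts_path = "Protein-part000.csv"  # hypothetical
collection = "proteins"  # hypothetical

line = f"{import_call} --headers-file {header_path} --file= {parts_path} "
if collection:
    line += f"--create-collection --collection {collection} "
print(line)
# arangoimp --type csv --separator=","  --headers-file Protein-header.csv --file= Protein-part000.csv --create-collection --collection proteins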
biocypher/output/write/graph/_neo4j.py

@@ -1,12 +1,11 @@
 import os
 
 from biocypher._logger import logger
-from biocypher.output.write._batch_writer import parse_label, _BatchWriter
+from biocypher.output.write._batch_writer import _BatchWriter, parse_label
 
 
 class _Neo4jBatchWriter(_BatchWriter):
-    """
-    Class for writing node and edge representations to disk using the
+    """Class for writing node and edge representations to disk using the
     format specified by Neo4j for the use of admin import. Each batch
     writer instance has a fixed representation that needs to be passed
     at instantiation via the :py:attr:`schema` argument. The instance
@@ -23,50 +22,60 @@ class _Neo4jBatchWriter(_BatchWriter):
     """
 
     def __init__(self, *args, **kwargs):
-        """
-        Constructor.
+        """Constructor.
 
         Check the version of Neo4j and adds a command scope if version >= 5.
 
-        Returns
+        Returns
+        -------
             _Neo4jBatchWriter: An instance of the writer.
-        """
 
+        """
         # Should read the configuration and setup import_call_bin_prefix.
         super().__init__(*args, **kwargs)
 
     def _get_default_import_call_bin_prefix(self):
-        """
-        Method to provide the default string for the import call bin prefix.
+        """Method to provide the default string for the import call bin prefix.
 
-        Returns
+        Returns
+        -------
             str: The default location for the neo4j admin import location
-        """
 
+        """
         return "bin/"
 
-    def _write_array_string(self, string_list):
+    def _quote_string(self, value: str) -> str:
+        """
+        Quote a string. Quote character is escaped by doubling it.
         """
-        Abstract method to output.write the string representation of an array into a .csv file
+
+        return f"{self.quote}{value.replace(self.quote, self.quote * 2)}{self.quote}"
+
+    def _write_array_string(self, string_list):
+        """Abstract method to output.write the string representation of an array into a .csv file
         as required by the neo4j admin-import.
 
         Args:
+        ----
             string_list (list): list of ontology strings
 
         Returns:
+        -------
             str: The string representation of an array for the neo4j admin import
+
         """
         string = self.adelim.join(string_list)
-        return f"{self.quote}{string}{self.quote}"
+        return self._quote_string(string)
 
     def _write_node_headers(self):
-        """
-        Writes single CSV file for a graph entity that is represented
+        """Writes single CSV file for a graph entity that is represented
         as a node as per the definition in the `schema_config.yaml`,
         containing only the header for this type of node.
 
-        Returns
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # load headers from data parse
         if not self.node_property_dict:
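The new `_quote_string` centralizes the CSV quoting rule: the configured quote character is escaped by doubling it, as in RFC 4180-style CSV. A standalone sketch of that rule, detached from the writer class:

def quote_string(value: str, quote: str = '"') -> str:
    # Escape the quote character by doubling it, then wrap the value.
    return f"{quote}{value.replace(quote, quote * 2)}{quote}"


assert quote_string('say "hi"') == '"say ""hi"""'
assert quote_string("plain") == '"plain"'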
@@ -79,9 +88,7 @@ class _Neo4jBatchWriter(_BatchWriter):
             _id = ":ID"
 
             # translate label to PascalCase
-            pascal_label = self.translator.name_sentence_to_pascal(
-                parse_label(label)
-            )
+            pascal_label = self.translator.name_sentence_to_pascal(parse_label(label))
 
             header = f"{pascal_label}-header.csv"
             header_path = os.path.join(
@@ -136,20 +143,19 @@ class _Neo4jBatchWriter(_BatchWriter):
                 self.import_call_file_prefix,
                 parts,
             )
-            self.import_call_nodes.add(
-                (import_call_header_path, import_call_parts_path)
-            )
+            self.import_call_nodes.add((import_call_header_path, import_call_parts_path))
 
         return True
 
     def _write_edge_headers(self):
-        """
-        Writes single CSV file for a graph entity that is represented
+        """Writes single CSV file for a graph entity that is represented
         as an edge as per the definition in the `schema_config.yaml`,
         containing only the header for this type of edge.
 
-        Returns
+        Returns
+        -------
             bool: The return value. True for success, False otherwise.
+
         """
         # load headers from data parse
         if not self.edge_property_dict:
@@ -160,9 +166,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
         for label, props in self.edge_property_dict.items():
             # translate label to PascalCase
-            pascal_label = self.translator.name_sentence_to_pascal(
-                parse_label(label)
-            )
+            pascal_label = self.translator.name_sentence_to_pascal(parse_label(label))
 
             # paths
             header = f"{pascal_label}-header.csv"
@@ -174,9 +178,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
             # check for file exists
             if os.path.exists(header_path):
-                logger.warning(
-                    f"File {header_path} already exists. Overwriting."
-                )
+                logger.warning(f"File {header_path} already exists. Overwriting.")
 
             # concatenate key:value in props
             props_list = []
@@ -206,9 +208,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
             if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
                 skip_id = True
-            elif not self.translator.ontology.mapping.extended_schema.get(
-                label
-            ):
+            elif not self.translator.ontology.mapping.extended_schema.get(label):
                 # find label in schema by label_as_edge
                 for (
                     k,
@@ -224,10 +224,10 @@ class _Neo4jBatchWriter(_BatchWriter):
 
                 if schema_label:
                     if (
-                        self.translator.ontology.mapping.extended_schema.get(
-                            schema_label
+                        self.translator.ontology.mapping.extended_schema.get(  # (seems to not work with 'not')
+                            schema_label,
                         ).get("use_id")
-                        == False
+                        == False  # noqa: E712 (seems to not work with 'not')
                     ):
                         skip_id = True
 
@@ -252,54 +252,56 @@ class _Neo4jBatchWriter(_BatchWriter):
                 self.import_call_file_prefix,
                 parts,
             )
-            self.import_call_edges.add(
-                (import_call_header_path, import_call_parts_path)
-            )
+            self.import_call_edges.add((import_call_header_path, import_call_parts_path))
 
         return True
 
     def _get_import_script_name(self) -> str:
-        """
-        Returns the name of the neo4j admin import script
+        """Returns the name of the neo4j admin import script
 
-        Returns
+        Returns
+        -------
             str: The name of the import script (ending in .sh)
+
         """
         return "neo4j-admin-import-call.sh"
 
     def _construct_import_call(self) -> str:
-        """
-        Function to construct the import call detailing folder and
+        """Function to construct the import call detailing folder and
         individual node and edge headers and data files, as well as
         delimiters and database name. Built after all data has been
         processed to ensure that nodes are called before any edges.
 
-        Returns
+        Returns
+        -------
             str: a bash command for neo4j-admin import
+
         """
-        import_call_neo4j_v4 = self._get_import_call(
-            "import", "--database=", "--force="
-        )
-        import_call_neo4j_v5 = self._get_import_call(
-            "database import full", "", "--overwrite-destination="
+        import_call_neo4j_v4 = self._get_import_call("import", "--database=", "--force=")
+        import_call_neo4j_v5 = self._get_import_call("database import full", "", "--overwrite-destination=")
+        neo4j_version_check = (
+            f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
         )
-        neo4j_version_check = f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
 
-        import_script = f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
+        import_script = (
+            f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; "
+            f"then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
+        )
         return import_script
 
-    def _get_import_call(
-        self, import_cmd: str, database_cmd: str, wipe_cmd: str
-    ) -> str:
+    def _get_import_call(self, import_cmd: str, database_cmd: str, wipe_cmd: str) -> str:
         """Get parametrized import call for Neo4j 4 or 5+.
 
         Args:
+        ----
             import_cmd (str): The import command to use.
             database_cmd (str): The database command to use.
             wipe_cmd (str): The wipe command to use.
 
         Returns:
+        -------
             str: The import call.
+
         """
         import_call = f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
 
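The generated import script branches on the installed Neo4j major version at run time. A sketch of what `_construct_import_call` emits, with placeholder strings standing in for the output of `_get_import_call`:

bin_prefix = "bin/"  # default from _get_default_import_call_bin_prefix
v4_call = "bin/neo4j-admin import ..."  # placeholder v4 call
v5_call = "bin/neo4j-admin database import full ..."  # placeholder v5 call
version_check = f"version=$({bin_prefix}neo4j-admin --version | cut -d '.' -f 1)"

script = (
    f"#!/bin/bash\n{version_check}\nif [[ $version -ge 5 ]]; "
    f"then\n\t{v5_call}\nelse\n\t{v4_call}\nfi"
)
print(script)
# #!/bin/bash
# version=$(bin/neo4j-admin --version | cut -d '.' -f 1)
# if [[ $version -ge 5 ]]; then
#     bin/neo4j-admin database import full ...   (tab-indented in the real output)
# else
#     bin/neo4j-admin import ...
# fi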
biocypher/output/write/graph/_networkx.py

@@ -1,31 +1,34 @@
 import pickle
 
-import networkx as nx
-
 from biocypher._logger import logger
+from biocypher.output.in_memory._networkx import NetworkxKG
 from biocypher.output.write._writer import _Writer
-from biocypher.output.write.relational._csv import _PandasCSVWriter
 
 
 class _NetworkXWriter(_Writer):
     """
-    Class for writing
+    Class for writing the in-memory networkx DiGraph to file.
+
+    Call `_construct_import_call` to write the networkx DiGraph to a pickle
+    file and return the Python call to load it.
+
+    TODO: this is a non-intuitive name, should be adjusted.
     """
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.
-
+        self.in_memory_networkx_kg = NetworkxKG(
+            deduplicator=self.deduplicator,
+        )
 
     def _construct_import_call(self) -> str:
-        """
+        """Dump networkx graph to a pickle file and return Python call.
 
         Returns:
-            str: Python code to load the
+            str: Python code to load the networkx graph from a pickle file.
         """
-        logger.info(
-            f"Writing networkx {self.G} to pickle file networkx_graph.pkl."
-        )
+        self.G = self.in_memory_networkx_kg._create_networkx_kg()
+        logger.info(f"Writing networkx {self.G} to pickle file networkx_graph.pkl.")
         with open(f"{self.output_directory}/networkx_graph.pkl", "wb") as f:
             pickle.dump(self.G, f)
 
@@ -38,39 +41,29 @@ class _NetworkXWriter(_Writer):
         return "import_networkx.py"
 
     def _write_node_data(self, nodes) -> bool:
-
-
+        """Add nodes to the networkx graph.
+
+        TODO: this is not strictly writing, should be refactored.
+
+        Args:
+            nodes (list): List of nodes to add to the networkx graph.
+
+        Returns:
+            bool: True if the nodes were added successfully, False otherwise.
+        """
+        passed = self.in_memory_networkx_kg.add_nodes(nodes)
         return passed
 
     def _write_edge_data(self, edges) -> bool:
-
-
-
+        """Add edges to the networkx graph.
+
+        TODO: this is not strictly writing, should be refactored.
 
-
-
-
-
-
-
-
-
-            df
-            for df in all_dfs.values()
-            if df.columns.str.contains("source_id").any()
-            and df.columns.str.contains("target_id").any()
-        ]
-        for df in node_dfs:
-            nodes = df.set_index("node_id").to_dict(orient="index")
-            self.G.add_nodes_from(nodes.items())
-        for df in edge_dfs:
-            edges = df.set_index(["source_id", "target_id"]).to_dict(
-                orient="index"
-            )
-            self.G.add_edges_from(
-                (
-                    (source, target, attrs)
-                    for (source, target), attrs in edges.items()
-                )
-            )
-        return True
+        Args:
+            edges (list): List of edges to add to the networkx graph.
+
+        Returns:
+            bool: True if the edges were added successfully, False otherwise.
+        """
+        passed = self.in_memory_networkx_kg.add_edges(edges)
+        return passed
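On the consuming side, the pickle this writer produces can be loaded back as sketched below. The file name mirrors the `networkx_graph.pkl` used in `_construct_import_call` above; the exact contents of the generated `import_networkx.py` script are not shown in this hunk, so this is an assumption about its shape:

import pickle

with open("networkx_graph.pkl", "rb") as f:
    G = pickle.load(f)  # a networkx DiGraph assembled by NetworkxKG

print(G)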