biocypher-0.7.0-py3-none-any.whl → biocypher-0.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- biocypher/_config/biocypher_config.yaml +21 -4
- biocypher/_metadata.py +1 -1
- biocypher/_ontology.py +144 -51
- biocypher/_translate.py +84 -79
- biocypher/output/write/_batch_writer.py +133 -52
- biocypher/output/write/_get_writer.py +28 -11
- biocypher/output/write/_writer.py +32 -14
- biocypher/output/write/graph/_arangodb.py +44 -32
- biocypher/output/write/graph/_neo4j.py +3 -4
- biocypher/output/write/graph/_owl.py +569 -0
- biocypher/output/write/graph/_rdf.py +234 -97
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/METADATA +1 -1
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/RECORD +15 -14
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/LICENSE +0 -0
- {biocypher-0.7.0.dist-info → biocypher-0.9.0.dist-info}/WHEEL +0 -0
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Module to provide the ArangoDB writer class."""
|
|
2
|
+
|
|
1
3
|
import os
|
|
2
4
|
|
|
3
5
|
from biocypher._logger import logger
|
|
@@ -5,38 +7,43 @@ from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
|
|
|
5
7
|
|
|
6
8
|
|
|
7
9
|
class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
specified by ArangoDB for the use of "arangoimport".
|
|
11
|
-
similar to Neo4j, but with a different header format.
|
|
10
|
+
"""Class for writing node and edge representations to disk.
|
|
11
|
+
|
|
12
|
+
Uses the format specified by ArangoDB for the use of "arangoimport".
|
|
13
|
+
Output files are similar to Neo4j, but with a different header format.
|
|
12
14
|
"""
|
|
13
15
|
|
|
14
16
|
def _get_default_import_call_bin_prefix(self):
|
|
15
|
-
"""
|
|
16
|
-
Method to provide the default string for the import call bin prefix.
|
|
17
|
+
"""Provide the default string for the import call bin prefix.
|
|
17
18
|
|
|
18
|
-
Returns
|
|
19
|
+
Returns
|
|
20
|
+
-------
|
|
19
21
|
str: The default location for the neo4j admin import location
|
|
22
|
+
|
|
20
23
|
"""
|
|
21
24
|
return ""
|
|
22
25
|
|
|
23
26
|
def _get_import_script_name(self) -> str:
|
|
24
|
-
"""
|
|
25
|
-
Returns the name of the neo4j admin import script
|
|
27
|
+
"""Return the name of the neo4j admin import script.
|
|
26
28
|
|
|
27
|
-
Returns
|
|
29
|
+
Returns
|
|
30
|
+
-------
|
|
28
31
|
str: The name of the import script (ending in .sh)
|
|
32
|
+
|
|
29
33
|
"""
|
|
30
34
|
return "arangodb-import-call.sh"
|
|
31
35
|
|
|
32
36
|
def _write_node_headers(self):
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
as a node as per the definition
|
|
36
|
-
containing only the header for this type
|
|
37
|
+
"""Write single CSV file for a graph entity.
|
|
38
|
+
|
|
39
|
+
The graph entity is represented as a node as per the definition
|
|
40
|
+
in the `schema_config.yaml`, containing only the header for this type
|
|
41
|
+
of node.
|
|
37
42
|
|
|
38
|
-
Returns
|
|
43
|
+
Returns
|
|
44
|
+
-------
|
|
39
45
|
bool: The return value. True for success, False otherwise.
|
|
46
|
+
|
|
40
47
|
"""
|
|
41
48
|
# load headers from data parse
|
|
42
49
|
if not self.node_property_dict:
|
|
@@ -86,9 +93,9 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
|
|
86
93
|
parts = self.parts.get(label, [])
|
|
87
94
|
|
|
88
95
|
if not parts:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
)
|
|
96
|
+
msg = f"No parts found for node label {label}. Check that the data was parsed first."
|
|
97
|
+
logger.error(msg)
|
|
98
|
+
raise ValueError(msg)
|
|
92
99
|
|
|
93
100
|
for part in parts:
|
|
94
101
|
import_call_header_path = os.path.join(
|
|
@@ -105,19 +112,22 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
|
|
105
112
|
import_call_header_path,
|
|
106
113
|
import_call_parts_path,
|
|
107
114
|
collection,
|
|
108
|
-
)
|
|
115
|
+
),
|
|
109
116
|
)
|
|
110
117
|
|
|
111
118
|
return True
|
|
112
119
|
|
|
113
120
|
def _write_edge_headers(self):
|
|
114
|
-
"""
|
|
115
|
-
Writes single CSV file for a graph entity that is represented
|
|
116
|
-
as an edge as per the definition in the `schema_config.yaml`,
|
|
117
|
-
containing only the header for this type of edge.
|
|
121
|
+
"""Write single CSV file for a graph entity.
|
|
118
122
|
|
|
119
|
-
|
|
123
|
+
The graph entity is represented as an edge as per the definition
|
|
124
|
+
in the `schema_config.yaml`, containing only the header for this type
|
|
125
|
+
of edge.
|
|
126
|
+
|
|
127
|
+
Returns
|
|
128
|
+
-------
|
|
120
129
|
bool: The return value. True for success, False otherwise.
|
|
130
|
+
|
|
121
131
|
"""
|
|
122
132
|
# load headers from data parse
|
|
123
133
|
if not self.edge_property_dict:
|
|
@@ -182,22 +192,24 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
|
|
182
192
|
header_import_call_path,
|
|
183
193
|
parts_import_call_path,
|
|
184
194
|
collection,
|
|
185
|
-
)
|
|
195
|
+
),
|
|
186
196
|
)
|
|
187
197
|
|
|
188
198
|
return True
|
|
189
199
|
|
|
190
200
|
def _construct_import_call(self) -> str:
|
|
191
|
-
"""
|
|
192
|
-
|
|
193
|
-
individual node and edge headers and data files,
|
|
194
|
-
delimiters and database name. Built after all data has been
|
|
201
|
+
"""Construct the import call.
|
|
202
|
+
|
|
203
|
+
Details folder and individual node and edge headers and data files,
|
|
204
|
+
as well as delimiters and database name. Built after all data has been
|
|
195
205
|
processed to ensure that nodes are called before any edges.
|
|
196
206
|
|
|
197
|
-
Returns
|
|
198
|
-
|
|
207
|
+
Returns
|
|
208
|
+
-------
|
|
209
|
+
str: a bash command for arangoimport
|
|
210
|
+
|
|
199
211
|
"""
|
|
200
|
-
import_call = f"{self.import_call_bin_prefix}arangoimp " f"--type csv " f'--separator="{self.escaped_delim}" '
|
|
212
|
+
import_call = f"{self.import_call_bin_prefix}arangoimp --type csv " f'--separator="{self.escaped_delim}" '
|
|
201
213
|
|
|
202
214
|
if self.quote == "'":
|
|
203
215
|
import_call += f'--quote="{self.quote}" '
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Module to provide the Neo4j writer class."""
|
|
2
|
+
|
|
1
3
|
import os
|
|
2
4
|
|
|
3
5
|
from biocypher._logger import logger
|
|
@@ -45,10 +47,7 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
45
47
|
return "bin/"
|
|
46
48
|
|
|
47
49
|
def _quote_string(self, value: str) -> str:
|
|
48
|
-
"""
|
|
49
|
-
Quote a string. Quote character is escaped by doubling it.
|
|
50
|
-
"""
|
|
51
|
-
|
|
50
|
+
"""Quote a string. Quote character is escaped by doubling it."""
|
|
52
51
|
return f"{self.quote}{value.replace(self.quote, self.quote * 2)}{self.quote}"
|
|
53
52
|
|
|
54
53
|
def _write_array_string(self, string_list):
|