biocypher 0.8.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.

Potentially problematic release.


This version of biocypher has been flagged as potentially problematic; see the advisory linked from the registry page for more details.

@@ -1,15 +1,18 @@
1
- """
2
- BioCypher 'offline' module. Handles the writing of node and edge representations
3
- suitable for import into a DBMS.
1
+ """Module to provide one of the available writer classes.
2
+
3
+ The writer classes are responsible for writing the node and edge representations
4
+ to disk in a format suitable for import into a DBMS.
4
5
  """
5
6
 
6
7
  from typing import TYPE_CHECKING
7
8
 
8
9
  from biocypher._config import config as _config
9
10
  from biocypher._logger import logger
11
+ from biocypher.output.write._batch_writer import _BatchWriter
10
12
  from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
11
13
  from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
12
14
  from biocypher.output.write.graph._networkx import _NetworkXWriter
15
+ from biocypher.output.write.graph._owl import _OWLWriter
13
16
  from biocypher.output.write.graph._rdf import _RDFWriter
14
17
  from biocypher.output.write.relational._csv import _PandasCSVWriter
15
18
  from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
@@ -37,6 +40,8 @@ DBMS_TO_CLASS = {
37
40
  "sqlite3": _SQLiteBatchWriter,
38
41
  "rdf": _RDFWriter,
39
42
  "RDF": _RDFWriter,
43
+ "owl": _OWLWriter,
44
+ "OWL": _OWLWriter,
40
45
  "csv": _PandasCSVWriter,
41
46
  "CSV": _PandasCSVWriter,
42
47
  "pandas": _PandasCSVWriter,
@@ -54,12 +59,11 @@ def get_writer(
54
59
  deduplicator: "Deduplicator",
55
60
  output_directory: str,
56
61
  strict_mode: bool,
57
- ):
58
- """
59
- Function to return the writer class based on the selection in the config
60
- file.
62
+ ) -> _BatchWriter | None:
63
+ """Return the writer class based on the selection in the config file.
61
64
 
62
65
  Args:
66
+ ----
63
67
  dbms: the database management system; for options, see DBMS_TO_CLASS.
64
68
  translator: the Translator object.
65
69
  deduplicator: the Deduplicator object.
@@ -67,15 +71,26 @@ def get_writer(
67
71
  strict_mode: whether to use strict mode.
68
72
 
69
73
  Returns:
74
+ -------
70
75
  instance: an instance of the selected writer class.
71
- """
72
76
 
73
- dbms_config = _config(dbms)
77
+ """
78
+ dbms_config = _config(dbms) or {}
74
79
 
75
80
  writer = DBMS_TO_CLASS[dbms]
76
81
 
82
+ if "rdf_format" in dbms_config:
83
+ logger.warning("The 'rdf_format' config option is deprecated, use 'file_format' instead.")
84
+ if "file_format" not in dbms_config:
85
+ format = dbms_config["rdf_format"]
86
+ logger.warning(f"I will set 'file_format: {format}' for you.")
87
+ dbms_config["file_format"] = format
88
+ dbms_config.pop("rdf_format")
89
+ logger.warning("NOTE: this warning will become an error in next versions.")
90
+
77
91
  if not writer:
78
- raise ValueError(f"Unknown dbms: {dbms}")
92
+ msg = f"Unknown dbms: {dbms}"
93
+ raise ValueError(msg)
79
94
 
80
95
  if writer is not None:
81
96
  return writer(
@@ -95,6 +110,8 @@ def get_writer(
95
110
  db_user=dbms_config.get("user"), # psql
96
111
  db_password=dbms_config.get("password"), # psql
97
112
  db_port=dbms_config.get("port"), # psql
98
- rdf_format=dbms_config.get("rdf_format"), # rdf
99
- rdf_namespaces=dbms_config.get("rdf_namespaces"), # rdf
113
+ file_format=dbms_config.get("file_format"), # rdf, owl
114
+ rdf_namespaces=dbms_config.get("rdf_namespaces"), # rdf, owl
115
+ edge_model=dbms_config.get("edge_model"), # owl
100
116
  )
117
+ return None
@@ -1,3 +1,5 @@
1
+ """Module to provide the ArangoDB writer class."""
2
+
1
3
  import os
2
4
 
3
5
  from biocypher._logger import logger
@@ -5,38 +7,43 @@ from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
5
7
 
6
8
 
7
9
  class _ArangoDBBatchWriter(_Neo4jBatchWriter):
8
- """
9
- Class for writing node and edge representations to disk using the format
10
- specified by ArangoDB for the use of "arangoimport". Output files are
11
- similar to Neo4j, but with a different header format.
10
+ """Class for writing node and edge representations to disk.
11
+
12
+ Uses the format specified by ArangoDB for the use of "arangoimport".
13
+ Output files are similar to Neo4j, but with a different header format.
12
14
  """
13
15
 
14
16
  def _get_default_import_call_bin_prefix(self):
15
- """
16
- Method to provide the default string for the import call bin prefix.
17
+ """Provide the default string for the import call bin prefix.
17
18
 
18
- Returns:
19
+ Returns
20
+ -------
19
21
  str: The default location for the neo4j admin import location
22
+
20
23
  """
21
24
  return ""
22
25
 
23
26
  def _get_import_script_name(self) -> str:
24
- """
25
- Returns the name of the neo4j admin import script
27
+ """Return the name of the neo4j admin import script.
26
28
 
27
- Returns:
29
+ Returns
30
+ -------
28
31
  str: The name of the import script (ending in .sh)
32
+
29
33
  """
30
34
  return "arangodb-import-call.sh"
31
35
 
32
36
  def _write_node_headers(self):
33
- """
34
- Writes single CSV file for a graph entity that is represented
35
- as a node as per the definition in the `schema_config.yaml`,
36
- containing only the header for this type of node.
37
+ """Write single CSV file for a graph entity.
38
+
39
+ The graph entity is represented as a node as per the definition
40
+ in the `schema_config.yaml`, containing only the header for this type
41
+ of node.
37
42
 
38
- Returns:
43
+ Returns
44
+ -------
39
45
  bool: The return value. True for success, False otherwise.
46
+
40
47
  """
41
48
  # load headers from data parse
42
49
  if not self.node_property_dict:
@@ -86,9 +93,9 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
86
93
  parts = self.parts.get(label, [])
87
94
 
88
95
  if not parts:
89
- raise ValueError(
90
- f"No parts found for node label {label}. " f"Check that the data was parsed first.",
91
- )
96
+ msg = f"No parts found for node label {label}. Check that the data was parsed first."
97
+ logger.error(msg)
98
+ raise ValueError(msg)
92
99
 
93
100
  for part in parts:
94
101
  import_call_header_path = os.path.join(
@@ -105,19 +112,22 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
105
112
  import_call_header_path,
106
113
  import_call_parts_path,
107
114
  collection,
108
- )
115
+ ),
109
116
  )
110
117
 
111
118
  return True
112
119
 
113
120
  def _write_edge_headers(self):
114
- """
115
- Writes single CSV file for a graph entity that is represented
116
- as an edge as per the definition in the `schema_config.yaml`,
117
- containing only the header for this type of edge.
121
+ """Write single CSV file for a graph entity.
118
122
 
119
- Returns:
123
+ The graph entity is represented as an edge as per the definition
124
+ in the `schema_config.yaml`, containing only the header for this type
125
+ of edge.
126
+
127
+ Returns
128
+ -------
120
129
  bool: The return value. True for success, False otherwise.
130
+
121
131
  """
122
132
  # load headers from data parse
123
133
  if not self.edge_property_dict:
@@ -182,22 +192,24 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
182
192
  header_import_call_path,
183
193
  parts_import_call_path,
184
194
  collection,
185
- )
195
+ ),
186
196
  )
187
197
 
188
198
  return True
189
199
 
190
200
  def _construct_import_call(self) -> str:
191
- """
192
- Function to construct the import call detailing folder and
193
- individual node and edge headers and data files, as well as
194
- delimiters and database name. Built after all data has been
201
+ """Construct the import call.
202
+
203
+ Details folder and individual node and edge headers and data files,
204
+ as well as delimiters and database name. Built after all data has been
195
205
  processed to ensure that nodes are called before any edges.
196
206
 
197
- Returns:
198
- str: a bash command for neo4j-admin import
207
+ Returns
208
+ -------
209
+ str: a bash command for arangoimport
210
+
199
211
  """
200
- import_call = f"{self.import_call_bin_prefix}arangoimp " f"--type csv " f'--separator="{self.escaped_delim}" '
212
+ import_call = f"{self.import_call_bin_prefix}arangoimp --type csv " f'--separator="{self.escaped_delim}" '
201
213
 
202
214
  if self.quote == "'":
203
215
  import_call += f'--quote="{self.quote}" '
@@ -1,3 +1,5 @@
1
+ """Module to provide the Neo4j writer class."""
2
+
1
3
  import os
2
4
 
3
5
  from biocypher._logger import logger
@@ -45,10 +47,7 @@ class _Neo4jBatchWriter(_BatchWriter):
45
47
  return "bin/"
46
48
 
47
49
  def _quote_string(self, value: str) -> str:
48
- """
49
- Quote a string. Quote character is escaped by doubling it.
50
- """
51
-
50
+ """Quote a string. Quote character is escaped by doubling it."""
52
51
  return f"{self.quote}{value.replace(self.quote, self.quote * 2)}{self.quote}"
53
52
 
54
53
  def _write_array_string(self, string_list):