biocypher 0.5.42__tar.gz → 0.5.44__tar.gz

This diff shows the content of publicly released package versions as published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.

Potentially problematic release: this version of biocypher might be problematic.

Files changed (39)
  1. {biocypher-0.5.42 → biocypher-0.5.44}/PKG-INFO +1 -1
  2. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_config/biocypher_config.yaml +8 -0
  3. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_core.py +3 -3
  4. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_metadata.py +1 -1
  5. biocypher-0.5.42/biocypher/_connect.py → biocypher-0.5.44/biocypher/output/connect/_neo4j_driver.py +5 -5
  6. {biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output/in_memory}/_pandas.py +2 -1
  7. biocypher-0.5.44/biocypher/output/write/__init__.py +0 -0
  8. {biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/_batch_writer.py +16 -23
  9. biocypher-0.5.42/biocypher/write/_write.py → biocypher-0.5.44/biocypher/output/write/_get_writer.py +15 -12
  10. biocypher-0.5.44/biocypher/output/write/_writer.py +200 -0
  11. biocypher-0.5.44/biocypher/output/write/graph/__init__.py +0 -0
  12. {biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/graph/_arangodb.py +1 -1
  13. {biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/graph/_neo4j.py +9 -11
  14. biocypher-0.5.44/biocypher/output/write/graph/_networkx.py +76 -0
  15. {biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/graph/_rdf.py +3 -4
  16. biocypher-0.5.44/biocypher/output/write/relational/__init__.py +0 -0
  17. biocypher-0.5.44/biocypher/output/write/relational/_csv.py +76 -0
  18. {biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/relational/_postgresql.py +2 -2
  19. {biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/relational/_sqlite.py +1 -1
  20. {biocypher-0.5.42 → biocypher-0.5.44}/pyproject.toml +7 -1
  21. {biocypher-0.5.42 → biocypher-0.5.44}/LICENSE +0 -0
  22. {biocypher-0.5.42 → biocypher-0.5.44}/README.md +0 -0
  23. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/__init__.py +0 -0
  24. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_config/__init__.py +0 -0
  25. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_config/test_config.yaml +0 -0
  26. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_config/test_schema_config.yaml +0 -0
  27. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_config/test_schema_config_disconnected.yaml +0 -0
  28. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_config/test_schema_config_extended.yaml +0 -0
  29. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_create.py +0 -0
  30. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_deduplicate.py +0 -0
  31. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_get.py +0 -0
  32. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_logger.py +0 -0
  33. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_mapping.py +0 -0
  34. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_misc.py +0 -0
  35. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_ontology.py +0 -0
  36. {biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_translate.py +0 -0
  37. {biocypher-0.5.42/biocypher/write → biocypher-0.5.44/biocypher/output}/__init__.py +0 -0
  38. {biocypher-0.5.42/biocypher/write/graph → biocypher-0.5.44/biocypher/output/connect}/__init__.py +0 -0
  39. {biocypher-0.5.42/biocypher/write/relational → biocypher-0.5.44/biocypher/output/in_memory}/__init__.py +0 -0

{biocypher-0.5.42 → biocypher-0.5.44}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biocypher
-Version: 0.5.42
+Version: 0.5.44
 Summary: A unifying framework for biomedical research knowledge graphs
 Home-page: https://github.com/biocypher/biocypher
 License: MIT

{biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_config/biocypher_config.yaml
@@ -131,3 +131,11 @@ sqlite:
   delimiter: '\t'
   # import_call_bin_prefix: '' # path to "sqlite3"
   # import_call_file_prefix: '/path/to/files'
+
+csv:
+  ### CSV/Pandas configuration ###
+  delimiter: ","
+
+networkx:
+  ### NetworkX configuration ###
+  some_config: some_value # placeholder for technical reasons TODO
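
The new blocks make the csv and networkx targets configurable alongside the existing ones. A usage sketch, assuming the `dbms` and `output_directory` overrides and the `write_*` wrappers of the `BioCypher` class behave for these targets as they do for the established writers; the identifier, label, and output path below are made up:

    from biocypher import BioCypher

    # assumed entry point; a real run needs a schema that defines the "protein" class
    bc = BioCypher(dbms="csv", output_directory="biocypher-out/csv-demo")

    bc.write_nodes([("uniprot:P12345", "protein", {"name": "example protein"})])
    bc.write_import_call()  # emits import_pandas_csv.py next to the CSV files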

{biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_core.py
@@ -26,17 +26,17 @@ from ._logger import logger

 logger.debug(f"Loading module {__name__}.")

-from biocypher.write._write import DBMS_TO_CLASS, get_writer
 from ._get import Downloader
 from ._config import config as _config
 from ._config import update_from_file as _file_update
 from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
-from ._pandas import Pandas
-from ._connect import get_driver
 from ._mapping import OntologyMapping
 from ._ontology import Ontology
 from ._translate import Translator
 from ._deduplicate import Deduplicator
+from .output.in_memory._pandas import Pandas
+from .output.write._get_writer import DBMS_TO_CLASS, get_writer
+from .output.connect._neo4j_driver import get_driver

 __all__ = ["BioCypher"]


{biocypher-0.5.42 → biocypher-0.5.44}/biocypher/_metadata.py
@@ -19,7 +19,7 @@ import importlib.metadata

 import toml

-_VERSION = "0.5.42"
+_VERSION = "0.5.44"


 def get_metadata():

biocypher-0.5.42/biocypher/_connect.py → biocypher-0.5.44/biocypher/output/connect/_neo4j_driver.py
@@ -13,7 +13,7 @@ BioCypher 'online' mode. Handles connection and manipulation of a running DBMS.
 """
 import subprocess

-from ._logger import logger
+from biocypher._logger import logger

 logger.debug(f"Loading module {__name__}.")

@@ -22,10 +22,10 @@ import itertools

 import neo4j_utils

-from . import _misc
-from ._config import config as _config
-from ._create import BioCypherEdge, BioCypherNode
-from ._translate import Translator
+from biocypher import _misc
+from biocypher._config import config as _config
+from biocypher._create import BioCypherEdge, BioCypherNode
+from biocypher._translate import Translator

 __all__ = ["_Neo4jDriver"]


{biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output/in_memory}/_pandas.py
@@ -1,6 +1,6 @@
 import pandas as pd

-from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
+from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode


 class Pandas:
@@ -87,3 +87,4 @@ class Pandas:
             self.dfs[_type] = pd.concat(
                 [self.dfs[_type], df], ignore_index=True
             )
+        return self.dfs[_type]
File without changes

{biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/_batch_writer.py
@@ -6,16 +6,18 @@ import os
 import re
 import glob

-from rdflib import Graph
 from more_itertools import peekable

 from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
 from biocypher._logger import logger
 from biocypher._translate import Translator
 from biocypher._deduplicate import Deduplicator
+from biocypher.output.write._writer import _Writer


-class _BatchWriter(ABC):
+class _BatchWriter(_Writer, ABC):
+    """Abstract batch writer class"""
+
     @abstractmethod
     def _get_default_import_call_bin_prefix(self):
         """
@@ -41,7 +43,7 @@ class _BatchWriter(ABC):
             str: The database-specific string representation of an array
         """
         raise NotImplementedError(
-            "Database writer must override '_write_node_headers'"
+            "Database writer must override '_write_array_string'"
         )

     @abstractmethod
@@ -206,6 +208,12 @@ class _BatchWriter(ABC):
             rdf_namespaces:
                 The namespaces for RDF.
         """
+        super().__init__(
+            translator=translator,
+            deduplicator=deduplicator,
+            output_directory=output_directory,
+            strict_mode=strict_mode,
+        )
         self.db_name = db_name
         self.db_user = db_user
         self.db_password = db_password
@@ -239,32 +247,15 @@ class _BatchWriter(ABC):
         self.import_call_nodes = set()
         self.import_call_edges = set()

-        self._outdir = output_directory
+        self.outdir = output_directory

         self._import_call_file_prefix = import_call_file_prefix

-        if os.path.exists(self.outdir):
-            logger.warning(
-                f"Output directory `{self.outdir}` already exists. "
-                "If this is not planned, file consistency may be compromised."
-            )
-        else:
-            logger.info(f"Creating output directory `{self.outdir}`.")
-            os.makedirs(self.outdir)
-
         self.parts = {}  # dict to store the paths of part files for each label

         # TODO not memory efficient, but should be fine for most cases; is
         # there a more elegant solution?

-    @property
-    def outdir(self):
-        """
-        Property for output directory path.
-        """
-
-        return self._outdir
-
     @property
     def import_call_file_prefix(self):
         """
@@ -272,7 +263,7 @@ class _BatchWriter(ABC):
         """

         if self._import_call_file_prefix is None:
-            return self._outdir
+            return self.outdir
         else:
             return self._import_call_file_prefix

@@ -1005,7 +996,9 @@ class _BatchWriter(ABC):
         """

         file_path = os.path.join(self.outdir, self._get_import_script_name())
-        logger.info(f"Writing {self.db_name} import call to `{file_path}`.")
+        logger.info(
+            f"Writing {self.db_name + ' ' if self.db_name else ''}import call to `{file_path}`."
+        )

         with open(file_path, "w", encoding="utf-8") as f:
             f.write(self._construct_import_call())

biocypher-0.5.42/biocypher/write/_write.py → biocypher-0.5.44/biocypher/output/write/_get_writer.py
@@ -14,11 +14,13 @@ suitable for import into a DBMS.
 """

 from biocypher._logger import logger
-from biocypher.write.graph._rdf import _RDFWriter
-from biocypher.write.graph._neo4j import _Neo4jBatchWriter
-from biocypher.write.graph._arangodb import _ArangoDBBatchWriter
-from biocypher.write.relational._sqlite import _SQLiteBatchWriter
-from biocypher.write.relational._postgresql import _PostgreSQLBatchWriter
+from biocypher.output.write.graph._rdf import _RDFWriter
+from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
+from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
+from biocypher.output.write.graph._networkx import _NetworkXWriter
+from biocypher.output.write.relational._csv import _PandasCSVWriter
+from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
+from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter

 logger.debug(f"Loading module {__name__}.")

@@ -46,6 +48,12 @@ DBMS_TO_CLASS = {
     "sqlite3": _SQLiteBatchWriter,
     "rdf": _RDFWriter,
     "RDF": _RDFWriter,
+    "csv": _PandasCSVWriter,
+    "CSV": _PandasCSVWriter,
+    "pandas": _PandasCSVWriter,
+    "Pandas": _PandasCSVWriter,
+    "networkx": _NetworkXWriter,
+    "NetworkX": _NetworkXWriter,
 }


@@ -61,19 +69,14 @@ def get_writer(
     file.

     Args:
-
         dbms: the database management system; for options, see DBMS_TO_CLASS.
-
         translator: the Translator object.
-
-        output_directory: the directory to write the output files to.
-
+        deduplicator: the Deduplicator object.
+        output_directory: the directory to output.write the output files to.
         strict_mode: whether to use strict mode.

     Returns:
-
         instance: an instance of the selected writer class.
-
     """

     dbms_config = _config(dbms)
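
The dispatch in get_writer() is a plain dictionary lookup on DBMS_TO_CLASS, followed by the `_config(dbms)` call shown above, which pulls the matching block from biocypher_config.yaml (e.g. the new csv and networkx sections). A small check of the aliases, using only names that appear in this diff:

    from biocypher.output.write._get_writer import DBMS_TO_CLASS

    assert DBMS_TO_CLASS["csv"] is DBMS_TO_CLASS["pandas"]  # aliases share one writer class
    assert DBMS_TO_CLASS["networkx"].__name__ == "_NetworkXWriter"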

biocypher-0.5.44/biocypher/output/write/_writer.py (new file)
@@ -0,0 +1,200 @@
+from abc import ABC, abstractmethod
+from typing import Union, Optional
+from collections.abc import Iterable
+import os
+
+from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
+from biocypher._logger import logger
+from biocypher._translate import Translator
+from biocypher._deduplicate import Deduplicator
+
+__all__ = ["_Writer"]
+
+
+class _Writer(ABC):
+    """Abstract class for writing node and edge representations to disk.
+    Specifics of the different writers (e.g. neo4j, postgresql, csv, etc.)
+    are implemented in the child classes. Any concrete writer needs to
+    implement at least:
+    - _write_node_data
+    - _write_edge_data
+    - _construct_import_call
+    - _get_import_script_name
+
+    Args:
+        translator (Translator): Instance of :py:class:`Translator` to enable translation of
+            nodes and manipulation of properties.
+        deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
+            of nodes and edges.
+        output_directory (str, optional): Path for exporting CSV files. Defaults to None.
+        strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
+        strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
+
+    Raises:
+        NotImplementedError: Writer implementation must override '_write_node_data'
+        NotImplementedError: Writer implementation must override '_write_edge_data'
+        NotImplementedError: Writer implementation must override '_construct_import_call'
+        NotImplementedError: Writer implementation must override '_get_import_script_name'
+    """
+
+    def __init__(
+        self,
+        translator: Translator,
+        deduplicator: Deduplicator,
+        output_directory: Optional[str] = None,
+        strict_mode: bool = False,
+        *args,
+        **kwargs,
+    ):
+        """Abstract class for writing node and edge representations to disk.
+
+        Args:
+            translator (Translator): Instance of :py:class:`Translator` to enable translation of
+                nodes and manipulation of properties.
+            deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
+                of nodes and edges.
+            output_directory (str, optional): Path for exporting CSV files. Defaults to None.
+            strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
+            strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
+        """
+        self.translator = translator
+        self.deduplicator = deduplicator
+        self.strict_mode = strict_mode
+        self.output_directory = output_directory
+
+        if os.path.exists(self.output_directory):
+            if kwargs.get("write_to_file", True):
+                logger.warning(
+                    f"Output directory `{self.output_directory}` already exists. "
+                    "If this is not planned, file consistency may be compromised."
+                )
+        else:
+            logger.info(f"Creating output directory `{self.output_directory}`.")
+            os.makedirs(self.output_directory)
+
+    @abstractmethod
+    def _write_node_data(
+        self,
+        nodes: Iterable[
+            Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
+        ],
+    ) -> bool:
+        """Implement how to output.write nodes to disk.
+
+        Args:
+            nodes (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
+
+        Returns:
+            bool: The return value. True for success, False otherwise.
+        """
+        raise NotImplementedError(
+            "Writer implementation must override 'write_nodes'"
+        )
+
+    @abstractmethod
+    def _write_edge_data(
+        self,
+        edges: Iterable[
+            Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
+        ],
+    ) -> bool:
+        """Implement how to output.write edges to disk.
+
+        Args:
+            edges (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
+
+        Returns:
+            bool: The return value. True for success, False otherwise.
+        """
+        raise NotImplementedError(
+            "Writer implementation must override 'write_edges'"
+        )
+
+    @abstractmethod
+    def _construct_import_call(self) -> str:
+        """
+        Function to construct the import call detailing folder and
+        individual node and edge headers and data files, as well as
+        delimiters and database name. Built after all data has been
+        processed to ensure that nodes are called before any edges.
+
+        Returns:
+            str: command for importing the output files into a DBMS.
+        """
+        raise NotImplementedError(
+            "Writer implementation must override '_construct_import_call'"
+        )
+
+    @abstractmethod
+    def _get_import_script_name(self) -> str:
+        """Returns the name of the import script.
+
+        Returns:
+            str: The name of the import script (ending in .sh)
+        """
+        raise NotImplementedError(
+            "Writer implementation must override '_get_import_script_name'"
+        )
+
+    def write_nodes(
+        self, nodes, batch_size: int = int(1e6), force: bool = False
+    ):
+        """Wrapper for writing nodes.
+
+        Args:
+            nodes (BioCypherNode): a list or generator of nodes in
+                :py:class:`BioCypherNode` format
+            batch_size (int): The batch size for writing nodes.
+            force (bool): Whether to force writing nodes even if their type is
+                not present in the schema.
+
+        Returns:
+            bool: The return value. True for success, False otherwise.
+        """
+        passed = self._write_node_data(nodes)
+        if not passed:
+            logger.error("Error while writing node data.")
+            return False
+        return True
+
+    def write_edges(
+        self, edges, batch_size: int = int(1e6), force: bool = False
+    ):
+        """Wrapper for writing edges.
+
+        Args:
+            nodes (BioCypherNode): a list or generator of nodes in
+                :py:class:`BioCypherNode` format
+            batch_size (int): The batch size for writing nodes.
+            force (bool): Whether to force writing nodes even if their type is
+                not present in the schema.
+
+        Returns:
+            bool: The return value. True for success, False otherwise.
+        """
+        passed = self._write_edge_data(edges)
+        if not passed:
+            logger.error("Error while writing edge data.")
+            return False
+        return True
+
+    def write_import_call(self):
+        """
+        Function to output.write the import call detailing folder and
+        individual node and edge headers and data files, as well as
+        delimiters and database name, to the export folder as txt.
+
+        Returns:
+            str: The path of the file holding the import call.
+        """
+        file_path = os.path.join(
+            self.output_directory, self._get_import_script_name()
+        )
+        logger.info(
+            f"Writing {self.__class__.__name__} import call to `{file_path}`."
+        )
+
+        with open(file_path, "w", encoding="utf-8") as f:
+            f.write(self._construct_import_call())
+
+        return file_path
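
The base class fixes the contract for every output target: a concrete writer supplies the four abstract methods, while write_nodes, write_edges, and write_import_call are inherited. A minimal illustrative subclass (hypothetical, not part of the package):

    import os

    from biocypher.output.write._writer import _Writer


    class _CountWriter(_Writer):
        """Hypothetical example writer that only records entity counts."""

        def _write_node_data(self, nodes) -> bool:
            # a real writer would serialize the BioCypherNode objects here
            with open(os.path.join(self.output_directory, "node_count.txt"), "w") as f:
                f.write(f"{sum(1 for _ in nodes)}\n")
            return True

        def _write_edge_data(self, edges) -> bool:
            with open(os.path.join(self.output_directory, "edge_count.txt"), "w") as f:
                f.write(f"{sum(1 for _ in edges)}\n")
            return True

        def _construct_import_call(self) -> str:
            return "cat node_count.txt edge_count.txt"

        def _get_import_script_name(self) -> str:
            return "import_counts.sh"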

{biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/graph/_arangodb.py
@@ -1,7 +1,7 @@
 import os

 from biocypher._logger import logger
-from biocypher.write.graph._neo4j import _Neo4jBatchWriter
+from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter


 class _ArangoDBBatchWriter(_Neo4jBatchWriter):

{biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/graph/_neo4j.py
@@ -1,9 +1,7 @@
 import os
-import re
-import subprocess

 from biocypher._logger import logger
-from biocypher.write._batch_writer import parse_label, _BatchWriter
+from biocypher.output.write._batch_writer import parse_label, _BatchWriter


 class _Neo4jBatchWriter(_BatchWriter):
@@ -49,7 +47,7 @@ class _Neo4jBatchWriter(_BatchWriter):

     def _write_array_string(self, string_list):
         """
-        Abstract method to write the string representation of an array into a .csv file
+        Abstract method to output.write the string representation of an array into a .csv file
         as required by the neo4j admin-import.

         Args:
@@ -303,11 +301,13 @@ class _Neo4jBatchWriter(_BatchWriter):
         Returns:
             str: The import call.
         """
-        import_call = (
-            f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
-            f'--delimiter="{self.escaped_delim}" '
-            f'--array-delimiter="{self.escaped_adelim}" '
-        )
+        import_call = f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
+
+        import_call += f"{database_cmd}{self.db_name} "
+
+        import_call += f'--delimiter="{self.escaped_delim}" '
+
+        import_call += f'--array-delimiter="{self.escaped_adelim}" '

         if self.quote == "'":
             import_call += f'--quote="{self.quote}" '
@@ -329,6 +329,4 @@ class _Neo4jBatchWriter(_BatchWriter):
         for header_path, parts_path in self.import_call_edges:
             import_call += f'--relationships="{header_path},{parts_path}" '

-        # Database needs to be at the end starting with Neo4j 5.0+.
-        import_call += f"{database_cmd}{self.db_name} "
         return import_call
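
With this reordering, the database argument follows the import command directly instead of trailing the node and relationship arguments. A sketch of the assembled string, with hypothetical placeholder values standing in for the self.* attributes and the import_cmd/database_cmd variables:

    import_call_bin_prefix, import_cmd = "bin/", "import"
    database_cmd, db_name = "--database=", "neo4j"
    escaped_delim, escaped_adelim = "\\t", ";"

    call = f"{import_call_bin_prefix}neo4j-admin {import_cmd} "
    call += f"{database_cmd}{db_name} "
    call += f'--delimiter="{escaped_delim}" '
    call += f'--array-delimiter="{escaped_adelim}" '
    # -> bin/neo4j-admin import --database=neo4j --delimiter="\t" --array-delimiter=";"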

biocypher-0.5.44/biocypher/output/write/graph/_networkx.py (new file)
@@ -0,0 +1,76 @@
+import pickle
+
+import networkx as nx
+
+from biocypher._logger import logger
+from biocypher.output.write._writer import _Writer
+from biocypher.output.write.relational._csv import _PandasCSVWriter
+
+
+class _NetworkXWriter(_Writer):
+    """
+    Class for writing node and edges to a networkx DiGraph.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.csv_writer = _PandasCSVWriter(*args, write_to_file=False, **kwargs)
+        self.G = nx.DiGraph()
+
+    def _construct_import_call(self) -> str:
+        """Function to construct the Python code to load all node and edge csv files again into Pandas dfs.
+
+        Returns:
+            str: Python code to load the csv files into Pandas dfs.
+        """
+        logger.info(
+            f"Writing networkx {self.G} to pickle file networkx_graph.pkl."
+        )
+        with open(f"{self.output_directory}/networkx_graph.pkl", "wb") as f:
+            pickle.dump(self.G, f)
+
+        import_call = "import pickle\n"
+        import_call += "with open('./networkx_graph.pkl', 'rb') as f:\n\tG_loaded = pickle.load(f)"
+        return import_call
+
+    def _get_import_script_name(self) -> str:
+        """Function to return the name of the import script."""
+        return "import_networkx.py"
+
+    def _write_node_data(self, nodes) -> bool:
+        passed = self.csv_writer._write_entities_to_file(nodes)
+        self.add_to_networkx()
+        return passed
+
+    def _write_edge_data(self, edges) -> bool:
+        passed = self.csv_writer._write_entities_to_file(edges)
+        self.add_to_networkx()
+        return passed
+
+    def add_to_networkx(self) -> bool:
+        all_dfs = self.csv_writer.stored_dfs
+        node_dfs = [
+            df
+            for df in all_dfs.values()
+            if df.columns.str.contains("node_id").any()
+        ]
+        edge_dfs = [
+            df
+            for df in all_dfs.values()
+            if df.columns.str.contains("source_id").any()
+            and df.columns.str.contains("target_id").any()
+        ]
+        for df in node_dfs:
+            nodes = df.set_index("node_id").to_dict(orient="index")
+            self.G.add_nodes_from(nodes.items())
+        for df in edge_dfs:
+            edges = df.set_index(["source_id", "target_id"]).to_dict(
+                orient="index"
+            )
+            self.G.add_edges_from(
+                (
+                    (source, target, attrs)
+                    for (source, target), attrs in edges.items()
+                )
+            )
+        return True
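
Loading the result mirrors the generated import_networkx.py; a short sketch, with the path assumed to be relative to the configured output directory:

    import pickle

    import networkx as nx

    with open("./networkx_graph.pkl", "rb") as f:
        G_loaded: nx.DiGraph = pickle.load(f)

    print(G_loaded.number_of_nodes(), G_loaded.number_of_edges())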

{biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/graph/_rdf.py
@@ -24,11 +24,10 @@ from rdflib.namespace import (

 from biocypher._create import BioCypherEdge, BioCypherNode
 from biocypher._logger import logger
-from biocypher.write._batch_writer import _BatchWriter
+from biocypher.output.write._batch_writer import _BatchWriter


 class _RDFWriter(_BatchWriter):
-
     """
     Class to write BioCypher's property graph into an RDF format using
     rdflib and all the extensions it supports (RDF/XML, N3, NTriples,
@@ -125,7 +124,7 @@ class _RDFWriter(_BatchWriter):

         # create file name
         file_name = os.path.join(
-            self._outdir, f"{label_pascal}.{self.extension}"
+            self.outdir, f"{label_pascal}.{self.extension}"
         )

         # write data in graph
@@ -287,7 +286,7 @@ class _RDFWriter(_BatchWriter):

         # create file name
         file_name = os.path.join(
-            self._outdir, f"{label_pascal}.{self.extension}"
+            self.outdir, f"{label_pascal}.{self.extension}"
         )

         # write data in graph

biocypher-0.5.44/biocypher/output/write/relational/_csv.py (new file)
@@ -0,0 +1,76 @@
+from more_itertools import peekable
+
+from biocypher._logger import logger
+from biocypher.output.write._writer import _Writer
+from biocypher.output.in_memory._pandas import Pandas
+
+
+class _PandasCSVWriter(_Writer):
+    """
+    Class for writing node and edge representations to a CSV file.
+    """
+
+    def __init__(self, *args, write_to_file: bool = True, **kwargs):
+        kwargs["write_to_file"] = write_to_file
+        super().__init__(*args, **kwargs)
+        self.in_memory_dfs = {}
+        self.stored_dfs = {}
+        self.pandas_in_memory = Pandas(
+            translator=self.translator,
+            deduplicator=self.deduplicator,
+        )
+        self.delimiter = kwargs.get("delimiter")
+        if not self.delimiter:
+            self.delimiter = ","
+        self.write_to_file = write_to_file
+
+    def _construct_import_call(self) -> str:
+        """Function to construct the Python code to load all node and edge csv files again into Pandas dfs.
+
+        Returns:
+            str: Python code to load the csv files into Pandas dfs.
+        """
+        import_call = "import pandas as pd\n\n"
+        for df_name in self.stored_dfs.keys():
+            import_call += f"{df_name} = pd.read_csv('./{df_name}.csv', header=0, index_col=0)\n"
+        return import_call
+
+    def _get_import_script_name(self) -> str:
+        """Function to return the name of the import script."""
+        return "import_pandas_csv.py"
+
+    def _write_node_data(self, nodes) -> bool:
+        passed = self._write_entities_to_file(nodes)
+        return passed
+
+    def _write_edge_data(self, edges) -> bool:
+        passed = self._write_entities_to_file(edges)
+        return passed
+
+    def _write_entities_to_file(self, entities: iter) -> bool:
+        """Function to output.write the entities to a CSV file.
+
+        Args:
+            entities (iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
+        """
+        entities = peekable(entities)
+        entity_list = self.pandas_in_memory._separate_entity_types(entities)
+        for entity_type, entities in entity_list.items():
+            self.in_memory_dfs[
+                entity_type
+            ] = self.pandas_in_memory._add_entity_df(entity_type, entities)
+        for entity_type in self.in_memory_dfs.keys():
+            entity_df = self.in_memory_dfs[entity_type]
+            if " " in entity_type or "." in entity_type:
+                entity_type = entity_type.replace(" ", "_").replace(".", "_")
+            if self.write_to_file:
+                logger.info(
+                    f"Writing {entity_df.shape[0]} entries to {entity_type}.csv."
+                )
+                entity_df.to_csv(
+                    f"{self.output_directory}/{entity_type}.csv",
+                    sep=self.delimiter,
+                )
+            self.stored_dfs[entity_type] = entity_df
+        self.in_memory_dfs = {}
+        return True
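
Reading the files back mirrors the generated import_pandas_csv.py; a short sketch for a hypothetical "protein" label (file names follow the entity type, with spaces and dots replaced by underscores; pass sep explicitly if a non-default csv delimiter is configured):

    import pandas as pd

    protein = pd.read_csv("./protein.csv", header=0, index_col=0, sep=",")
    print(protein.head())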

{biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/relational/_postgresql.py
@@ -2,7 +2,7 @@ import os
 import glob

 from biocypher._logger import logger
-from biocypher.write._batch_writer import _BatchWriter
+from biocypher.output.write._batch_writer import _BatchWriter


 class _PostgreSQLBatchWriter(_BatchWriter):
@@ -59,7 +59,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):

     def _write_array_string(self, string_list) -> str:
         """
-        Abstract method to write the string representation of an array into a .csv file
+        Abstract method to output.write the string representation of an array into a .csv file
         as required by the postgresql COPY command, with '{','}' brackets and ',' separation.

         Args:

{biocypher-0.5.42/biocypher → biocypher-0.5.44/biocypher/output}/write/relational/_sqlite.py
@@ -1,4 +1,4 @@
-from biocypher.write.relational._postgresql import _PostgreSQLBatchWriter
+from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter


 class _SQLiteBatchWriter(_PostgreSQLBatchWriter):

{biocypher-0.5.42 → biocypher-0.5.44}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "biocypher"
-version = "0.5.42"
+version = "0.5.44"
 description = "A unifying framework for biomedical research knowledge graphs"
 authors = [
     "Sebastian Lobentanzer <sebastian.lobentanzer@gmail.com>",
@@ -23,6 +23,12 @@ classifiers = [
 repository = "https://github.com/biocypher/biocypher"
 readme = "README.md"

+[project.urls]
+Homepage = "https://biocypher.org"
+Documentation = "https://biocypher.org"
+Repository = "https://github.com/biocypher/biocypher.git"
+Issues = "https://github.com/biocypher/biocypher/issues"
+
 [tool.poetry.dependencies]
 python = "^3.9"
 PyYAML = ">=5.0"
File without changes
File without changes
File without changes