biocypher 0.9.4__tar.gz → 0.9.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- {biocypher-0.9.4 → biocypher-0.9.6}/PKG-INFO +1 -1
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_get.py +1 -1
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_metadata.py +1 -1
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/connect/_get_connector.py +13 -7
- biocypher-0.9.6/biocypher/output/in_memory/_get_in_memory_kg.py +44 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/graph/_neo4j.py +1 -3
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/relational/_postgresql.py +37 -27
- {biocypher-0.9.4 → biocypher-0.9.6}/pyproject.toml +7 -9
- biocypher-0.9.4/biocypher/output/in_memory/_get_in_memory_kg.py +0 -34
- {biocypher-0.9.4 → biocypher-0.9.6}/LICENSE +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/README.md +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_config/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_config/biocypher_config.yaml +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_config/test_config.yaml +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_config/test_schema_config.yaml +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_config/test_schema_config_disconnected.yaml +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_config/test_schema_config_extended.yaml +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_core.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_create.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_deduplicate.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_logger.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_mapping.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_misc.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_ontology.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/_translate.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/connect/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/connect/_neo4j_driver.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/in_memory/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/in_memory/_in_memory_kg.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/in_memory/_networkx.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/in_memory/_pandas.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/_batch_writer.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/_get_writer.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/_writer.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/graph/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/graph/_arangodb.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/graph/_networkx.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/graph/_owl.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/graph/_rdf.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/relational/__init__.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/relational/_csv.py +0 -0
- {biocypher-0.9.4 → biocypher-0.9.6}/biocypher/output/write/relational/_sqlite.py +0 -0
|
@@ -16,14 +16,18 @@ __all__ = ["get_connector"]
|
|
|
16
16
|
def get_connector(
|
|
17
17
|
dbms: str,
|
|
18
18
|
translator: Translator,
|
|
19
|
-
):
|
|
20
|
-
"""
|
|
21
|
-
Function to return the connector class.
|
|
19
|
+
) -> _Neo4jDriver:
|
|
20
|
+
"""Return the connector class.
|
|
22
21
|
|
|
23
|
-
Returns
|
|
22
|
+
Returns
|
|
23
|
+
-------
|
|
24
24
|
class: the connector class
|
|
25
|
-
"""
|
|
26
25
|
|
|
26
|
+
Raises
|
|
27
|
+
------
|
|
28
|
+
NotImplementedError: if the DBMS is not supported
|
|
29
|
+
|
|
30
|
+
"""
|
|
27
31
|
dbms_config = _config(dbms)
|
|
28
32
|
|
|
29
33
|
if dbms == "neo4j":
|
|
@@ -36,5 +40,7 @@ def get_connector(
|
|
|
36
40
|
multi_db=dbms_config["multi_db"],
|
|
37
41
|
translator=translator,
|
|
38
42
|
)
|
|
39
|
-
|
|
40
|
-
|
|
43
|
+
|
|
44
|
+
msg = f"Online mode is not supported for the DBMS {dbms}."
|
|
45
|
+
logger.error(msg)
|
|
46
|
+
raise NotImplementedError(msg)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""BioCypher 'in_memory' module.
|
|
2
|
+
|
|
3
|
+
Handles the in-memory Knowledge Graph instance.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from biocypher._logger import logger
|
|
11
|
+
from biocypher.output.in_memory._networkx import NetworkxKG
|
|
12
|
+
from biocypher.output.in_memory._pandas import PandasKG
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from biocypher._deduplicate import Deduplicator
|
|
16
|
+
from biocypher.output.in_memory._in_memory_kg import _InMemoryKG
|
|
17
|
+
|
|
18
|
+
logger.debug(f"Loading module {__name__}.")
|
|
19
|
+
|
|
20
|
+
__all__ = ["get_in_memory_kg"]
|
|
21
|
+
|
|
22
|
+
IN_MEMORY_DBMS = ["csv", "pandas", "tabular", "networkx"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_in_memory_kg(
|
|
26
|
+
dbms: str,
|
|
27
|
+
deduplicator: Deduplicator,
|
|
28
|
+
) -> _InMemoryKG:
|
|
29
|
+
"""Return the in-memory KG class.
|
|
30
|
+
|
|
31
|
+
Returns
|
|
32
|
+
-------
|
|
33
|
+
_InMemoryKG: the in-memory KG class
|
|
34
|
+
|
|
35
|
+
"""
|
|
36
|
+
if dbms in ["csv", "pandas", "tabular"]:
|
|
37
|
+
return PandasKG(deduplicator)
|
|
38
|
+
|
|
39
|
+
if dbms == "networkx":
|
|
40
|
+
return NetworkxKG(deduplicator)
|
|
41
|
+
|
|
42
|
+
msg = f"Getting the in memory BioCypher KG is not supported for the DBMS {dbms}. Supported: {IN_MEMORY_DBMS}."
|
|
43
|
+
logger.error(msg)
|
|
44
|
+
raise NotImplementedError(msg)
|
|
@@ -278,9 +278,7 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
278
278
|
"""
|
|
279
279
|
import_call_neo4j_v4 = self._get_import_call("import", "--database=", "--force=")
|
|
280
280
|
import_call_neo4j_v5 = self._get_import_call("database import full", "", "--overwrite-destination=")
|
|
281
|
-
neo4j_version_check = (
|
|
282
|
-
f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
|
|
283
|
-
)
|
|
281
|
+
neo4j_version_check = f"version=$({self.import_call_bin_prefix}neo4j-admin --version | cut -d '.' -f 1)"
|
|
284
282
|
|
|
285
283
|
import_script = (
|
|
286
284
|
f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; "
|
|
@@ -6,7 +6,8 @@ from biocypher.output.write._batch_writer import _BatchWriter
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class _PostgreSQLBatchWriter(_BatchWriter):
|
|
9
|
-
"""
|
|
9
|
+
"""Write node and edge representations for PostgreSQL.
|
|
10
|
+
|
|
10
11
|
Class for writing node and edge representations to disk using the
|
|
11
12
|
format specified by PostgreSQL for the use of "COPY FROM...". Each batch
|
|
12
13
|
writer instance has a fixed representation that needs to be passed
|
|
@@ -39,12 +40,13 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
39
40
|
self._copy_from_csv_commands = set()
|
|
40
41
|
super().__init__(*args, **kwargs)
|
|
41
42
|
|
|
42
|
-
def _get_default_import_call_bin_prefix(self):
|
|
43
|
-
"""
|
|
44
|
-
Method to provide the default string for the import call bin prefix.
|
|
43
|
+
def _get_default_import_call_bin_prefix(self) -> str:
|
|
44
|
+
"""Provide the default string for the import call bin prefix.
|
|
45
45
|
|
|
46
|
-
Returns
|
|
46
|
+
Returns
|
|
47
|
+
-------
|
|
47
48
|
str: The default location for the psql command
|
|
49
|
+
|
|
48
50
|
"""
|
|
49
51
|
return ""
|
|
50
52
|
|
|
@@ -56,33 +58,36 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
56
58
|
return "VARCHAR"
|
|
57
59
|
|
|
58
60
|
def _quote_string(self, value: str) -> str:
|
|
59
|
-
"""
|
|
60
|
-
Quote a string.
|
|
61
|
-
"""
|
|
62
|
-
|
|
61
|
+
"""Quote a string."""
|
|
63
62
|
return f"{self.quote}{value}{self.quote}"
|
|
64
63
|
|
|
65
64
|
def _write_array_string(self, string_list) -> str:
|
|
66
|
-
"""
|
|
67
|
-
|
|
68
|
-
|
|
65
|
+
"""Write the string representation of an array into a .csv file.
|
|
66
|
+
|
|
67
|
+
Abstract method to output.write the string representation of an array
|
|
68
|
+
into a .csv file as required by the postgresql COPY command, with
|
|
69
|
+
'{','}' brackets and ',' separation.
|
|
69
70
|
|
|
70
71
|
Args:
|
|
72
|
+
----
|
|
71
73
|
string_list (list): list of ontology strings
|
|
72
74
|
|
|
73
75
|
Returns:
|
|
76
|
+
-------
|
|
74
77
|
str: The string representation of an array for postgres COPY
|
|
78
|
+
|
|
75
79
|
"""
|
|
76
80
|
string = ",".join(string_list)
|
|
77
81
|
string = f'"{{{string}}}"'
|
|
78
82
|
return string
|
|
79
83
|
|
|
80
84
|
def _get_import_script_name(self) -> str:
|
|
81
|
-
"""
|
|
82
|
-
Returns the name of the psql import script
|
|
85
|
+
"""Return the name of the psql import script.
|
|
83
86
|
|
|
84
|
-
Returns
|
|
87
|
+
Returns
|
|
88
|
+
-------
|
|
85
89
|
str: The name of the import script (ending in .sh)
|
|
90
|
+
|
|
86
91
|
"""
|
|
87
92
|
return f"{self.db_name}-import-call.sh"
|
|
88
93
|
|
|
@@ -91,14 +96,17 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
91
96
|
string = string.lower()
|
|
92
97
|
return string
|
|
93
98
|
|
|
94
|
-
def _write_node_headers(self):
|
|
95
|
-
"""
|
|
99
|
+
def _write_node_headers(self) -> bool:
|
|
100
|
+
"""Write node header files for PostgreSQL.
|
|
101
|
+
|
|
96
102
|
Writes single CSV file for a graph entity that is represented
|
|
97
103
|
as a node as per the definition in the `schema_config.yaml`,
|
|
98
104
|
containing only the header for this type of node.
|
|
99
105
|
|
|
100
|
-
Returns
|
|
106
|
+
Returns
|
|
107
|
+
-------
|
|
101
108
|
bool: The return value. True for success, False otherwise.
|
|
109
|
+
|
|
102
110
|
"""
|
|
103
111
|
# load headers from data parse
|
|
104
112
|
if not self.node_property_dict:
|
|
@@ -158,7 +166,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
158
166
|
)
|
|
159
167
|
|
|
160
168
|
self._copy_from_csv_commands.add(
|
|
161
|
-
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
|
|
169
|
+
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;",
|
|
162
170
|
)
|
|
163
171
|
|
|
164
172
|
# add file path to import statement
|
|
@@ -175,13 +183,14 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
175
183
|
return True
|
|
176
184
|
|
|
177
185
|
def _write_edge_headers(self):
|
|
178
|
-
"""
|
|
179
|
-
Writes single CSV file for a graph entity that is represented
|
|
186
|
+
"""Writes single CSV file for a graph entity that is represented
|
|
180
187
|
as an edge as per the definition in the `schema_config.yaml`,
|
|
181
188
|
containing only the header for this type of edge.
|
|
182
189
|
|
|
183
|
-
Returns
|
|
190
|
+
Returns
|
|
191
|
+
-------
|
|
184
192
|
bool: The return value. True for success, False otherwise.
|
|
193
|
+
|
|
185
194
|
"""
|
|
186
195
|
# load headers from data parse
|
|
187
196
|
if not self.edge_property_dict:
|
|
@@ -221,7 +230,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
221
230
|
raise ValueError(
|
|
222
231
|
"Column name '_ID' is reserved for internal use, "
|
|
223
232
|
"denoting the relationship ID. Please choose a "
|
|
224
|
-
"different name for your column."
|
|
233
|
+
"different name for your column.",
|
|
225
234
|
)
|
|
226
235
|
|
|
227
236
|
columns.append(f"{col_name} {col_type}")
|
|
@@ -255,7 +264,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
255
264
|
)
|
|
256
265
|
|
|
257
266
|
self._copy_from_csv_commands.add(
|
|
258
|
-
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;"
|
|
267
|
+
f"\\copy {pascal_label} FROM '{parts_path}' DELIMITER E'{self.delim}' CSV;",
|
|
259
268
|
)
|
|
260
269
|
|
|
261
270
|
# add file path to import statement
|
|
@@ -272,14 +281,15 @@ class _PostgreSQLBatchWriter(_BatchWriter):
|
|
|
272
281
|
return True
|
|
273
282
|
|
|
274
283
|
def _construct_import_call(self) -> str:
|
|
275
|
-
"""
|
|
276
|
-
Function to construct the import call detailing folder and
|
|
284
|
+
"""Function to construct the import call detailing folder and
|
|
277
285
|
individual node and edge headers and data files, as well as
|
|
278
286
|
delimiters and database name. Built after all data has been
|
|
279
287
|
processed to ensure that nodes are called before any edges.
|
|
280
288
|
|
|
281
|
-
Returns
|
|
289
|
+
Returns
|
|
290
|
+
-------
|
|
282
291
|
str: a bash command for postgresql import
|
|
292
|
+
|
|
283
293
|
"""
|
|
284
294
|
import_call = ""
|
|
285
295
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "biocypher"
|
|
3
|
-
version = "0.9.4"
|
|
3
|
+
version = "0.9.6"
|
|
4
4
|
description = "A unifying framework for biomedical research knowledge graphs"
|
|
5
5
|
authors = [
|
|
6
6
|
"Sebastian Lobentanzer <sebastian.lobentanzer@gmail.com>",
|
|
@@ -46,12 +46,10 @@ pooch = "^1.7.0"
|
|
|
46
46
|
tqdm = "^4.65.0"
|
|
47
47
|
|
|
48
48
|
[tool.poetry.group.dev.dependencies]
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
sphinx-autodoc-typehints = ">=1.18.0"
|
|
54
|
-
myst-parser = "^0.18.0"
|
|
49
|
+
mkdocs-material = "^9.5"
|
|
50
|
+
mkdocstrings-python="^1.12"
|
|
51
|
+
mkdocs-jupyter = "^0.25"
|
|
52
|
+
neoteroi-mkdocs = "^1.1.0"
|
|
55
53
|
pytest = ">=6.0"
|
|
56
54
|
pre-commit = ">=2.17.0"
|
|
57
55
|
bump2version = "*"
|
|
@@ -60,10 +58,10 @@ pytest-cov = "^3.0.0"
|
|
|
60
58
|
hypothesis = "^6.50.1"
|
|
61
59
|
ipython = "^8.7.0"
|
|
62
60
|
ipykernel = "^6.23.1"
|
|
63
|
-
sphinxext-opengraph = "^0.8.2"
|
|
64
61
|
coverage-badge = "^1.1.0"
|
|
65
|
-
nbsphinx = "^0.9.
|
|
62
|
+
nbsphinx = "^0.9.6"
|
|
66
63
|
ruff = "^0.2.0"
|
|
64
|
+
mike = "^2.1.3"
|
|
67
65
|
|
|
68
66
|
[build-system]
|
|
69
67
|
requires = ["poetry-core>=1.0.0"]
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
BioCypher 'in_memory' module. Handles the in-memory Knowledge Graph instance.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from biocypher._deduplicate import Deduplicator
|
|
6
|
-
from biocypher._logger import logger
|
|
7
|
-
from biocypher.output.in_memory._networkx import NetworkxKG
|
|
8
|
-
from biocypher.output.in_memory._pandas import PandasKG
|
|
9
|
-
|
|
10
|
-
logger.debug(f"Loading module {__name__}.")
|
|
11
|
-
|
|
12
|
-
__all__ = ["get_in_memory_kg"]
|
|
13
|
-
|
|
14
|
-
IN_MEMORY_DBMS = ["csv", "pandas", "tabular", "networkx"]
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def get_in_memory_kg(
|
|
18
|
-
dbms: str,
|
|
19
|
-
deduplicator: Deduplicator,
|
|
20
|
-
):
|
|
21
|
-
"""
|
|
22
|
-
Function to return the in-memory KG class.
|
|
23
|
-
|
|
24
|
-
Returns:
|
|
25
|
-
class: the in-memory KG class
|
|
26
|
-
"""
|
|
27
|
-
if dbms in ["csv", "pandas", "tabular"]:
|
|
28
|
-
return PandasKG(deduplicator)
|
|
29
|
-
elif dbms == "networkx":
|
|
30
|
-
return NetworkxKG(deduplicator)
|
|
31
|
-
else:
|
|
32
|
-
raise NotImplementedError(
|
|
33
|
-
f"Getting the in memory BioCypher KG is not supported for the DBMS {dbms}. Supported: {IN_MEMORY_DBMS}."
|
|
34
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|