biocypher 0.6.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry, and is provided for informational purposes only.

This release of biocypher has been flagged as potentially problematic.

Files changed (34)
  1. biocypher/__init__.py +3 -13
  2. biocypher/_config/__init__.py +6 -23
  3. biocypher/_config/biocypher_config.yaml +14 -3
  4. biocypher/_core.py +360 -262
  5. biocypher/_create.py +13 -27
  6. biocypher/_deduplicate.py +4 -11
  7. biocypher/_get.py +21 -60
  8. biocypher/_logger.py +4 -16
  9. biocypher/_mapping.py +4 -17
  10. biocypher/_metadata.py +3 -15
  11. biocypher/_misc.py +14 -28
  12. biocypher/_ontology.py +127 -212
  13. biocypher/_translate.py +34 -58
  14. biocypher/output/connect/_get_connector.py +40 -0
  15. biocypher/output/connect/_neo4j_driver.py +9 -65
  16. biocypher/output/in_memory/_get_in_memory_kg.py +34 -0
  17. biocypher/output/in_memory/_in_memory_kg.py +40 -0
  18. biocypher/output/in_memory/_networkx.py +44 -0
  19. biocypher/output/in_memory/_pandas.py +20 -15
  20. biocypher/output/write/_batch_writer.py +166 -179
  21. biocypher/output/write/_get_writer.py +11 -24
  22. biocypher/output/write/_writer.py +43 -44
  23. biocypher/output/write/graph/_arangodb.py +7 -24
  24. biocypher/output/write/graph/_neo4j.py +51 -56
  25. biocypher/output/write/graph/_networkx.py +36 -43
  26. biocypher/output/write/graph/_rdf.py +107 -95
  27. biocypher/output/write/relational/_csv.py +6 -11
  28. biocypher/output/write/relational/_postgresql.py +5 -13
  29. biocypher/output/write/relational/_sqlite.py +3 -1
  30. {biocypher-0.6.2.dist-info → biocypher-0.8.0.dist-info}/LICENSE +1 -1
  31. {biocypher-0.6.2.dist-info → biocypher-0.8.0.dist-info}/METADATA +3 -3
  32. biocypher-0.8.0.dist-info/RECORD +43 -0
  33. {biocypher-0.6.2.dist-info → biocypher-0.8.0.dist-info}/WHEEL +1 -1
  34. biocypher-0.6.2.dist-info/RECORD +0 -39

biocypher/output/write/_get_writer.py

@@ -1,38 +1,27 @@
-#!/usr/bin/env python
-
-#
-# Copyright 2021, Heidelberg University Clinic
-#
-# File author(s): Sebastian Lobentanzer
-#                  Michael Hartung
-#
-# Distributed under MIT licence, see the file `LICENSE`.
-#
 """
 BioCypher 'offline' module. Handles the writing of node and edge representations
 suitable for import into a DBMS.
 """
 
+from typing import TYPE_CHECKING
+
+from biocypher._config import config as _config
 from biocypher._logger import logger
-from biocypher.output.write.graph._rdf import _RDFWriter
-from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
 from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
+from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
 from biocypher.output.write.graph._networkx import _NetworkXWriter
+from biocypher.output.write.graph._rdf import _RDFWriter
 from biocypher.output.write.relational._csv import _PandasCSVWriter
-from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
 from biocypher.output.write.relational._postgresql import _PostgreSQLBatchWriter
+from biocypher.output.write.relational._sqlite import _SQLiteBatchWriter
 
 logger.debug(f"Loading module {__name__}.")
 
-from typing import TYPE_CHECKING
-
-from biocypher._config import config as _config
-
 __all__ = ["get_writer", "DBMS_TO_CLASS"]
 
 if TYPE_CHECKING:
-    from biocypher._translate import Translator
     from biocypher._deduplicate import Deduplicator
+    from biocypher._translate import Translator
 
 DBMS_TO_CLASS = {
     "neo": _Neo4jBatchWriter,
@@ -52,6 +41,8 @@ DBMS_TO_CLASS = {
     "CSV": _PandasCSVWriter,
     "pandas": _PandasCSVWriter,
     "Pandas": _PandasCSVWriter,
+    "tabular": _PandasCSVWriter,
+    "Tabular": _PandasCSVWriter,
     "networkx": _NetworkXWriter,
     "NetworkX": _NetworkXWriter,
 }
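
The functional addition in this mapping is the pair of `tabular`/`Tabular` aliases, which resolve to the same `_PandasCSVWriter` as the existing `CSV`/`pandas`/`Pandas` keys. A minimal sketch of the lookup, assuming `DBMS_TO_CLASS` is importable from `biocypher.output.write._get_writer` (the path given in the file list; the module is not shown in full here):

# Illustrative only: resolve a DBMS name to its writer class via the mapping above.
from biocypher.output.write._get_writer import DBMS_TO_CLASS

writer_cls = DBMS_TO_CLASS["tabular"]          # new alias in 0.8.0
assert writer_cls is DBMS_TO_CLASS["pandas"]   # same class as the pre-existing keys
print(writer_cls.__name__)                     # _PandasCSVWriter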
@@ -99,12 +90,8 @@ def get_writer(
         import_call_file_prefix=dbms_config.get("import_call_file_prefix"),
         wipe=dbms_config.get("wipe"),
         strict_mode=strict_mode,
-        skip_bad_relationships=dbms_config.get(
-            "skip_bad_relationships"
-        ),  # neo4j
-        skip_duplicate_nodes=dbms_config.get(
-            "skip_duplicate_nodes"
-        ),  # neo4j
+        skip_bad_relationships=dbms_config.get("skip_bad_relationships"),  # neo4j
+        skip_duplicate_nodes=dbms_config.get("skip_duplicate_nodes"),  # neo4j
         db_user=dbms_config.get("user"),  # psql
         db_password=dbms_config.get("password"),  # psql
         db_port=dbms_config.get("port"),  # psql
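
For orientation, the keys read from `dbms_config` in this hunk map to DBMS-specific settings in the BioCypher configuration. A hypothetical dictionary using exactly the key names visible above; the values are placeholders, not documented defaults:

# Placeholder values; only the key names are taken from the hunk above.
dbms_config = {
    "import_call_file_prefix": "/import/",
    "wipe": True,
    "skip_bad_relationships": True,  # read by the Neo4j batch writer
    "skip_duplicate_nodes": True,    # read by the Neo4j batch writer
    "user": "postgres",              # read by the PostgreSQL writer
    "password": "changeme",          # read by the PostgreSQL writer
    "port": 5432,                    # read by the PostgreSQL writer
}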

biocypher/output/write/_writer.py

@@ -1,12 +1,12 @@
+import os
+
 from abc import ABC, abstractmethod
-from typing import Union, Optional
 from collections.abc import Iterable
-import os
 
 from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
+from biocypher._deduplicate import Deduplicator
 from biocypher._logger import logger
 from biocypher._translate import Translator
-from biocypher._deduplicate import Deduplicator
 
 __all__ = ["_Writer"]
 
@@ -22,26 +22,28 @@ class _Writer(ABC):
     - _get_import_script_name
 
     Args:
+    ----
        translator (Translator): Instance of :py:class:`Translator` to enable translation of
            nodes and manipulation of properties.
        deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
            of nodes and edges.
        output_directory (str, optional): Path for exporting CSV files. Defaults to None.
        strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
-       strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
 
    Raises:
+   ------
        NotImplementedError: Writer implementation must override '_write_node_data'
        NotImplementedError: Writer implementation must override '_write_edge_data'
        NotImplementedError: Writer implementation must override '_construct_import_call'
        NotImplementedError: Writer implementation must override '_get_import_script_name'
+
    """
 
    def __init__(
        self,
        translator: Translator,
        deduplicator: Deduplicator,
-       output_directory: Optional[str] = None,
+       output_directory: str | None = None,
        strict_mode: bool = False,
        *args,
        **kwargs,
@@ -49,13 +51,14 @@ class _Writer(ABC):
        """Abstract class for writing node and edge representations to disk.
 
        Args:
+       ----
            translator (Translator): Instance of :py:class:`Translator` to enable translation of
                nodes and manipulation of properties.
            deduplicator (Deduplicator): Instance of :py:class:`Deduplicator` to enable deduplication
                of nodes and edges.
            output_directory (str, optional): Path for exporting CSV files. Defaults to None.
            strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
-           strict_mode (bool, optional): Whether to enforce source, version, and license properties. Defaults to False.
+
        """
        self.translator = translator
        self.deduplicator = deduplicator
@@ -66,7 +69,7 @@ class _Writer(ABC):
            if kwargs.get("write_to_file", True):
                logger.warning(
                    f"Output directory `{self.output_directory}` already exists. "
-                   "If this is not planned, file consistency may be compromised."
+                   "If this is not planned, file consistency may be compromised.",
                )
        else:
            logger.info(f"Creating output directory `{self.output_directory}`.")
@@ -75,73 +78,69 @@ class _Writer(ABC):
    @abstractmethod
    def _write_node_data(
        self,
-       nodes: Iterable[
-           Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
-       ],
+       nodes: Iterable[BioCypherNode | BioCypherEdge | BioCypherRelAsNode],
    ) -> bool:
        """Implement how to output.write nodes to disk.
 
        Args:
+       ----
            nodes (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
 
        Returns:
+       -------
            bool: The return value. True for success, False otherwise.
+
        """
-       raise NotImplementedError(
-           "Writer implementation must override 'write_nodes'"
-       )
+       raise NotImplementedError("Writer implementation must override 'write_nodes'")
 
    @abstractmethod
    def _write_edge_data(
        self,
-       edges: Iterable[
-           Union[BioCypherNode, BioCypherEdge, BioCypherRelAsNode]
-       ],
+       edges: Iterable[BioCypherNode | BioCypherEdge | BioCypherRelAsNode],
    ) -> bool:
        """Implement how to output.write edges to disk.
 
        Args:
+       ----
            edges (Iterable): An iterable of BioCypherNode / BioCypherEdge / BioCypherRelAsNode objects.
 
        Returns:
+       -------
            bool: The return value. True for success, False otherwise.
+
        """
-       raise NotImplementedError(
-           "Writer implementation must override 'write_edges'"
-       )
+       raise NotImplementedError("Writer implementation must override 'write_edges'")
 
    @abstractmethod
    def _construct_import_call(self) -> str:
-       """
-       Function to construct the import call detailing folder and
+       """Function to construct the import call detailing folder and
        individual node and edge headers and data files, as well as
        delimiters and database name. Built after all data has been
        processed to ensure that nodes are called before any edges.
 
-       Returns:
+       Returns
+       -------
            str: command for importing the output files into a DBMS.
+
        """
-       raise NotImplementedError(
-           "Writer implementation must override '_construct_import_call'"
-       )
+       raise NotImplementedError("Writer implementation must override '_construct_import_call'")
 
    @abstractmethod
    def _get_import_script_name(self) -> str:
        """Returns the name of the import script.
 
-       Returns:
+       Returns
+       -------
            str: The name of the import script (ending in .sh)
+
        """
-       raise NotImplementedError(
-           "Writer implementation must override '_get_import_script_name'"
-       )
+       raise NotImplementedError("Writer implementation must override '_get_import_script_name'")
 
-   def write_nodes(
-       self, nodes, batch_size: int = int(1e6), force: bool = False
-   ):
+   def write_nodes(self, nodes, batch_size: int = int(1e6), force: bool = False):
        """Wrapper for writing nodes.
 
        Args:
+       ----
            nodes (BioCypherNode): a list or generator of nodes in
                :py:class:`BioCypherNode` format
            batch_size (int): The batch size for writing nodes.
@@ -149,7 +148,9 @@ class _Writer(ABC):
                not present in the schema.
 
        Returns:
+       -------
            bool: The return value. True for success, False otherwise.
+
        """
        passed = self._write_node_data(nodes)
        if not passed:
@@ -157,12 +158,11 @@ class _Writer(ABC):
            return False
        return True
 
-   def write_edges(
-       self, edges, batch_size: int = int(1e6), force: bool = False
-   ):
+   def write_edges(self, edges, batch_size: int = int(1e6), force: bool = False):
        """Wrapper for writing edges.
 
        Args:
+       ----
            nodes (BioCypherNode): a list or generator of nodes in
                :py:class:`BioCypherNode` format
            batch_size (int): The batch size for writing nodes.
@@ -170,7 +170,9 @@ class _Writer(ABC):
                not present in the schema.
 
        Returns:
+       -------
            bool: The return value. True for success, False otherwise.
+
        """
        passed = self._write_edge_data(edges)
        if not passed:
@@ -179,20 +181,17 @@ class _Writer(ABC):
        return True
 
    def write_import_call(self):
-       """
-       Function to output.write the import call detailing folder and
+       """Function to output.write the import call detailing folder and
        individual node and edge headers and data files, as well as
        delimiters and database name, to the export folder as txt.
 
-       Returns:
+       Returns
+       -------
            str: The path of the file holding the import call.
+
        """
-       file_path = os.path.join(
-           self.output_directory, self._get_import_script_name()
-       )
-       logger.info(
-           f"Writing {self.__class__.__name__} import call to `{file_path}`."
-       )
+       file_path = os.path.join(self.output_directory, self._get_import_script_name())
+       logger.info(f"Writing {self.__class__.__name__} import call to `{file_path}`.")
 
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(self._construct_import_call())
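
As a reading aid for this refactor of the abstract writer, a hypothetical subclass implementing the four abstract methods named in the docstring might look as follows. The class, its behavior, and its return values are illustrative only; the import path follows the file list above.

# Minimal sketch of a custom writer against the abstract interface shown above.
# The class below is hypothetical and not part of BioCypher.
from biocypher.output.write._writer import _Writer


class _TSVDumpWriter(_Writer):
    """Toy writer that pretends to dump entities and needs no import step."""

    def _write_node_data(self, nodes) -> bool:
        # A real writer would serialise the BioCypherNode objects here.
        return True

    def _write_edge_data(self, edges) -> bool:
        # A real writer would serialise the BioCypherEdge objects here.
        return True

    def _construct_import_call(self) -> str:
        # Command written to the import script by write_import_call().
        return "echo 'no import step needed for TSV dumps'"

    def _get_import_script_name(self) -> str:
        return "tsv-dump-import-call.sh"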

biocypher/output/write/graph/_arangodb.py

@@ -61,9 +61,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
            # check if file already exists
            if os.path.exists(header_path):
-               logger.warning(
-                   f"File {header_path} already exists. Overwriting."
-               )
+               logger.warning(f"File {header_path} already exists. Overwriting.")
 
            # concatenate key:value in props
            props_list = []
@@ -81,9 +79,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
                f.write(row)
 
            # add collection from schema config
-           collection = self.translator.ontology.mapping.extended_schema[
-               label
-           ].get("db_collection_name", None)
+           collection = self.translator.ontology.mapping.extended_schema[label].get("db_collection_name", None)
 
            # add file path to neo4 admin import statement
            # do once for each part file
@@ -91,8 +87,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
            if not parts:
                raise ValueError(
-                   f"No parts found for node label {label}. "
-                   f"Check that the data was parsed first.",
+                   f"No parts found for node label {label}. " f"Check that the data was parsed first.",
                )
 
            for part in parts:
@@ -145,9 +140,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
            # check for file exists
            if os.path.exists(header_path):
-               logger.warning(
-                   f"Header file {header_path} already exists. Overwriting."
-               )
+               logger.warning(f"Header file {header_path} already exists. Overwriting.")
 
            # concatenate key:value in props
            props_list = []
@@ -172,9 +165,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
                    break
 
            else:
-               collection = self.translator.ontology.mapping.extended_schema[
-                   label
-               ].get("db_collection_name", None)
+               collection = self.translator.ontology.mapping.extended_schema[label].get("db_collection_name", None)
 
            # add file path to neo4 admin import statement (import call path
            # may be different from actual output path)
@@ -206,11 +197,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
        Returns:
            str: a bash command for neo4j-admin import
        """
-       import_call = (
-           f"{self.import_call_bin_prefix}arangoimp "
-           f"--type csv "
-           f'--separator="{self.escaped_delim}" '
-       )
+       import_call = f"{self.import_call_bin_prefix}arangoimp " f"--type csv " f'--separator="{self.escaped_delim}" '
 
        if self.quote == "'":
            import_call += f'--quote="{self.quote}" '
@@ -221,11 +208,7 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
 
        # node import calls: one line per node type
        for header_path, parts_path, collection in self.import_call_nodes:
-           line = (
-               f"{import_call} "
-               f"--headers-file {header_path} "
-               f"--file= {parts_path} "
-           )
+           line = f"{import_call} --headers-file {header_path} --file= {parts_path} "
 
            if collection:
                line += f"--create-collection --collection {collection} "
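
To make the reformatted one-liners easier to read, here is a hypothetical rendering of a single node-import line, mirroring the f-strings in the hunk above; the variable values are placeholders standing in for the writer attributes.

# Placeholder values; the string assembly mirrors the hunk above.
import_call_bin_prefix = ""                      # assumption: arangoimp is on PATH
escaped_delim = ";"
header_path = "/outdir/Protein-header.csv"       # hypothetical paths
parts_path = "/outdir/Protein-part000.csv"
collection = "proteins"

import_call = f"{import_call_bin_prefix}arangoimp " f"--type csv " f'--separator="{escaped_delim}" '
line = f"{import_call} --headers-file {header_path} --file= {parts_path} "
if collection:
    line += f"--create-collection --collection {collection} "
print(line)  # arangoimp --type csv --separator=";" --headers-file ... --file= ... --create-collection --collection proteins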

biocypher/output/write/graph/_neo4j.py

@@ -1,12 +1,11 @@
 import os
 
 from biocypher._logger import logger
-from biocypher.output.write._batch_writer import parse_label, _BatchWriter
+from biocypher.output.write._batch_writer import _BatchWriter, parse_label
 
 
 class _Neo4jBatchWriter(_BatchWriter):
-    """
-    Class for writing node and edge representations to disk using the
+    """Class for writing node and edge representations to disk using the
     format specified by Neo4j for the use of admin import. Each batch
     writer instance has a fixed representation that needs to be passed
     at instantiation via the :py:attr:`schema` argument. The instance
@@ -23,26 +22,26 @@ class _Neo4jBatchWriter(_BatchWriter):
    """
 
    def __init__(self, *args, **kwargs):
-       """
-       Constructor.
+       """Constructor.
 
        Check the version of Neo4j and adds a command scope if version >= 5.
 
-       Returns:
+       Returns
+       -------
            _Neo4jBatchWriter: An instance of the writer.
-       """
 
+       """
        # Should read the configuration and setup import_call_bin_prefix.
        super().__init__(*args, **kwargs)
 
    def _get_default_import_call_bin_prefix(self):
-       """
-       Method to provide the default string for the import call bin prefix.
+       """Method to provide the default string for the import call bin prefix.
 
-       Returns:
+       Returns
+       -------
            str: The default location for the neo4j admin import location
-       """
 
+       """
        return "bin/"
 
    def _quote_string(self, value: str) -> str:
@@ -53,27 +52,30 @@ class _Neo4jBatchWriter(_BatchWriter):
        return f"{self.quote}{value.replace(self.quote, self.quote * 2)}{self.quote}"
 
    def _write_array_string(self, string_list):
-       """
-       Abstract method to output.write the string representation of an array into a .csv file
+       """Abstract method to output.write the string representation of an array into a .csv file
        as required by the neo4j admin-import.
 
        Args:
+       ----
            string_list (list): list of ontology strings
 
        Returns:
+       -------
            str: The string representation of an array for the neo4j admin import
+
        """
        string = self.adelim.join(string_list)
        return self._quote_string(string)
 
    def _write_node_headers(self):
-       """
-       Writes single CSV file for a graph entity that is represented
+       """Writes single CSV file for a graph entity that is represented
        as a node as per the definition in the `schema_config.yaml`,
        containing only the header for this type of node.
 
-       Returns:
+       Returns
+       -------
            bool: The return value. True for success, False otherwise.
+
        """
        # load headers from data parse
        if not self.node_property_dict:
@@ -86,9 +88,7 @@ class _Neo4jBatchWriter(_BatchWriter):
            _id = ":ID"
 
            # translate label to PascalCase
-           pascal_label = self.translator.name_sentence_to_pascal(
-               parse_label(label)
-           )
+           pascal_label = self.translator.name_sentence_to_pascal(parse_label(label))
 
            header = f"{pascal_label}-header.csv"
            header_path = os.path.join(
@@ -143,20 +143,19 @@ class _Neo4jBatchWriter(_BatchWriter):
                self.import_call_file_prefix,
                parts,
            )
-           self.import_call_nodes.add(
-               (import_call_header_path, import_call_parts_path)
-           )
+           self.import_call_nodes.add((import_call_header_path, import_call_parts_path))
 
        return True
 
    def _write_edge_headers(self):
-       """
-       Writes single CSV file for a graph entity that is represented
+       """Writes single CSV file for a graph entity that is represented
        as an edge as per the definition in the `schema_config.yaml`,
        containing only the header for this type of edge.
 
-       Returns:
+       Returns
+       -------
            bool: The return value. True for success, False otherwise.
+
        """
        # load headers from data parse
        if not self.edge_property_dict:
@@ -167,9 +166,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
        for label, props in self.edge_property_dict.items():
            # translate label to PascalCase
-           pascal_label = self.translator.name_sentence_to_pascal(
-               parse_label(label)
-           )
+           pascal_label = self.translator.name_sentence_to_pascal(parse_label(label))
 
            # paths
            header = f"{pascal_label}-header.csv"
@@ -181,9 +178,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
            # check for file exists
            if os.path.exists(header_path):
-               logger.warning(
-                   f"File {header_path} already exists. Overwriting."
-               )
+               logger.warning(f"File {header_path} already exists. Overwriting.")
 
            # concatenate key:value in props
            props_list = []
@@ -213,9 +208,7 @@ class _Neo4jBatchWriter(_BatchWriter):
 
            if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
                skip_id = True
-           elif not self.translator.ontology.mapping.extended_schema.get(
-               label
-           ):
+           elif not self.translator.ontology.mapping.extended_schema.get(label):
                # find label in schema by label_as_edge
                for (
                    k,
@@ -231,10 +224,10 @@ class _Neo4jBatchWriter(_BatchWriter):
 
            if schema_label:
                if (
-                   self.translator.ontology.mapping.extended_schema.get(
-                       schema_label
+                   self.translator.ontology.mapping.extended_schema.get(  # (seems to not work with 'not')
+                       schema_label,
                    ).get("use_id")
-                   == False
+                   == False  # noqa: E712 (seems to not work with 'not')
                ):
                    skip_id = True
 
@@ -259,54 +252,56 @@ class _Neo4jBatchWriter(_BatchWriter):
                self.import_call_file_prefix,
                parts,
            )
-           self.import_call_edges.add(
-               (import_call_header_path, import_call_parts_path)
-           )
+           self.import_call_edges.add((import_call_header_path, import_call_parts_path))
 
        return True
 
    def _get_import_script_name(self) -> str:
-       """
-       Returns the name of the neo4j admin import script
+       """Returns the name of the neo4j admin import script
 
-       Returns:
+       Returns
+       -------
            str: The name of the import script (ending in .sh)
+
        """
        return "neo4j-admin-import-call.sh"
 
    def _construct_import_call(self) -> str:
-       """
-       Function to construct the import call detailing folder and
+       """Function to construct the import call detailing folder and
        individual node and edge headers and data files, as well as
        delimiters and database name. Built after all data has been
        processed to ensure that nodes are called before any edges.
 
-       Returns:
+       Returns
+       -------
            str: a bash command for neo4j-admin import
+
        """
-       import_call_neo4j_v4 = self._get_import_call(
-           "import", "--database=", "--force="
-       )
-       import_call_neo4j_v5 = self._get_import_call(
-           "database import full", "", "--overwrite-destination="
+       import_call_neo4j_v4 = self._get_import_call("import", "--database=", "--force=")
+       import_call_neo4j_v5 = self._get_import_call("database import full", "", "--overwrite-destination=")
+       neo4j_version_check = (
+           f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
        )
-       neo4j_version_check = f"version=$({self._get_default_import_call_bin_prefix()}neo4j-admin --version | cut -d '.' -f 1)"
 
-       import_script = f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
+       import_script = (
+           f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; "
+           f"then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
+       )
        return import_script
 
-   def _get_import_call(
-       self, import_cmd: str, database_cmd: str, wipe_cmd: str
-   ) -> str:
+   def _get_import_call(self, import_cmd: str, database_cmd: str, wipe_cmd: str) -> str:
        """Get parametrized import call for Neo4j 4 or 5+.
 
        Args:
+       ----
            import_cmd (str): The import command to use.
            database_cmd (str): The database command to use.
            wipe_cmd (str): The wipe command to use.
 
        Returns:
+       -------
            str: The import call.
+
        """
        import_call = f"{self.import_call_bin_prefix}neo4j-admin {import_cmd} "
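
For readers checking the refactored script assembly, here is a sketch of the shell script that `_construct_import_call` returns, with the two `neo4j-admin` commands left as placeholders since the full `_get_import_call` body is not shown here.

# Sketch only: assembles the same script skeleton as _construct_import_call above.
bin_prefix = "bin/"  # default from _get_default_import_call_bin_prefix
neo4j_version_check = f"version=$({bin_prefix}neo4j-admin --version | cut -d '.' -f 1)"
import_call_neo4j_v4 = f"{bin_prefix}neo4j-admin import ..."                 # placeholder for the v4 command
import_call_neo4j_v5 = f"{bin_prefix}neo4j-admin database import full ..."   # placeholder for the v5 command

import_script = (
    f"#!/bin/bash\n{neo4j_version_check}\nif [[ $version -ge 5 ]]; "
    f"then\n\t{import_call_neo4j_v5}\nelse\n\t{import_call_neo4j_v4}\nfi"
)
print(import_script)  # bash script that picks the v4 or v5 import call at run time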