PyPI - biocypher - Versions diffs - 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl - Mend

biocypher 0.5.19-py3-none-any.whl → 0.5.20-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biocypher might be problematic. Click here for more details.

Files changed (12)

biocypher/_connect.py +6 -12
biocypher/_core.py +87 -20
biocypher/_deduplicate.py +72 -30
biocypher/_metadata.py +1 -1
biocypher/_ontology.py +11 -9
biocypher/_pandas.py +32 -7
biocypher/_translate.py +29 -26
biocypher/_write.py +75 -57
{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/METADATA +4 -2
{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/RECORD +12 -12
{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/LICENSE +0 -0
{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/WHEEL +0 -0

biocypher/_connect.py CHANGED Viewed

@@ -53,8 +53,6 @@ class _Neo4jDriver:
         increment_version (bool): Whether to increment the version number.
-        ontology (Ontology): The ontology to use for mapping.
         translator (Translator): The translator to use for mapping.
     """
@@ -66,14 +64,12 @@ class _Neo4jDriver:
         user: str,
         password: str,
         multi_db: bool,
-        ontology: Ontology,
         translator: Translator,
         wipe: bool = False,
         fetch_size: int = 1000,
         increment_version: bool = True,
     ):
-        self._ontology = ontology
-        self._translator = translator
+        self.translator = translator
         self._driver = neo4j_utils.Driver(
             db_name=database_name,
@@ -103,7 +99,7 @@ class _Neo4jDriver:
             "MATCH (v:BioCypher) " "WHERE NOT (v)-[:PRECEDES]->() " "RETURN v",
         )
         # add version node
-        self.add_biocypher_nodes(self._ontology)
+        self.add_biocypher_nodes(self.translator.ontology)
         # connect version node to previous
         if db_version[0]:
@@ -111,7 +107,7 @@ class _Neo4jDriver:
             previous_id = previous["v"]["id"]
             e_meta = BioCypherEdge(
                 previous_id,
-                self._ontology.get_dict().get("node_id"),
+                self.translator.ontology.get_dict().get("node_id"),
                 "PRECEDES",
             )
             self.add_biocypher_edges(e_meta)
@@ -142,7 +138,7 @@ class _Neo4jDriver:
         logger.info("Creating constraints for node types in config.")
         # get structure
-        for leaf in self._ontology.extended_schema.items():
+        for leaf in self.translator.ontology.mapping.extended_schema.items():
             label = _misc.sentencecase_to_pascalcase(leaf[0])
             if leaf[1]["represented_as"] == "node":
                 s = (
@@ -172,7 +168,7 @@ class _Neo4jDriver:
                 - second entry: Neo4j summary.
         """
-        bn = self._translator.translate_nodes(id_type_tuples)
+        bn = self.translator.translate_nodes(id_type_tuples)
         return self.add_biocypher_nodes(bn)
     def add_edges(self, id_src_tar_type_tuples: Iterable[tuple]) -> tuple:
@@ -204,7 +200,7 @@ class _Neo4jDriver:
                 - second entry: Neo4j summary.
         """
-        bn = self._translator.translate_edges(id_src_tar_type_tuples)
+        bn = self.translator.translate_edges(id_src_tar_type_tuples)
         return self.add_biocypher_edges(bn)
     def add_biocypher_nodes(
@@ -375,7 +371,6 @@ class _Neo4jDriver:
 def get_driver(
     dbms: str,
     translator: "Translator",
-    ontology: "Ontology",
 ):
     """
     Function to return the writer class.
@@ -394,7 +389,6 @@ def get_driver(
             user=dbms_config["user"],
             password=dbms_config["password"],
             multi_db=dbms_config["multi_db"],
-            ontology=ontology,
             translator=translator,
         )

biocypher/_core.py CHANGED Viewed

@@ -13,8 +13,10 @@ BioCypher core module. Interfaces with the user and distributes tasks to
 submodules.
 """
 from typing import Optional
+import os
 from more_itertools import peekable
+import yaml
 import pandas as pd
@@ -25,7 +27,7 @@ logger.debug(f"Loading module {__name__}.")
 from ._write import get_writer
 from ._config import config as _config
 from ._config import update_from_file as _file_update
-from ._create import BioCypherEdge, BioCypherNode
+from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
 from ._pandas import Pandas
 from ._connect import get_driver
 from ._mapping import OntologyMapping
@@ -181,19 +183,6 @@ class BioCypher:
         return self._ontology_mapping
-    def _get_translator(self) -> Translator:
-        """
-        Create translator if not exists and return.
-        """
-        if not self._translator:
-            self._translator = Translator(
-                ontology_mapping=self._get_ontology_mapping(),
-                strict_mode=self._strict_mode,
-            )
-        return self._translator
     def _get_ontology(self) -> Ontology:
         """
         Create ontology if not exists and return.
@@ -208,17 +197,28 @@ class BioCypher:
         return self._ontology
+    def _get_translator(self) -> Translator:
+        """
+        Create translator if not exists and return.
+        """
+        if not self._translator:
+            self._translator = Translator(
+                ontology=self._get_ontology(),
+                strict_mode=self._strict_mode,
+            )
+        return self._translator
     def _get_writer(self):
         """
         Create writer if not online. Set as instance variable `self._writer`.
         """
-        # Get worker
         if self._offline:
             self._writer = get_writer(
                 dbms=self._dbms,
                 translator=self._get_translator(),
-                ontology=self._get_ontology(),
                 deduplicator=self._get_deduplicator(),
                 output_directory=self._output_directory,
                 strict_mode=self._strict_mode,
@@ -235,7 +235,6 @@ class BioCypher:
             self._driver = get_driver(
                 dbms=self._dbms,
                 translator=self._get_translator(),
-                ontology=self._get_ontology(),
                 deduplicator=self._get_deduplicator(),
             )
         else:
@@ -318,14 +317,15 @@ class BioCypher:
         if not self._pd:
             self._pd = Pandas(
                 translator=self._get_translator(),
-                ontology=self._get_ontology(),
                 deduplicator=self._get_deduplicator(),
             )
         entities = peekable(entities)
-        if isinstance(entities.peek(), BioCypherNode) or isinstance(
-            entities.peek(), BioCypherEdge
+        if (
+            isinstance(entities.peek(), BioCypherNode)
+            or isinstance(entities.peek(), BioCypherEdge)
+            or isinstance(entities.peek(), BioCypherRelAsNode)
         ):
             tentities = entities
         elif len(entities.peek()) < 4:
@@ -504,6 +504,73 @@ class BioCypher:
         self._writer.write_import_call()
+    def write_schema_info(self) -> None:
+        """
+        Write an extended schema info YAML file that extends the
+        `schema_config.yaml` with run-time information of the built KG. For
+        instance, include information on whether something present in the actual
+        knowledge graph, whether it is a relationship (which is important in the
+        case of representing relationships as nodes) and the actual sources and
+        targets of edges. Since this file can be used in place of the original
+        `schema_config.yaml` file, it indicates that it is the extended schema
+        by setting `is_schema_info` to `true`.
+        We start by using the `extended_schema` dictionary from the ontology
+        class instance, which contains all expanded entities and relationships.
+        The information of whether something is a relationship can be gathered
+        from the deduplicator instance, which keeps track of all entities that
+        have been seen.
+        """
+        if not self._offline:
+            raise NotImplementedError(
+                "Cannot write schema info in online mode."
+            )
+        ontology = self._get_ontology()
+        schema = ontology.mapping.extended_schema
+        schema["is_schema_info"] = True
+        deduplicator = self._get_deduplicator()
+        for node in deduplicator.entity_types:
+            if node in schema.keys():
+                schema[node]["present_in_knowledge_graph"] = True
+                schema[node]["is_relationship"] = False
+            else:
+                logger.info(
+                    f"Node {node} not present in extended schema. "
+                    "Skipping schema info."
+                )
+        # find 'label_as_edge' cases in schema entries
+        changed_labels = {}
+        for k, v in schema.items():
+            if not isinstance(v, dict):
+                continue
+            if "label_as_edge" in v.keys():
+                if v["label_as_edge"] in deduplicator.seen_relationships.keys():
+                    changed_labels[v["label_as_edge"]] = k
+        for edge in deduplicator.seen_relationships.keys():
+            if edge in changed_labels.keys():
+                edge = changed_labels[edge]
+            if edge in schema.keys():
+                schema[edge]["present_in_knowledge_graph"] = True
+                schema[edge]["is_relationship"] = True
+                # TODO information about source and target nodes
+            else:
+                logger.info(
+                    f"Edge {edge} not present in extended schema. "
+                    "Skipping schema info."
+                )
+        # write to output directory as YAML file
+        path = os.path.join(self._output_directory, "schema_info.yaml")
+        with open(path, "w") as f:
+            f.write(yaml.dump(schema))
+        return schema
     # TRANSLATION METHODS ###
     def translate_term(self, term: str) -> str:

biocypher/_deduplicate.py CHANGED Viewed

@@ -2,7 +2,7 @@ from ._logger import logger
 logger.debug(f"Loading module {__name__}.")
-from ._create import BioCypherEdge, BioCypherNode
+from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
 class Deduplicator:
@@ -19,15 +19,17 @@ class Deduplicator:
     """
     def __init__(self):
-        self.seen_node_ids = set()
-        self.duplicate_node_ids = set()
-        self.duplicate_node_types = set()
+        self.seen_entity_ids = set()
+        self.duplicate_entity_ids = set()
-        self.seen_edges = {}
-        self.duplicate_edge_ids = set()
-        self.duplicate_edge_types = set()
+        self.entity_types = set()
+        self.duplicate_entity_types = set()
-    def node_seen(self, node: BioCypherNode) -> bool:
+        self.seen_relationships = {}
+        self.duplicate_relationship_ids = set()
+        self.duplicate_relationship_types = set()
+    def node_seen(self, entity: BioCypherNode) -> bool:
         """
         Adds a node to the instance and checks if it has been seen before.
@@ -37,19 +39,22 @@ class Deduplicator:
         Returns:
             True if the node has been seen before, False otherwise.
         """
-        if node.get_id() in self.seen_node_ids:
-            self.duplicate_node_ids.add(node.get_id())
-            if node.get_label() not in self.duplicate_node_types:
+        if entity.get_label() not in self.entity_types:
+            self.entity_types.add(entity.get_label())
+        if entity.get_id() in self.seen_entity_ids:
+            self.duplicate_entity_ids.add(entity.get_id())
+            if entity.get_label() not in self.duplicate_entity_types:
                 logger.warning(
-                    f"Duplicate node type {node.get_label()} found. "
+                    f"Duplicate node type {entity.get_label()} found. "
                 )
-                self.duplicate_node_types.add(node.get_label())
+                self.duplicate_entity_types.add(entity.get_label())
             return True
-        self.seen_node_ids.add(node.get_id())
+        self.seen_entity_ids.add(entity.get_id())
         return False
-    def edge_seen(self, edge: BioCypherEdge) -> bool:
+    def edge_seen(self, relationship: BioCypherEdge) -> bool:
         """
         Adds an edge to the instance and checks if it has been seen before.
@@ -59,23 +64,57 @@ class Deduplicator:
         Returns:
             True if the edge has been seen before, False otherwise.
         """
-        if edge.get_type() not in self.seen_edges:
-            self.seen_edges[edge.get_type()] = set()
+        if relationship.get_type() not in self.seen_relationships:
+            self.seen_relationships[relationship.get_type()] = set()
         # concatenate source and target if no id is present
-        if not edge.get_id():
-            _id = f"{edge.get_source_id()}_{edge.get_target_id()}"
+        if not relationship.get_id():
+            _id = (
+                f"{relationship.get_source_id()}_{relationship.get_target_id()}"
+            )
         else:
-            _id = edge.get_id()
+            _id = relationship.get_id()
+        if _id in self.seen_relationships[relationship.get_type()]:
+            self.duplicate_relationship_ids.add(_id)
+            if relationship.get_type() not in self.duplicate_relationship_types:
+                logger.warning(
+                    f"Duplicate edge type {relationship.get_type()} found. "
+                )
+                self.duplicate_relationship_types.add(relationship.get_type())
+            return True
+        self.seen_relationships[relationship.get_type()].add(_id)
+        return False
+    def rel_as_node_seen(self, rel_as_node: BioCypherRelAsNode) -> bool:
+        """
+        Adds a rel_as_node to the instance (one entity and two relationships)
+        and checks if it has been seen before. Only the node is relevant for
+        identifying the rel_as_node as a duplicate.
+        Args:
+            rel_as_node: BioCypherRelAsNode to be added.
+        Returns:
+            True if the rel_as_node has been seen before, False otherwise.
+        """
+        node = rel_as_node.get_node()
+        if node.get_label() not in self.seen_relationships:
+            self.seen_relationships[node.get_label()] = set()
+        # rel as node always has an id
+        _id = node.get_id()
-        if _id in self.seen_edges[edge.get_type()]:
-            self.duplicate_edge_ids.add(_id)
-            if edge.get_type() not in self.duplicate_edge_types:
-                logger.warning(f"Duplicate edge type {edge.get_type()} found. ")
-                self.duplicate_edge_types.add(edge.get_type())
+        if _id in self.seen_relationships[node.get_type()]:
+            self.duplicate_relationship_ids.add(_id)
+            if node.get_type() not in self.duplicate_relationship_types:
+                logger.warning(f"Duplicate edge type {node.get_type()} found. ")
+                self.duplicate_relationship_types.add(node.get_type())
             return True
-        self.seen_edges[edge.get_type()].add(_id)
+        self.seen_relationships[node.get_type()].add(_id)
         return False
     def get_duplicate_nodes(self):
@@ -86,8 +125,8 @@ class Deduplicator:
             list: list of duplicate nodes
         """
-        if self.duplicate_node_types:
-            return (self.duplicate_node_types, self.duplicate_node_ids)
+        if self.duplicate_entity_types:
+            return (self.duplicate_entity_types, self.duplicate_entity_ids)
         else:
             return None
@@ -99,7 +138,10 @@ class Deduplicator:
             list: list of duplicate edges
         """
-        if self.duplicate_edge_types:
-            return (self.duplicate_edge_types, self.duplicate_edge_ids)
+        if self.duplicate_relationship_types:
+            return (
+                self.duplicate_relationship_types,
+                self.duplicate_relationship_ids,
+            )
         else:
             return None

biocypher/_metadata.py CHANGED Viewed

@@ -19,7 +19,7 @@ import importlib.metadata
 import toml
-_VERSION = "0.5.19"
+_VERSION = "0.5.20"
 def get_metadata():

biocypher/_ontology.py CHANGED Viewed

@@ -269,7 +269,7 @@ class Ontology:
         """
         self._head_ontology_meta = head_ontology
-        self.extended_schema = ontology_mapping.extended_schema
+        self.mapping = ontology_mapping
         self._tail_ontology_meta = tail_ontologies
         self._tail_ontologies = None
@@ -403,7 +403,7 @@ class Ontology:
         if not self._nx_graph:
             self._nx_graph = self._head_ontology.get_nx_graph().copy()
-        for key, value in self.extended_schema.items():
+        for key, value in self.mapping.extended_schema.items():
             if not value.get("is_a"):
                 if self._nx_graph.has_node(value.get("synonym_for")):
                     continue
@@ -485,7 +485,7 @@ class Ontology:
         setting the synonym as the primary node label.
         """
-        for key, value in self.extended_schema.items():
+        for key, value in self.mapping.extended_schema.items():
             if key in self._nx_graph.nodes:
                 self._nx_graph.nodes[key].update(value)
@@ -541,9 +541,9 @@ class Ontology:
         if not full:
             # set of leaves and their intermediate parents up to the root
-            filter_nodes = set(self.extended_schema.keys())
+            filter_nodes = set(self.mapping.extended_schema.keys())
-            for node in self.extended_schema.keys():
+            for node in self.mapping.extended_schema.keys():
                 filter_nodes.update(self.get_ancestors(node).nodes)
             # filter graph
@@ -557,11 +557,13 @@ class Ontology:
             tree = _misc.create_tree_visualisation(G)
             # add synonym information
-            for node in self.extended_schema:
-                if self.extended_schema[node].get("synonym_for"):
+            for node in self.mapping.extended_schema:
+                if not isinstance(self.mapping.extended_schema[node], dict):
+                    continue
+                if self.mapping.extended_schema[node].get("synonym_for"):
                     tree.nodes[node].tag = (
                         f"{node} = "
-                        f"{self.extended_schema[node].get('synonym_for')}"
+                        f"{self.mapping.extended_schema[node].get('synonym_for')}"
                     )
             tree.show()
@@ -602,7 +604,7 @@ class Ontology:
             "node_id": self._get_current_id(),
             "node_label": "BioCypher",
             "properties": {
-                "schema": "self.extended_schema",
+                "schema": "self.ontology_mapping.extended_schema",
             },
         }

biocypher/_pandas.py CHANGED Viewed

@@ -1,11 +1,10 @@
 import pandas as pd
-from ._create import BioCypherEdge, BioCypherNode
+from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
 class Pandas:
-    def __init__(self, ontology, translator, deduplicator):
-        self.ontology = ontology
+    def __init__(self, translator, deduplicator):
         self.translator = translator
         self.deduplicator = deduplicator
@@ -18,22 +17,48 @@ class Pandas:
         """
         lists = {}
         for entity in entities:
-            if not isinstance(entity, BioCypherNode) and not isinstance(
-                entity, BioCypherEdge
+            if (
+                not isinstance(entity, BioCypherNode)
+                and not isinstance(entity, BioCypherEdge)
+                and not isinstance(entity, BioCypherRelAsNode)
             ):
                 raise TypeError(
-                    f"Expected a BioCypherNode or BioCypherEdge, got {type(entity)}."
+                    "Expected a BioCypherNode / BioCypherEdge / "
+                    f"BioCypherRelAsNode, got {type(entity)}."
                 )
             if isinstance(entity, BioCypherNode):
                 seen = self.deduplicator.node_seen(entity)
             elif isinstance(entity, BioCypherEdge):
                 seen = self.deduplicator.edge_seen(entity)
+            elif isinstance(entity, BioCypherRelAsNode):
+                seen = self.deduplicator.rel_as_node_seen(entity)
             if seen:
                 continue
-            _type = entity.get_label()
+            if isinstance(entity, BioCypherRelAsNode):
+                node = entity.get_node()
+                source_edge = entity.get_source_edge()
+                target_edge = entity.get_target_edge()
+                _type = node.get_type()
+                if not _type in lists:
+                    lists[_type] = []
+                lists[_type].append(node)
+                _source_type = source_edge.get_type()
+                if not _source_type in lists:
+                    lists[_source_type] = []
+                lists[_source_type].append(source_edge)
+                _target_type = target_edge.get_type()
+                if not _target_type in lists:
+                    lists[_target_type] = []
+                lists[_target_type].append(target_edge)
+                continue
+            _type = entity.get_type()
             if not _type in lists:
                 lists[_type] = []
             lists[_type].append(entity)

biocypher/_translate.py CHANGED Viewed

@@ -23,7 +23,7 @@ from more_itertools import peekable
 from . import _misc
 from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
-from ._mapping import OntologyMapping
+from ._ontology import Ontology
 __all__ = ["BiolinkAdapter", "Translator"]
@@ -41,9 +41,7 @@ class Translator:
     and cypher queries.
     """
-    def __init__(
-        self, ontology_mapping: "OntologyMapping", strict_mode: bool = False
-    ):
+    def __init__(self, ontology: "Ontology", strict_mode: bool = False):
         """
         Args:
             leaves:
@@ -57,7 +55,7 @@ class Translator:
                 carry source, licence, and version information.
         """
-        self.extended_schema = ontology_mapping.extended_schema
+        self.ontology = ontology
         self.strict_mode = strict_mode
         # record nodes without biolink type configured in schema_config.yaml
@@ -71,7 +69,7 @@ class Translator:
     def translate_nodes(
         self,
-        id_type_prop_tuples: Iterable,
+        node_tuples: Iterable,
     ) -> Generator[BioCypherNode, None, None]:
         """
         Translates input node representation to a representation that
@@ -79,16 +77,16 @@ class Translator:
         requires explicit statement of node type on pass.
         Args:
-            id_type_tuples (list of tuples): collection of tuples
+            node_tuples (list of tuples): collection of tuples
                 representing individual nodes by their unique id and a type
                 that is translated from the original database notation to
                 the corresponding BioCypher notation.
         """
-        self._log_begin_translate(id_type_prop_tuples, "nodes")
+        self._log_begin_translate(node_tuples, "nodes")
-        for _id, _type, _props in id_type_prop_tuples:
+        for _id, _type, _props in node_tuples:
             # check for strict mode requirements
             required_props = ["source", "licence", "version"]
@@ -132,8 +130,9 @@ class Translator:
         """
         return (
-            self.extended_schema[_bl_type]["preferred_id"]
-            if "preferred_id" in self.extended_schema.get(_bl_type, {})
+            self.ontology.mapping.extended_schema[_bl_type]["preferred_id"]
+            if "preferred_id"
+            in self.ontology.mapping.extended_schema.get(_bl_type, {})
             else "id"
         )
@@ -142,7 +141,9 @@ class Translator:
         Filters properties for those specified in schema_config if any.
         """
-        filter_props = self.extended_schema[bl_type].get("properties", {})
+        filter_props = self.ontology.mapping.extended_schema[bl_type].get(
+            "properties", {}
+        )
         # strict mode: add required properties (only if there is a whitelist)
         if self.strict_mode and filter_props:
@@ -150,7 +151,7 @@ class Translator:
                 {"source": "str", "licence": "str", "version": "str"},
             )
-        exclude_props = self.extended_schema[bl_type].get(
+        exclude_props = self.ontology.mapping.extended_schema[bl_type].get(
             "exclude_properties", []
         )
@@ -188,7 +189,7 @@ class Translator:
     def translate_edges(
         self,
-        id_src_tar_type_prop_tuples: Iterable,
+        edge_tuples: Iterable,
     ) -> Generator[Union[BioCypherEdge, BioCypherRelAsNode], None, None]:
         """
         Translates input edge representation to a representation that
@@ -197,7 +198,7 @@ class Translator:
         Args:
-            id_src_tar_type_prop_tuples (list of tuples):
+            edge_tuples (list of tuples):
                 collection of tuples representing source and target of
                 an interaction via their unique ids as well as the type
@@ -206,18 +207,18 @@ class Translator:
                 Can optionally possess its own ID.
         """
-        self._log_begin_translate(id_src_tar_type_prop_tuples, "edges")
+        self._log_begin_translate(edge_tuples, "edges")
         # legacy: deal with 4-tuples (no edge id)
         # TODO remove for performance reasons once safe
-        id_src_tar_type_prop_tuples = peekable(id_src_tar_type_prop_tuples)
-        if len(id_src_tar_type_prop_tuples.peek()) == 4:
-            id_src_tar_type_prop_tuples = [
+        edge_tuples = peekable(edge_tuples)
+        if len(edge_tuples.peek()) == 4:
+            edge_tuples = [
                 (None, src, tar, typ, props)
-                for src, tar, typ, props in id_src_tar_type_prop_tuples
+                for src, tar, typ, props in edge_tuples
             ]
-        for _id, _src, _tar, _type, _props in id_src_tar_type_prop_tuples:
+        for _id, _src, _tar, _type, _props in edge_tuples:
             # check for strict mode requirements
             if self.strict_mode:
                 if not "source" in _props:
@@ -239,7 +240,9 @@ class Translator:
                 # filter properties for those specified in schema_config if any
                 _filtered_props = self._filter_props(bl_type, _props)
-                rep = self.extended_schema[bl_type]["represented_as"]
+                rep = self.ontology.mapping.extended_schema[bl_type][
+                    "represented_as"
+                ]
                 if rep == "node":
                     if _id:
@@ -295,9 +298,9 @@ class Translator:
                     yield BioCypherRelAsNode(n, e_s, e_t)
                 else:
-                    edge_label = self.extended_schema[bl_type].get(
-                        "label_as_edge"
-                    )
+                    edge_label = self.ontology.mapping.extended_schema[
+                        bl_type
+                    ].get("label_as_edge")
                     if edge_label is None:
                         edge_label = bl_type
@@ -356,7 +359,7 @@ class Translator:
         self._ontology_mapping = {}
-        for key, value in self.extended_schema.items():
+        for key, value in self.ontology.mapping.extended_schema.items():
             labels = value.get("input_label") or value.get("label_in_input")
             if isinstance(labels, str):

biocypher/_write.py CHANGED Viewed

@@ -125,7 +125,6 @@ class _BatchWriter(ABC):
     def __init__(
         self,
-        ontology: "Ontology",
         translator: "Translator",
         deduplicator: "Deduplicator",
         delimiter: str,
@@ -167,10 +166,6 @@ class _BatchWriter(ABC):
             - _get_import_script_name
         Args:
-            ontology:
-                Instance of :py:class:`Ontology` to enable translation and
-                ontology queries
             translator:
                 Instance of :py:class:`Translator` to enable translation of
                 nodes and manipulation of properties.
@@ -251,8 +246,6 @@ class _BatchWriter(ABC):
         self.wipe = wipe
         self.strict_mode = strict_mode
-        self.extended_schema = ontology.extended_schema
-        self.ontology = ontology
         self.translator = translator
         self.deduplicator = deduplicator
         self.node_property_dict = {}
@@ -352,34 +345,34 @@ class _BatchWriter(ABC):
             bool: The return value. True for success, False otherwise.
         """
         passed = False
-        # unwrap generator in one step
         edges = list(edges)  # force evaluation to handle empty generator
         if edges:
-            z = zip(
-                *(
-                    (
-                        e.get_node(),
-                        [
-                            e.get_source_edge(),
-                            e.get_target_edge(),
-                        ],
-                    )
-                    if isinstance(e, BioCypherRelAsNode)
-                    else (None, [e])
-                    for e in edges
-                )
-            )
-            nod, edg = (list(a) for a in z)
-            nod = [n for n in nod if n]
-            edg = [val for sublist in edg for val in sublist]  # flatten
+            nodes_flat = []
+            edges_flat = []
+            for edge in edges:
+                if isinstance(edge, BioCypherRelAsNode):
+                    # check if relationship has already been written, if so skip
+                    if self.deduplicator.rel_as_node_seen(edge):
+                        continue
-            if nod and edg:
-                passed = self.write_nodes(nod) and self._write_edge_data(
-                    edg,
+                    nodes_flat.append(edge.get_node())
+                    edges_flat.append(edge.get_source_edge())
+                    edges_flat.append(edge.get_target_edge())
+                else:
+                    # check if relationship has already been written, if so skip
+                    if self.deduplicator.edge_seen(edge):
+                        continue
+                    edges_flat.append(edge)
+            if nodes_flat and edges_flat:
+                passed = self.write_nodes(nodes_flat) and self._write_edge_data(
+                    edges_flat,
                     batch_size,
                 )
             else:
-                passed = self._write_edge_data(edg, batch_size)
+                passed = self._write_edge_data(edges_flat, batch_size)
         else:
             # is this a problem? if the generator or list is empty, we
@@ -451,8 +444,12 @@ class _BatchWriter(ABC):
                     bin_l[label] = 1
                     # get properties from config if present
-                    cprops = self.extended_schema.get(label).get(
-                        "properties",
+                    cprops = (
+                        self.translator.ontology.mapping.extended_schema.get(
+                            label
+                        ).get(
+                            "properties",
+                        )
                     )
                     if cprops:
                         d = dict(cprops)
@@ -486,7 +483,7 @@ class _BatchWriter(ABC):
                     # get label hierarchy
                     # multiple labels:
-                    all_labels = self.ontology.get_ancestors(label)
+                    all_labels = self.translator.ontology.get_ancestors(label)
                     if all_labels:
                         # convert to pascal case
@@ -682,10 +679,6 @@ class _BatchWriter(ABC):
             # for each label to check for consistency and their type
             # for now, relevant for `int`
             for edge in edges:
-                # check for duplicates
-                if self.deduplicator.edge_seen(edge):
-                    continue
                 if not (edge.get_source_id() and edge.get_target_id()):
                     logger.error(
                         "Edge must have source and target node. "
@@ -706,13 +699,23 @@ class _BatchWriter(ABC):
                     # (may not be if it is an edge that carries the
                     # "label_as_edge" property)
                     cprops = None
-                    if label in self.extended_schema:
-                        cprops = self.extended_schema.get(label).get(
+                    if (
+                        label
+                        in self.translator.ontology.mapping.extended_schema
+                    ):
+                        cprops = self.translator.ontology.mapping.extended_schema.get(
+                            label
+                        ).get(
                             "properties",
                         )
                     else:
                         # try via "label_as_edge"
-                        for k, v in self.extended_schema.items():
+                        for (
+                            k,
+                            v,
+                        ) in (
+                            self.translator.ontology.mapping.extended_schema.items()
+                        ):
                             if isinstance(v, dict):
                                 if v.get("label_as_edge") == label:
                                     cprops = v.get("properties")
@@ -873,9 +876,14 @@ class _BatchWriter(ABC):
             if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
                 skip_id = True
-            elif not self.extended_schema.get(label):
+            elif not self.translator.ontology.mapping.extended_schema.get(
+                label
+            ):
                 # find label in schema by label_as_edge
-                for k, v in self.extended_schema.items():
+                for (
+                    k,
+                    v,
+                ) in self.translator.ontology.mapping.extended_schema.items():
                     if v.get("label_as_edge") == label:
                         schema_label = k
                         break
@@ -884,7 +892,9 @@ class _BatchWriter(ABC):
             if schema_label:
                 if (
-                    self.extended_schema.get(schema_label).get("use_id")
+                    self.translator.ontology.mapping.extended_schema.get(
+                        schema_label
+                    ).get("use_id")
                     == False
                 ):
                     skip_id = True
@@ -1009,6 +1019,7 @@ class _Neo4jBatchWriter(_BatchWriter):
     This class inherits from the abstract class "_BatchWriter" and implements the
     Neo4j-specific methods:
         - _write_node_headers
         - _write_edge_headers
         - _construct_import_call
@@ -1181,9 +1192,14 @@ class _Neo4jBatchWriter(_BatchWriter):
             if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
                 skip_id = True
-            elif not self.extended_schema.get(label):
+            elif not self.translator.ontology.mapping.extended_schema.get(
+                label
+            ):
                 # find label in schema by label_as_edge
-                for k, v in self.extended_schema.items():
+                for (
+                    k,
+                    v,
+                ) in self.translator.ontology.mapping.extended_schema.items():
                     if v.get("label_as_edge") == label:
                         schema_label = k
                         break
@@ -1194,7 +1210,9 @@ class _Neo4jBatchWriter(_BatchWriter):
             if schema_label:
                 if (
-                    self.extended_schema.get(schema_label).get("use_id")
+                    self.translator.ontology.mapping.extended_schema.get(
+                        schema_label
+                    ).get("use_id")
                     == False
                 ):
                     skip_id = True
@@ -1352,9 +1370,9 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
                 f.write(row)
             # add collection from schema config
-            collection = self.extended_schema[label].get(
-                "db_collection_name", None
-            )
+            collection = self.translator.ontology.mapping.extended_schema[
+                label
+            ].get("db_collection_name", None)
             # add file path to neo4 admin import statement
             # do once for each part file
@@ -1433,16 +1451,19 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
                 f.write(row)
             # add collection from schema config
-            if not self.extended_schema.get(label):
-                for _, v in self.extended_schema.items():
+            if not self.translator.ontology.mapping.extended_schema.get(label):
+                for (
+                    _,
+                    v,
+                ) in self.translator.ontology.mapping.extended_schema.items():
                     if v.get("label_as_edge") == label:
                         collection = v.get("db_collection_name", None)
                         break
             else:
-                collection = self.extended_schema[label].get(
-                    "db_collection_name", None
-                )
+                collection = self.translator.ontology.mapping.extended_schema[
+                    label
+                ].get("db_collection_name", None)
             # add file path to neo4 admin import statement (import call path
             # may be different from actual output path)
@@ -1520,6 +1541,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
     This class inherits from the abstract class "_BatchWriter" and implements the
     PostgreSQL-specific methods:
         - _write_node_headers
         - _write_edge_headers
         - _construct_import_call
@@ -1839,7 +1861,6 @@ DBMS_TO_CLASS = {
 def get_writer(
     dbms: str,
     translator: "Translator",
-    ontology: "Ontology",
     deduplicator: "Deduplicator",
     output_directory: str,
     strict_mode: bool,
@@ -1854,8 +1875,6 @@ def get_writer(
         translator: the Translator object.
-        ontology: the Ontology object.
         output_directory: the directory to write the output files to.
         strict_mode: whether to use strict mode.
@@ -1879,7 +1898,6 @@ def get_writer(
     if writer is not None:
         return writer(
-            ontology=ontology,
             translator=translator,
             deduplicator=deduplicator,
             delimiter=dbms_config.get("delimiter"),

{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biocypher
-Version: 0.5.19
+Version: 0.5.20
 Summary: A unifying framework for biomedical research knowledge graphs
 Home-page: https://github.com/biocypher/biocypher
 License: MIT
@@ -38,7 +38,9 @@ Description-Content-Type: text/markdown
 ![Python](https://img.shields.io/badge/python-3.10-blue.svg)
 [![PyPI version](https://badge.fury.io/py/biocypher.svg)](https://badge.fury.io/py/biocypher)
 [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
-![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yml/badge.svg)
+[![CI](https://github.com/biocypher/biocypher/actions/workflows/ci_cd.yaml/badge.svg)](https://github.com/biocypher/biocypher/actions/workflows/ci_cd.yaml)
+![Coverage](https://raw.githubusercontent.com/biocypher/biocypher/coverage/coverage.svg)
+[![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yaml/badge.svg)](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yaml)
 [![Downloads](https://static.pepy.tech/badge/biocypher)](https://pepy.tech/project/biocypher)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
 [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com)

{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/RECORD RENAMED Viewed

@@ -5,19 +5,19 @@ biocypher/_config/test_config.yaml,sha256=Np8jeS5_EP6HHOvMKb7B_Tkyqd5YaYlYz_DVsX
 biocypher/_config/test_schema_config.yaml,sha256=D1600WgEj3iTXrumVU9LIivJHJO36iaxfkOgyam9zVU,3129
 biocypher/_config/test_schema_config_disconnected.yaml,sha256=Qm8FLxEn2spHcyj_5F859KjcDvKSxNhxDvi4b4LLkvQ,68
 biocypher/_config/test_schema_config_extended.yaml,sha256=wn3A76142hhjnImhMF6RODbCFESTJ2TtPvcFdIFsAT0,3309
-biocypher/_connect.py,sha256=i62424Cbdnm2oI4ECLkcMF2V2A6aShCK2eSSwaGLbVE,12603
-biocypher/_core.py,sha256=Sg7ESentsTsqp9KbzPC_jh1fRAqOGzyy98Xzma7BBkw,17100
+biocypher/_connect.py,sha256=0oSyO6CEIlKL8rHo-HHE7y0FzGfSi4vnEXSDy1TnIUE,12456
+biocypher/_core.py,sha256=fA0tRorzy3R1mgzzT77mFk-l6oQ01ZAfjg8l6KbPQYM,19882
 biocypher/_create.py,sha256=vpUchUdEpWupZi1LgFLxAWMtqoBwnWbP7PwEDUCBS4A,10202
-biocypher/_deduplicate.py,sha256=ah2i6ONx6ml4MbQMXIe6NfbVzf1bjav0l3gLj1xGDE0,3288
+biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,4949
 biocypher/_logger.py,sha256=soYtz1DiduLFw3XrMnphWWUxeuJqvSof4AYhlafxl08,2933
 biocypher/_mapping.py,sha256=XJZjmXTPnXVkyub1ZU0h3EKXQ2YROaGaJOaGyPMqgy4,9338
-biocypher/_metadata.py,sha256=24UdhQ8vslHBfHf0S6oF9A5asyiM9SVOjfVqvRPnFvY,1658
+biocypher/_metadata.py,sha256=Hmz4g_CSuqikUJ6EtLEq2GS7Z0BawtAsL0Wk-7AiE8c,1658
 biocypher/_misc.py,sha256=wsjGVOqBDVM5hxbE_TEaZ69u1kJc8HXwRAtQHUgE8XQ,4545
-biocypher/_ontology.py,sha256=vCGIHJn_IH5bmOMTA6GJQZB-eNVOlyjYnMzwmwfni0Q,21375
-biocypher/_pandas.py,sha256=2qaCtUCk_nhr8dCqXqUr8zgMhCetPh9EDq-3z-8Qxi0,2021
-biocypher/_translate.py,sha256=e5XhPxbPArd0aK-Zk7F533ECV12jMR_ZzoAlGD3TAzc,16540
-biocypher/_write.py,sha256=kOb_l1LMu_weu5RLxEDLvSrpgdU1PZZe7ObaNhJRkdU,66943
-biocypher-0.5.19.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
-biocypher-0.5.19.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
-biocypher-0.5.19.dist-info/METADATA,sha256=_7DNxOzmag2EO1vxTpjE7dcsX7YclymnoIVEkoMXlJ4,9103
-biocypher-0.5.19.dist-info/RECORD,,
+biocypher/_ontology.py,sha256=pHc4hO8iZx-yg9gzqfBR9khoIni-lKAxWgnRFyNP91E,21530
+biocypher/_pandas.py,sha256=GVCFM68J7yBjh40MpkNVgD8qT1RFMrrIjMOtD3iKsf4,3040
+biocypher/_translate.py,sha256=nj4Y60F0U3JBH36N2dh5pFcC8Ot86rskJ2ChJwje9dI,16494
+biocypher/_write.py,sha256=2ynF-VkvTr8WT2qPt2wji3iupP3WON94TlT6NpfDvCs,67738
+biocypher-0.5.20.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
+biocypher-0.5.20.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
+biocypher-0.5.20.dist-info/METADATA,sha256=B3VOakjkLgCjusCElMML-neoPoc869g4jNI45Bchibo,9429
+biocypher-0.5.20.dist-info/RECORD,,

{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/LICENSE RENAMED Viewed

File without changes

{biocypher-0.5.19.dist-info → biocypher-0.5.20.dist-info}/WHEEL RENAMED Viewed

File without changes

biocypher 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl

Potentially problematic release.

biocypher 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl