biocypher 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic; see the package's registry page for more details.

biocypher/_create.py CHANGED
@@ -1,23 +1,16 @@
1
- #!/usr/bin/env python
2
- #
3
- # Copyright 2021, Heidelberg University Clinic
4
- #
5
- # File author(s): Sebastian Lobentanzer
6
- # ...
7
- #
8
- # Distributed under MIT licence, see the file `LICENSE`.
9
- #
10
1
  """
11
2
  BioCypher 'create' module. Handles the creation of BioCypher node and edge
12
3
  dataclasses.
13
4
  """
14
- from ._logger import logger
15
5
 
16
- logger.debug(f"Loading module {__name__}.")
6
+ import os
17
7
 
8
+ from dataclasses import dataclass, field
18
9
  from typing import Union
19
- from dataclasses import field, dataclass
20
- import os
10
+
11
+ from ._logger import logger
12
+
13
+ logger.debug(f"Loading module {__name__}.")
21
14
 
22
15
  __all__ = [
23
16
  "BioCypherEdge",
@@ -71,8 +64,7 @@ class BioCypherNode:
71
64
 
72
65
  if ":TYPE" in self.properties.keys():
73
66
  logger.warning(
74
- "Keyword ':TYPE' is reserved for Neo4j. "
75
- "Removing from properties.",
67
+ "Keyword ':TYPE' is reserved for Neo4j. Removing from properties.",
76
68
  # "Renaming to 'type'."
77
69
  )
78
70
  # self.properties["type"] = self.properties[":TYPE"]
@@ -209,24 +201,21 @@ class BioCypherEdge:
209
201
 
210
202
  if ":TYPE" in self.properties.keys():
211
203
  logger.debug(
212
- "Keyword ':TYPE' is reserved for Neo4j. "
213
- "Removing from properties.",
204
+ "Keyword ':TYPE' is reserved for Neo4j. Removing from properties.",
214
205
  # "Renaming to 'type'."
215
206
  )
216
207
  # self.properties["type"] = self.properties[":TYPE"]
217
208
  del self.properties[":TYPE"]
218
209
  elif "id" in self.properties.keys():
219
210
  logger.debug(
220
- "Keyword 'id' is reserved for Neo4j. "
221
- "Removing from properties.",
211
+ "Keyword 'id' is reserved for Neo4j. Removing from properties.",
222
212
  # "Renaming to 'type'."
223
213
  )
224
214
  # self.properties["type"] = self.properties[":TYPE"]
225
215
  del self.properties["id"]
226
216
  elif "_ID" in self.properties.keys():
227
217
  logger.debug(
228
- "Keyword '_ID' is reserved for Postgres. "
229
- "Removing from properties.",
218
+ "Keyword '_ID' is reserved for Postgres. Removing from properties.",
230
219
  # "Renaming to 'type'."
231
220
  )
232
221
  # self.properties["type"] = self.properties[":TYPE"]
@@ -334,20 +323,17 @@ class BioCypherRelAsNode:
334
323
  def __post_init__(self):
335
324
  if not isinstance(self.node, BioCypherNode):
336
325
  raise TypeError(
337
- f"BioCypherRelAsNode.node must be a BioCypherNode, "
338
- f"not {type(self.node)}.",
326
+ f"BioCypherRelAsNode.node must be a BioCypherNode, " f"not {type(self.node)}.",
339
327
  )
340
328
 
341
329
  if not isinstance(self.source_edge, BioCypherEdge):
342
330
  raise TypeError(
343
- f"BioCypherRelAsNode.source_edge must be a BioCypherEdge, "
344
- f"not {type(self.source_edge)}.",
331
+ f"BioCypherRelAsNode.source_edge must be a BioCypherEdge, " f"not {type(self.source_edge)}.",
345
332
  )
346
333
 
347
334
  if not isinstance(self.target_edge, BioCypherEdge):
348
335
  raise TypeError(
349
- f"BioCypherRelAsNode.target_edge must be a BioCypherEdge, "
350
- f"not {type(self.target_edge)}.",
336
+ f"BioCypherRelAsNode.target_edge must be a BioCypherEdge, " f"not {type(self.target_edge)}.",
351
337
  )
352
338
 
353
339
  def get_node(self) -> BioCypherNode:
biocypher/_deduplicate.py CHANGED
@@ -1,9 +1,8 @@
1
+ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
1
2
  from ._logger import logger
2
3
 
3
4
  logger.debug(f"Loading module {__name__}.")
4
5
 
5
- from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
6
-
7
6
 
8
7
  class Deduplicator:
9
8
  """
@@ -45,9 +44,7 @@ class Deduplicator:
45
44
  if entity.get_id() in self.seen_entity_ids:
46
45
  self.duplicate_entity_ids.add(entity.get_id())
47
46
  if entity.get_label() not in self.duplicate_entity_types:
48
- logger.warning(
49
- f"Duplicate node type {entity.get_label()} found. "
50
- )
47
+ logger.warning(f"Duplicate node type {entity.get_label()} found. ")
51
48
  self.duplicate_entity_types.add(entity.get_label())
52
49
  return True
53
50
 
@@ -69,18 +66,14 @@ class Deduplicator:
69
66
 
70
67
  # concatenate source and target if no id is present
71
68
  if not relationship.get_id():
72
- _id = (
73
- f"{relationship.get_source_id()}_{relationship.get_target_id()}"
74
- )
69
+ _id = f"{relationship.get_source_id()}_{relationship.get_target_id()}"
75
70
  else:
76
71
  _id = relationship.get_id()
77
72
 
78
73
  if _id in self.seen_relationships[relationship.get_type()]:
79
74
  self.duplicate_relationship_ids.add(_id)
80
75
  if relationship.get_type() not in self.duplicate_relationship_types:
81
- logger.warning(
82
- f"Duplicate edge type {relationship.get_type()} found. "
83
- )
76
+ logger.warning(f"Duplicate edge type {relationship.get_type()} found. ")
84
77
  self.duplicate_relationship_types.add(relationship.get_type())
85
78
  return True
86
79
 
biocypher/_get.py CHANGED
@@ -1,38 +1,26 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  BioCypher get module. Used to download and cache data from external sources.
13
3
  """
14
4
 
15
5
  from __future__ import annotations
16
6
 
17
- from typing import Optional
7
+ import ftplib
8
+ import json
9
+ import os
18
10
  import shutil
19
11
 
20
- import requests
21
-
22
- from ._logger import logger
23
-
24
- logger.debug(f"Loading module {__name__}.")
25
-
26
12
  from abc import ABC
27
13
  from datetime import datetime, timedelta
28
14
  from tempfile import TemporaryDirectory
29
- import os
30
- import json
31
- import ftplib
15
+ from typing import Optional
32
16
 
33
17
  import pooch
18
+ import requests
34
19
 
35
- from ._misc import to_list, is_nested
20
+ from ._logger import logger
21
+ from ._misc import is_nested, to_list
22
+
23
+ logger.debug(f"Loading module {__name__}.")
36
24
 
37
25
 
38
26
  class Resource(ABC):
@@ -43,7 +31,6 @@ class Resource(ABC):
43
31
  lifetime: int = 0,
44
32
  ):
45
33
  """
46
-
47
34
  A Resource is a file, a list of files, an API request, or a list of API
48
35
  requests, any of which can be downloaded from the given URL(s) and
49
36
  cached locally. This class implements checks of the minimum requirements
@@ -153,7 +140,6 @@ class Downloader:
153
140
  Returns:
154
141
  list[str]: The path or paths to the downloaded resource(s).
155
142
 
156
-
157
143
  """
158
144
  expired = self._is_cache_expired(resource)
159
145
 
@@ -163,14 +149,10 @@ class Downloader:
163
149
  logger.info(f"Asking for download of resource {resource.name}.")
164
150
  paths = self._download_files(cache, resource)
165
151
  elif isinstance(resource, APIRequest):
166
- logger.info(
167
- f"Asking for download of api request {resource.name}."
168
- )
152
+ logger.info(f"Asking for download of api request {resource.name}.")
169
153
  paths = self._download_api_request(resource)
170
-
171
154
  else:
172
155
  raise TypeError(f"Unknown resource type: {type(resource)}")
173
-
174
156
  else:
175
157
  paths = self.get_cached_version(resource)
176
158
  self._update_cache_record(resource)
@@ -181,17 +163,14 @@ class Downloader:
181
163
  Check if resource or API request cache is expired.
182
164
 
183
165
  Args:
184
-
185
- resource (Resource): The resource or API request to download.
166
+ resource (Resource): The resource to download.
186
167
 
187
168
  Returns:
188
- bool: True if cache is expired, False if not.
169
+ bool: cache is expired or not.
189
170
  """
190
171
  cache_record = self._get_cache_record(resource)
191
172
  if cache_record:
192
- download_time = datetime.strptime(
193
- cache_record.get("date_downloaded"), "%Y-%m-%d %H:%M:%S.%f"
194
- )
173
+ download_time = datetime.strptime(cache_record.get("date_downloaded"), "%Y-%m-%d %H:%M:%S.%f")
195
174
  lifetime = timedelta(days=resource.lifetime)
196
175
  expired = download_time + lifetime < datetime.now()
197
176
  else:
@@ -200,9 +179,7 @@ class Downloader:
200
179
 
201
180
  def _delete_expired_cache(self, resource: Resource):
202
181
  cache_resource_path = self.cache_dir + "/" + resource.name
203
- if os.path.exists(cache_resource_path) and os.path.isdir(
204
- cache_resource_path
205
- ):
182
+ if os.path.exists(cache_resource_path) and os.path.isdir(cache_resource_path):
206
183
  shutil.rmtree(cache_resource_path)
207
184
 
208
185
  def _download_files(self, cache, file_download: FileDownload):
@@ -219,9 +196,7 @@ class Downloader:
219
196
  """
220
197
  if file_download.is_dir:
221
198
  files = self._get_files(file_download)
222
- file_download.url_s = [
223
- file_download.url_s + "/" + file for file in files
224
- ]
199
+ file_download.url_s = [file_download.url_s + "/" + file for file in files]
225
200
  file_download.is_dir = False
226
201
  paths = self._download_or_cache(file_download, cache)
227
202
  elif isinstance(file_download.url_s, list):
@@ -236,9 +211,7 @@ class Downloader:
236
211
  paths.append(path)
237
212
  else:
238
213
  paths = []
239
- fname = file_download.url_s[
240
- file_download.url_s.rfind("/") + 1 :
241
- ].split("?")[0]
214
+ fname = file_download.url_s[file_download.url_s.rfind("/") + 1 :].split("?")[0]
242
215
  results = self._retrieve(
243
216
  url=file_download.url_s,
244
217
  fname=fname,
@@ -259,31 +232,22 @@ class Downloader:
259
232
  Download an API request and return the path.
260
233
 
261
234
  Args:
262
- api_request(APIRequest): The API request result that is being
263
- cached.
235
+ api_request(APIRequest): The API request result that is being cached.
264
236
  Returns:
265
237
  list[str]: The path to the cached API request.
266
238
 
267
239
  """
268
- urls = (
269
- api_request.url_s
270
- if isinstance(api_request.url_s, list)
271
- else [api_request.url_s]
272
- )
240
+ urls = api_request.url_s if isinstance(api_request.url_s, list) else [api_request.url_s]
273
241
  paths = []
274
242
  for url in urls:
275
243
  fname = url[url.rfind("/") + 1 :].rsplit(".", 1)[0]
276
- logger.info(
277
- f"Asking for caching API of {api_request.name} {fname}."
278
- )
244
+ logger.info(f"Asking for caching API of {api_request.name} {fname}.")
279
245
  response = requests.get(url=url)
280
246
 
281
247
  if response.status_code != 200:
282
248
  response.raise_for_status()
283
249
  response_data = response.json()
284
- api_path = os.path.join(
285
- self.cache_dir, api_request.name, f"{fname}.json"
286
- )
250
+ api_path = os.path.join(self.cache_dir, api_request.name, f"{fname}.json")
287
251
 
288
252
  os.makedirs(os.path.dirname(api_path), exist_ok=True)
289
253
  with open(api_path, "w") as f:
@@ -300,7 +264,6 @@ class Downloader:
300
264
 
301
265
  Returns:
302
266
  list[str]: The paths to the cached resource(s).
303
-
304
267
  """
305
268
  cached_location = os.path.join(self.cache_dir, resource.name)
306
269
  logger.info(f"Use cached version from {cached_location}.")
@@ -390,9 +353,7 @@ class Downloader:
390
353
  files = ftp.nlst()
391
354
  ftp.quit()
392
355
  else:
393
- raise NotImplementedError(
394
- "Only FTP directories are supported at the moment."
395
- )
356
+ raise NotImplementedError("Only FTP directories are supported at the moment.")
396
357
 
397
358
  return files
398
359
 
biocypher/_logger.py CHANGED
@@ -1,23 +1,14 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  Configuration of the module logger.
13
3
  """
14
4
 
15
5
  __all__ = ["get_logger", "log", "logfile"]
16
6
 
17
- from datetime import datetime
7
+ import logging
18
8
  import os
19
9
  import pydoc
20
- import logging
10
+
11
+ from datetime import datetime
21
12
 
22
13
  from biocypher import _config
23
14
  from biocypher._metadata import __version__
@@ -63,10 +54,7 @@ def get_logger(name: str = "biocypher") -> logging.Logger:
63
54
  log_to_disk = _config.config("biocypher").get("log_to_disk")
64
55
 
65
56
  if log_to_disk:
66
- logdir = (
67
- _config.config("biocypher").get("log_directory")
68
- or "biocypher-log"
69
- )
57
+ logdir = _config.config("biocypher").get("log_directory") or "biocypher-log"
70
58
  os.makedirs(logdir, exist_ok=True)
71
59
  logfile = os.path.join(logdir, f"biocypher-{date_time}.log")
72
60
 
biocypher/_mapping.py CHANGED
@@ -1,20 +1,7 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  BioCypher 'mapping' module. Handles the mapping of user-defined schema to the
13
3
  underlying ontology.
14
4
  """
15
- from ._logger import logger
16
-
17
- logger.debug(f"Loading module {__name__}.")
18
5
 
19
6
  from typing import Optional
20
7
  from urllib.request import urlopen
@@ -22,7 +9,9 @@ from urllib.request import urlopen
22
9
  import yaml
23
10
 
24
11
  from . import _misc
25
- from ._config import config as _config
12
+ from ._logger import logger
13
+
14
+ logger.debug(f"Loading module {__name__}.")
26
15
 
27
16
 
28
17
  class OntologyMapping:
@@ -151,9 +140,7 @@ class OntologyMapping:
151
140
  if parent_props:
152
141
  v["properties"].update(parent_props)
153
142
 
154
- parent_excl_props = self.schema[parent].get(
155
- "exclude_properties", {}
156
- )
143
+ parent_excl_props = self.schema[parent].get("exclude_properties", {})
157
144
  if parent_excl_props:
158
145
  v["exclude_properties"].update(parent_excl_props)
159
146
 
biocypher/_metadata.py CHANGED
@@ -1,25 +1,16 @@
1
- #!/usr/bin/env python
2
- #
3
- # Copyright 2021, Heidelberg University Clinic
4
- #
5
- # File author(s): Sebastian Lobentanzer
6
- # ...
7
- #
8
- # Distributed under MIT licence, see the file `LICENSE`.
9
- #
10
1
  """
11
2
  Package metadata (version, authors, etc).
12
3
  """
13
4
 
14
5
  __all__ = ["get_metadata"]
15
6
 
7
+ import importlib.metadata
16
8
  import os
17
9
  import pathlib
18
- import importlib.metadata
19
10
 
20
11
  import toml
21
12
 
22
- _VERSION = "0.6.2"
13
+ _VERSION = "0.7.0"
23
14
 
24
15
 
25
16
  def get_metadata():
@@ -52,10 +43,7 @@ def get_metadata():
52
43
 
53
44
  if not meta:
54
45
  try:
55
- meta = {
56
- k.lower(): v
57
- for k, v in importlib.metadata.metadata(here.name).items()
58
- }
46
+ meta = {k.lower(): v for k, v in importlib.metadata.metadata(here.name).items()}
59
47
 
60
48
  except importlib.metadata.PackageNotFoundError:
61
49
  pass
biocypher/_misc.py CHANGED
@@ -1,36 +1,29 @@
1
- #!/usr/bin/env python
2
-
3
- #
4
- # Copyright 2021, Heidelberg University Clinic
5
- #
6
- # File author(s): Sebastian Lobentanzer
7
- # ...
8
- #
9
- # Distributed under MIT licence, see the file `LICENSE`.
10
- #
11
1
  """
12
2
  Handy functions for use in various places.
13
3
  """
14
- from ._logger import logger
15
4
 
16
- logger.debug(f"Loading module {__name__}.")
5
+ import re
17
6
 
7
+ from collections.abc import Iterable
18
8
  from typing import (
19
9
  Any,
20
- Union,
21
- Mapping,
22
- KeysView,
23
10
  Generator,
24
11
  ItemsView,
12
+ KeysView,
13
+ Mapping,
14
+ Union,
25
15
  ValuesView,
26
16
  )
27
- from collections.abc import Iterable
28
- import re
29
17
 
30
- from treelib import Tree
31
18
  import networkx as nx
32
19
  import stringcase
33
20
 
21
+ from treelib import Tree
22
+
23
+ from ._logger import logger
24
+
25
+ logger.debug(f"Loading module {__name__}.")
26
+
34
27
  __all__ = ["LIST_LIKE", "SIMPLE_TYPES", "ensure_iterable", "to_list"]
35
28
 
36
29
  SIMPLE_TYPES = (
@@ -110,9 +103,7 @@ def _get_inheritance_tree(inheritance_graph: Union[dict, nx.Graph]) -> dict:
110
103
  if isinstance(inheritance_graph, nx.Graph):
111
104
  inheritance_tree = nx.to_dict_of_lists(inheritance_graph)
112
105
 
113
- multiple_parents_present = _multiple_inheritance_present(
114
- inheritance_tree
115
- )
106
+ multiple_parents_present = _multiple_inheritance_present(inheritance_tree)
116
107
  if multiple_parents_present:
117
108
  logger.warning(
118
109
  "The ontology contains multiple inheritance (one child node "
@@ -143,17 +134,12 @@ def _find_root_node(inheritance_tree: dict) -> tuple[set, str]:
143
134
  if "entity" in root:
144
135
  root = "entity" # TODO: default: good standard?
145
136
  else:
146
- raise ValueError(
147
- "Inheritance tree cannot have more than one root node. "
148
- f"Found {len(root)}: {root}."
149
- )
137
+ raise ValueError("Inheritance tree cannot have more than one root node. " f"Found {len(root)}: {root}.")
150
138
  else:
151
139
  root = root[0]
152
140
  if not root:
153
141
  # find key whose value is None
154
- root = list(inheritance_tree.keys())[
155
- list(inheritance_tree.values()).index(None)
156
- ]
142
+ root = list(inheritance_tree.keys())[list(inheritance_tree.values()).index(None)]
157
143
  return classes, root
158
144
 
159
145