PyPI - biocypher - Versions diffs - 0.5.44__tar.gz → 0.6.0__tar.gz - Mend

biocypher 0.5.44 (tar.gz) → 0.6.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biocypher might be problematic. Click here for more details.

Files changed (39) hide show

{biocypher-0.5.44 → biocypher-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biocypher
-Version: 0.5.44
+Version: 0.6.0
 Summary: A unifying framework for biomedical research knowledge graphs
 Home-page: https://github.com/biocypher/biocypher
 License: MIT

{biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_get.py RENAMED Viewed

@@ -17,10 +17,13 @@ from __future__ import annotations
 from typing import Optional
 import shutil
+import requests
 from ._logger import logger
 logger.debug(f"Loading module {__name__}.")
+from abc import ABC
 from datetime import datetime, timedelta
 from tempfile import TemporaryDirectory
 import os
@@ -29,21 +32,22 @@ import ftplib
 import pooch
-from ._misc import to_list
+from ._misc import to_list, is_nested
-class Resource:
+class Resource(ABC):
     def __init__(
         self,
         name: str,
         url_s: str | list[str],
         lifetime: int = 0,
-        is_dir: bool = False,
     ):
         """
-        A resource is a file that can be downloaded from a URL and cached
-        locally. This class implements checks of the minimum requirements for
-        a resource, to be implemented by a biocypher adapter.
+        A Resource is a file, a list of files, an API request, or a list of API
+        requests, any of which can be downloaded from the given URL(s) and
+        cached locally. This class implements checks of the minimum requirements
+        for a resource, to be implemented by a biocypher adapter.
         Args:
             name (str): The name of the resource.
@@ -52,43 +56,83 @@ class Resource:
             lifetime (int): The lifetime of the resource in days. If 0, the
                 resource is considered to be permanent.
-            is_dir (bool): Whether the resource is a directory or not.
         """
         self.name = name
         self.url_s = url_s
         self.lifetime = lifetime
+class FileDownload(Resource):
+    def __init__(
+        self,
+        name: str,
+        url_s: str | list[str],
+        lifetime: int = 0,
+        is_dir: bool = False,
+    ):
+        """
+        Represents basic information for a File Download.
+        Args:
+            name(str): The name of the File Download.
+            url_s(str|list[str]): The URL(s) of the File Download.
+            lifetime(int): The lifetime of the File Download in days. If 0, the
+                File Download is cached indefinitely.
+            is_dir (bool): Whether the URL points to a directory or not.
+        """
+        super().__init__(name, url_s, lifetime)
         self.is_dir = is_dir
+class APIRequest(Resource):
+    def __init__(self, name: str, url_s: str | list[str], lifetime: int = 0):
+        """
+        Represents basic information for an API Request.
+        Args:
+            name(str): The name of the API Request.
+            url_s(str|list): The URL of the API endpoint.
+            lifetime(int): The lifetime of the API Request in days. If 0, the
+                API Request is cached indefinitely.
+        """
+        super().__init__(name, url_s, lifetime)
 class Downloader:
     def __init__(self, cache_dir: Optional[str] = None) -> None:
         """
-        A downloader is a collection of resources that can be downloaded
+        The Downloader is a class that manages resources that can be downloaded
         and cached locally. It manages the lifetime of downloaded resources by
         keeping a JSON record of the download date of each resource.
         Args:
             cache_dir (str): The directory where the resources are cached. If
                 not given, a temporary directory is created.
-        Returns:
-            Downloader: The downloader object.
         """
         self.cache_dir = cache_dir or TemporaryDirectory().name
         self.cache_file = os.path.join(self.cache_dir, "cache.json")
         self.cache_dict = self._load_cache_dict()
-    # download function that accepts a resource or a list of resources
     def download(self, *resources: Resource):
         """
-        Download one or multiple resources.
+        Download one or multiple resources. Load from cache if the resource is
+        already downloaded and the cache is not expired.
         Args:
-            resources (Resource): The resource or resources to download.
+            resources (Resource): The resource(s) to download or load from
+                cache.
         Returns:
-            str or list: The path or paths to the downloaded resource(s).
+            list[str]: The path or paths to the resource(s) that were downloaded
+                or loaded from cache.
         """
         paths = []
         for resource in resources:
@@ -106,16 +150,27 @@ class Downloader:
         Args:
             resource (Resource): The resource to download.
         Returns:
-            str or list: The path or paths to the downloaded resource(s).
+            list[str]: The path or paths to the downloaded resource(s).
         """
         expired = self._is_cache_expired(resource)
         if expired or not cache:
-            self._delete_expired_resource_cache(resource)
-            logger.info(f"Asking for download of {resource.name}.")
-            paths = self._download_resource(cache, resource)
+            self._delete_expired_cache(resource)
+            if isinstance(resource, FileDownload):
+                logger.info(f"Asking for download of resource {resource.name}.")
+                paths = self._download_files(cache, resource)
+            elif isinstance(resource, APIRequest):
+                logger.info(
+                    f"Asking for download of api request {resource.name}."
+                )
+                paths = self._download_api_request(resource)
+            else:
+                raise TypeError(f"Unknown resource type: {type(resource)}")
         else:
             paths = self.get_cached_version(resource)
         self._update_cache_record(resource)
@@ -123,13 +178,14 @@ class Downloader:
     def _is_cache_expired(self, resource: Resource) -> bool:
         """
-        Check if resource cache is expired.
+        Check if resource or API request cache is expired.
         Args:
-            resource (Resource): The resource to download.
+            resource (Resource): The resource or API request to download.
         Returns:
-            bool: cache is expired or not.
+            bool: True if cache is expired, False if not.
         """
         cache_record = self._get_cache_record(resource)
         if cache_record:
@@ -142,65 +198,116 @@ class Downloader:
             expired = True
         return expired
-    def _delete_expired_resource_cache(self, resource: Resource):
-        resource_cache_path = self.cache_dir + "/" + resource.name
-        if os.path.exists(resource_cache_path) and os.path.isdir(
-            resource_cache_path
+    def _delete_expired_cache(self, resource: Resource):
+        cache_resource_path = self.cache_dir + "/" + resource.name
+        if os.path.exists(cache_resource_path) and os.path.isdir(
+            cache_resource_path
         ):
-            shutil.rmtree(resource_cache_path)
+            shutil.rmtree(cache_resource_path)
-    def _download_resource(self, cache, resource):
-        """Download a resource.
+    def _download_files(self, cache, file_download: FileDownload):
+        """
+        Download a resource given it is a file or a directory and return the
+        path.
         Args:
             cache (bool): Whether to cache the resource or not.
-            resource (Resource): The resource to download.
+            file_download (FileDownload): The resource to download.
         Returns:
-            str or list: The path or paths to the downloaded resource(s).
+            list[str]: The path or paths to the downloaded resource(s).
         """
-        if resource.is_dir:
-            files = self._get_files(resource)
-            resource.url_s = [resource.url_s + "/" + file for file in files]
-            resource.is_dir = False
-            paths = self._download_or_cache(resource, cache)
-        elif isinstance(resource.url_s, list):
+        if file_download.is_dir:
+            files = self._get_files(file_download)
+            file_download.url_s = [
+                file_download.url_s + "/" + file for file in files
+            ]
+            file_download.is_dir = False
+            paths = self._download_or_cache(file_download, cache)
+        elif isinstance(file_download.url_s, list):
             paths = []
-            for url in resource.url_s:
-                fname = url[url.rfind("/") + 1 :]
+            for url in file_download.url_s:
+                fname = url[url.rfind("/") + 1 :].split("?")[0]
                 paths.append(
                     self._retrieve(
                         url=url,
                         fname=fname,
-                        path=os.path.join(self.cache_dir, resource.name),
+                        path=os.path.join(self.cache_dir, file_download.name),
                     )
                 )
         else:
-            fname = resource.url_s[resource.url_s.rfind("/") + 1 :]
-            paths = self._retrieve(
-                url=resource.url_s,
+            paths = []
+            fname = file_download.url_s[
+                file_download.url_s.rfind("/") + 1 :
+            ].split("?")[0]
+            results = self._retrieve(
+                url=file_download.url_s,
                 fname=fname,
-                path=os.path.join(self.cache_dir, resource.name),
+                path=os.path.join(self.cache_dir, file_download.name),
             )
+            if isinstance(results, list):
+                paths.extend(results)
+            else:
+                paths.append(results)
         # sometimes a compressed file contains multiple files
         # TODO ask for a list of files in the archive to be used from the
         # adapter
         return paths
-    def get_cached_version(self, resource) -> list[str]:
+    def _download_api_request(self, api_request: APIRequest):
+        """
+        Download an API request and return the path.
+        Args:
+            api_request(APIRequest): The API request result that is being
+                cached.
+        Returns:
+            list[str]: The path to the cached API request.
+        """
+        urls = (
+            api_request.url_s
+            if isinstance(api_request.url_s, list)
+            else [api_request.url_s]
+        )
+        paths = []
+        for url in urls:
+            fname = url[url.rfind("/") + 1 :].rsplit(".", 1)[0]
+            logger.info(
+                f"Asking for caching API of {api_request.name} {fname}."
+            )
+            response = requests.get(url=url)
+            if response.status_code != 200:
+                response.raise_for_status()
+            response_data = response.json()
+            api_path = os.path.join(
+                self.cache_dir, api_request.name, f"{fname}.json"
+            )
+            os.makedirs(os.path.dirname(api_path), exist_ok=True)
+            with open(api_path, "w") as f:
+                json.dump(response_data, f)
+                logger.info(f"Caching API request to {api_path}.")
+            paths.append(api_path)
+        return paths
+    def get_cached_version(self, resource: Resource) -> list[str]:
         """Get the cached version of a resource.
         Args:
-            resource (Resource): The resource to get the cached version of.
+            resource(Resource): The resource to get the cached version of.
         Returns:
             list[str]: The paths to the cached resource(s).
         """
-        cached_resource_location = os.path.join(self.cache_dir, resource.name)
-        logger.info(f"Use cached version from {cached_resource_location}.")
+        cached_location = os.path.join(self.cache_dir, resource.name)
+        logger.info(f"Use cached version from {cached_location}.")
         paths = []
-        for file in os.listdir(cached_resource_location):
-            paths.append(os.path.join(cached_resource_location, file))
+        for file in os.listdir(cached_location):
+            paths.append(os.path.join(cached_location, file))
         return paths
     def _retrieve(
@@ -260,23 +367,23 @@ class Downloader:
                 progressbar=True,
             )
-    def _get_files(self, resource: Resource):
+    def _get_files(self, file_download: FileDownload):
         """
-        Get the files contained in a directory resource.
+        Get the files contained in a directory file.
         Args:
-            resource (Resource): The directory resource.
+            file_download (FileDownload): The directory file.
         Returns:
             list: The files contained in the directory.
         """
-        if resource.url_s.startswith("ftp://"):
+        if file_download.url_s.startswith("ftp://"):
             # remove protocol
-            url = resource.url_s[6:]
+            url = file_download.url_s[6:]
             # get base url
             url = url[: url.find("/")]
             # get directory (remove initial slash as well)
-            dir = resource.url_s[7 + len(url) :]
+            dir = file_download.url_s[7 + len(url) :]
             # get files
             ftp = ftplib.FTP(url)
             ftp.login()
@@ -334,19 +441,3 @@ class Downloader:
         self.cache_dict[resource.name] = cache_record
         with open(self.cache_file, "w") as f:
             json.dump(self.cache_dict, f, default=str)
-def is_nested(lst):
-    """
-    Check if a list is nested.
-    Args:
-        lst (list): The list to check.
-    Returns:
-        bool: True if the list is nested, False otherwise.
-    """
-    for item in lst:
-        if isinstance(item, list):
-            return True
-    return False

{biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_metadata.py RENAMED Viewed

@@ -19,7 +19,7 @@ import importlib.metadata
 import toml
-_VERSION = "0.5.44"
+_VERSION = "0.6.0"
 def get_metadata():

{biocypher-0.5.44 → biocypher-0.6.0}/biocypher/_misc.py RENAMED Viewed

@@ -246,3 +246,19 @@ def to_lower_sentence_case(s: str) -> str:
         return pascalcase_to_sentencecase(s)
     else:
         return s
+def is_nested(lst) -> bool:
+    """
+    Check if a list is nested.
+    Args:
+        lst (list): The list to check.
+    Returns:
+        bool: True if the list is nested, False otherwise.
+    """
+    for item in lst:
+        if isinstance(item, list):
+            return True
+    return False

{biocypher-0.5.44 → biocypher-0.6.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "biocypher"
-version = "0.5.44"
+version = "0.6.0"
 description = "A unifying framework for biomedical research knowledge graphs"
 authors = [
     "Sebastian Lobentanzer <sebastian.lobentanzer@gmail.com>",