PyPI - graphdatascience - Versions diffs - 1.8__tar.gz → 1.9__tar.gz - Mend

graphdatascience 1.8tar.gz → 1.9tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (132) hide show

{graphdatascience-1.8/graphdatascience.egg-info → graphdatascience-1.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphdatascience
-Version: 1.8
+Version: 1.9
 Summary: A Python client for the Neo4j Graph Data Science (GDS) library
 Home-page: https://neo4j.com/product/graph-data-science/
 Author: Neo4j
@@ -30,10 +30,11 @@ License-File: LICENSE
 Requires-Dist: multimethod<2.0,>=1.0
 Requires-Dist: neo4j<6.0,>=4.4.2
 Requires-Dist: pandas<3.0,>=1.0
-Requires-Dist: pyarrow<14.0,>=4.0
+Requires-Dist: pyarrow<15.0,>=10.0
 Requires-Dist: textdistance<5.0,>=4.0
 Requires-Dist: tqdm<5.0,>=4.0
 Requires-Dist: typing-extensions<5.0,>=4.0
+Requires-Dist: requests
 Provides-Extra: ogb
 Requires-Dist: ogb<2.0,>=1.0; extra == "ogb"
 Provides-Extra: networkx
@@ -125,6 +126,7 @@ Full end-to-end examples in Jupyter ready-to-run notebooks can be found in the [
 * [Sampling, Export and Integration with PyG example](examples/import-sample-export-gnn.ipynb)
 * [Load data to a projected graph via graph construction](examples/load-data-via-graph-construction.ipynb)
 * [Heterogeneous Node Classification with HashGNN and Autotuning](https://github.com/neo4j/graph-data-science-client/tree/main/examples/heterogeneous-node-classification-with-hashgnn.ipynb)
+* [Perform inference using pre-trained KGE models](examples/kge-predict-transe-pyg-train.ipynb)
 ## Documentation

{graphdatascience-1.8 → graphdatascience-1.9}/README.md RENAMED Viewed

@@ -84,6 +84,7 @@ Full end-to-end examples in Jupyter ready-to-run notebooks can be found in the [
 * [Sampling, Export and Integration with PyG example](examples/import-sample-export-gnn.ipynb)
 * [Load data to a projected graph via graph construction](examples/load-data-via-graph-construction.ipynb)
 * [Heterogeneous Node Classification with HashGNN and Autotuning](https://github.com/neo4j/graph-data-science-client/tree/main/examples/heterogeneous-node-classification-with-hashgnn.ipynb)
+* [Perform inference using pre-trained KGE models](examples/kge-predict-transe-pyg-train.ipynb)
 ## Documentation

graphdatascience-1.9/graphdatascience/__init__.py ADDED Viewed

@@ -0,0 +1,36 @@
+from .gds_session.gds_sessions import GdsSessions
+from .graph.graph_create_result import GraphCreateResult
+from .graph.graph_object import Graph
+from .graph_data_science import GraphDataScience
+from .model.graphsage_model import GraphSageModel
+from .model.link_prediction_model import LinkFeature, LPModel
+from .model.node_classification_model import NCModel
+from .model.node_regression_model import NRModel
+from .model.pipeline_model import NodePropertyStep
+from .model.simple_rel_embedding_model import SimpleRelEmbeddingModel
+from .pipeline.lp_training_pipeline import LPTrainingPipeline
+from .pipeline.nc_training_pipeline import NCTrainingPipeline
+from .pipeline.nr_training_pipeline import NRTrainingPipeline
+from .query_runner.query_runner import QueryRunner
+from .server_version.server_version import ServerVersion
+from .version import __version__
+# Names re-exported as the package's public API; this is what `from graphdatascience import *` exposes.
+__all__ = [
+    "GraphDataScience",
+    "GdsSessions",
+    "QueryRunner",
+    "__version__",
+    "ServerVersion",
+    "Graph",
+    "GraphCreateResult",
+    "LPTrainingPipeline",
+    "NCTrainingPipeline",
+    "NRTrainingPipeline",
+    "NodePropertyStep",
+    "LinkFeature",
+    "LPModel",
+    "NCModel",
+    "NRModel",
+    "GraphSageModel",
+    "SimpleRelEmbeddingModel",
+]

{graphdatascience-1.8 → graphdatascience-1.9}/graphdatascience/algo/algo_proc_runner.py RENAMED Viewed

@@ -7,21 +7,15 @@ from ..error.illegal_attr_checker import IllegalAttrChecker
 from ..graph.graph_object import Graph
 from ..graph.graph_type_check import graph_type_check
 from ..model.graphsage_model import GraphSageModel
+from graphdatascience.call_parameters import CallParameters
 class AlgoProcRunner(IllegalAttrChecker, ABC):
     @graph_type_check
     def _run_procedure(self, G: Graph, config: Dict[str, Any], with_logging: bool = True) -> DataFrame:
-        query = f"CALL {self._namespace}($graph_name, $config)"
+        params = CallParameters(graph_name=G.name(), config=config)
-        params: Dict[str, Any] = {}
-        params["graph_name"] = G.name()
-        params["config"] = config
-        if with_logging:
-            return self._query_runner.run_query_with_logging(query, params)
-        else:
-            return self._query_runner.run_query(query, params)
+        return self._query_runner.call_procedure(endpoint=self._namespace, params=params, logging=with_logging)
     @graph_type_check
     def estimate(self, G: Graph, **config: Any) -> "Series[Any]":

graphdatascience-1.9/graphdatascience/call_parameters.py ADDED Viewed

@@ -0,0 +1,8 @@
+from typing import Any, OrderedDict
+class CallParameters(OrderedDict[str, Any]):
+    # since Python 3.6 also initializing through CallParameters(**kwargs) is order preserving
+    def placeholder_str(self) -> str:
+        return ", ".join([f"${k}" for k in self.keys()])

{graphdatascience-1.8 → graphdatascience-1.9}/graphdatascience/caller_base.py RENAMED Viewed

@@ -13,7 +13,11 @@ class CallerBase(ABC):
         self._server_version = server_version
     def _raise_suggestive_error_message(self, requested_endpoint: str) -> NoReturn:
-        list_result = self._query_runner.run_query("CALL gds.list() YIELD name", custom_error=False)
+        list_result = self._query_runner.call_procedure(
+            endpoint="gds.list",
+            yields=["name"],
+            custom_error=False,
+        )
         all_endpoints = list_result["name"].tolist()
         raise SyntaxError(generate_suggestive_error_message(requested_endpoint, all_endpoints))

{graphdatascience-1.8 → graphdatascience-1.9}/graphdatascience/endpoints.py RENAMED Viewed

@@ -3,11 +3,7 @@ from .algo.single_mode_algo_endpoints import (
     SingleModeAlphaAlgoEndpoints,
 )
 from .call_builder import IndirectAlphaCallBuilder, IndirectBetaCallBuilder
-from .graph.graph_endpoints import (
-    GraphAlphaEndpoints,
-    GraphBetaEndpoints,
-    GraphEndpoints,
-)
+from .graph.graph_endpoints import GraphAlphaEndpoints, GraphBetaEndpoints
 from .model.model_endpoints import (
     ModelAlphaEndpoints,
     ModelBetaEndpoints,
@@ -39,7 +35,6 @@ class DirectEndpoints(
     SingleModeAlgoEndpoints,
     DirectSystemEndpoints,
     DirectUtilEndpoints,
-    GraphEndpoints,
     PipelineEndpoints,
     ModelEndpoints,
     ConfigEndpoints,

graphdatascience-1.9/graphdatascience/gds_session/aura_api.py ADDED Viewed

@@ -0,0 +1,236 @@
+from __future__ import annotations
+import dataclasses
+import logging
+import os
+import time
+from dataclasses import dataclass
+from typing import Any, List, Optional
+from urllib.parse import urlparse
+import requests as req
+from requests import HTTPError
+from graphdatascience.version import __version__
+@dataclass(repr=True)
+class InstanceDetails:
+    id: str
+    name: str
+    tenant_id: str
+    cloud_provider: str
+    @classmethod
+    def fromJson(cls, json: dict[str, Any]) -> InstanceDetails:
+        return cls(
+            id=json["id"],
+            name=json["name"],
+            tenant_id=json["tenant_id"],
+            cloud_provider=json["cloud_provider"],
+        )
+@dataclass(repr=True)
+class InstanceSpecificDetails(InstanceDetails):
+    status: str
+    connection_url: str
+    memory: str
+    type: str
+    region: str
+    @classmethod
+    def fromJson(cls, json: dict[str, Any]) -> InstanceSpecificDetails:
+        return cls(
+            id=json["id"],
+            name=json["name"],
+            tenant_id=json["tenant_id"],
+            cloud_provider=json["cloud_provider"],
+            status=json["status"],
+            connection_url=json.get("connection_url", ""),
+            memory=json.get("memory", ""),
+            type=json["type"],
+            region=json["region"],
+        )
+@dataclass(repr=True)
+class InstanceCreateDetails:
+    id: str
+    username: str
+    password: str
+    connection_url: str
+    @classmethod
+    def from_json(cls, json: dict[str, Any]) -> InstanceCreateDetails:
+        fields = dataclasses.fields(cls)
+        if any(f.name not in json for f in fields):
+            raise RuntimeError(f"Missing required field. Expected `{[f.name for f in fields]}` but got `{json}`")
+        return cls(**{f.name: json[f.name] for f in fields})
+class AuraApi:
+    class AuraAuthToken:
+        access_token: str
+        expires_in: int
+        token_type: str
+        def __init__(self, json: dict[str, Any]) -> None:
+            self.access_token = json["access_token"]
+            expires_in: int = json["expires_in"]
+            self.expires_at = int(time.time()) + expires_in
+            self.token_type = json["token_type"]
+        def is_expired(self) -> bool:
+            return self.expires_at >= int(time.time())
+    def __init__(self, client_id: str, client_secret: str, tenant_id: Optional[str] = None) -> None:
+        self._dev_env = os.environ.get("AURA_ENV")
+        self._base_uri = "https://api.neo4j.io" if not self._dev_env else f"https://api-{self._dev_env}.neo4j-dev.io"
+        self._credentials = (client_id, client_secret)
+        self._token: Optional[AuraApi.AuraAuthToken] = None
+        self._logger = logging.getLogger()
+        self._tenant_id = tenant_id if tenant_id else self._get_tenant_id()
+    @staticmethod
+    def extract_id(uri: str) -> str:
+        host = urlparse(uri).hostname
+        if not host:
+            raise RuntimeError(f"Could not parse the uri `{uri}`.")
+        return host.split(".")[0].split("-")[0]
+    def create_instance(self, name: str, memory: str, cloud_provider: str, region: str) -> InstanceCreateDetails:
+        # TODO should give more control here
+        data = {
+            "name": name,
+            "memory": memory,
+            "version": "5",
+            "region": region,
+            # TODO should be figured out from the tenant details in the future
+            "type": self._instance_type(),
+            "tenant_id": self._tenant_id,
+            "cloud_provider": cloud_provider,
+        }
+        response = req.post(
+            f"{self._base_uri}/v1/instances",
+            json=data,
+            headers=self._build_header(),
+        )
+        try:
+            response.raise_for_status()
+        except HTTPError as e:
+            print(response.json())
+            raise e
+        return InstanceCreateDetails.from_json(response.json()["data"])
+    def delete_instance(self, instance_id: str) -> Optional[InstanceSpecificDetails]:
+        response = req.delete(
+            f"{self._base_uri}/v1/instances/{instance_id}",
+            headers=self._build_header(),
+        )
+        if response.status_code == 404:
+            return None
+        response.raise_for_status()
+        return InstanceSpecificDetails.fromJson(response.json()["data"])
+    def list_instances(self) -> List[InstanceDetails]:
+        response = req.get(
+            f"{self._base_uri}/v1/instances",
+            headers=self._build_header(),
+            params={"tenantId": self._tenant_id},
+        )
+        response.raise_for_status()
+        raw_data = response.json()["data"]
+        return [InstanceDetails.fromJson(i) for i in raw_data]
+    def list_instance(self, instance_id: str) -> Optional[InstanceSpecificDetails]:
+        response = req.get(
+            f"{self._base_uri}/v1/instances/{instance_id}",
+            headers=self._build_header(),
+        )
+        if response.status_code == 404:
+            return None
+        response.raise_for_status()
+        raw_data = response.json()["data"]
+        return InstanceSpecificDetails.fromJson(raw_data)
+    def wait_for_instance_running(
+        self, instance_id: str, sleep_time: float = 0.2, max_sleep_time: float = 300
+    ) -> Optional[str]:
+        waited_time = 0.0
+        while waited_time <= max_sleep_time:
+            instance = self.list_instance(instance_id)
+            if instance is None:
+                return "Instance is not found -- please retry"
+            elif instance.status in ["deleting", "destroying"]:
+                return "Instance is being deleted"
+            elif instance.status == "running":
+                return None
+            else:
+                self._logger.debug(
+                    f"Instance `{instance_id}` is not yet running. "
+                    f"Current status: {instance.status}. "
+                    f"Retrying in {sleep_time} seconds..."
+                )
+            waited_time += sleep_time
+            time.sleep(sleep_time)
+        return f"Instance is not running after waiting for {waited_time} seconds"
+    def _get_tenant_id(self) -> str:
+        response = req.get(
+            f"{self._base_uri}/v1/tenants",
+            headers=self._build_header(),
+        )
+        response.raise_for_status()
+        raw_data = response.json()["data"]
+        if len(raw_data) != 1:
+            raise RuntimeError(
+                f"This account has access to multiple tenants `{raw_data}`. Please specify which one to use."
+            )
+        return raw_data[0]["id"]  # type: ignore
+    def _build_header(self) -> dict[str, str]:
+        return {"Authorization": f"Bearer {self._auth_token()}", "User-agent": f"neo4j-graphdatascience-v{__version__}"}
+    def _auth_token(self) -> str:
+        if self._token is None or self._token.is_expired():
+            self._token = self._update_token()
+        return self._token.access_token
+    def _update_token(self) -> AuraAuthToken:
+        data = {
+            "grant_type": "client_credentials",
+        }
+        self._logger.debug("Updating oauth token")
+        response = req.post(
+            f"{self._base_uri}/oauth/token", data=data, auth=(self._credentials[0], self._credentials[1])
+        )
+        response.raise_for_status()
+        return AuraApi.AuraAuthToken(response.json())
+    def _instance_type(self) -> str:
+        return "enterprise-ds" if not self._dev_env else "professional-ds"

graphdatascience-1.9/graphdatascience/gds_session/aura_graph_data_science.py ADDED Viewed

@@ -0,0 +1,181 @@
+from typing import Any, Callable, Dict, Optional
+from pandas import DataFrame
+from graphdatascience.call_builder import IndirectCallBuilder
+from graphdatascience.endpoints import AlphaEndpoints, BetaEndpoints, DirectEndpoints
+from graphdatascience.error.uncallable_namespace import UncallableNamespace
+from graphdatascience.gds_session.dbms_connection_info import DbmsConnectionInfo
+from graphdatascience.graph.graph_proc_runner import GraphRemoteProcRunner
+from graphdatascience.query_runner.arrow_query_runner import ArrowQueryRunner
+from graphdatascience.query_runner.aura_db_arrow_query_runner import (
+    AuraDbArrowQueryRunner,
+)
+from graphdatascience.query_runner.neo4j_query_runner import Neo4jQueryRunner
+from graphdatascience.server_version.server_version import ServerVersion
+class AuraGraphDataScience(DirectEndpoints, UncallableNamespace):
+    """
+    Primary API class for interacting with Neo4j AuraDB + Graph Data Science.
+    Always bind this object to a variable called `gds`.
+    """
+    def __init__(
+        self,
+        gds_session_connection_info: DbmsConnectionInfo,
+        aura_db_connection_info: DbmsConnectionInfo,
+        delete_fn: Callable[[], bool],
+        arrow_disable_server_verification: bool = True,
+        arrow_tls_root_certs: Optional[bytes] = None,
+        bookmarks: Optional[Any] = None,
+    ):
+        gds_neo4j_query_runner = Neo4jQueryRunner.create(
+            gds_session_connection_info.uri, gds_session_connection_info.auth(), aura_ds=True
+        )
+        gds_query_runner = ArrowQueryRunner.create(
+            gds_neo4j_query_runner,
+            gds_session_connection_info.auth(),
+            gds_neo4j_query_runner.encrypted(),
+            arrow_disable_server_verification,
+            arrow_tls_root_certs,
+        )
+        self._server_version = gds_query_runner.server_version()
+        if self._server_version < ServerVersion(2, 6, 0):
+            raise RuntimeError(
+                f"AuraDB connection info was provided but GDS version {self._server_version} \
+                    does not support connecting to AuraDB"
+            )
+        self._db_query_runner = Neo4jQueryRunner.create(
+            aura_db_connection_info.uri,
+            aura_db_connection_info.auth(),
+            aura_ds=True,
+            server_version=self._server_version,
+        )
+        self._db_query_runner.set_bookmarks(bookmarks)
+        # we need to explicitly set these as the default value is None
+        # which signals the driver to use the default configured database
+        # from the dbms.
+        gds_query_runner.set_database("neo4j")
+        self._db_query_runner.set_database("neo4j")
+        self._query_runner = AuraDbArrowQueryRunner(
+            gds_query_runner, self._db_query_runner, self._db_query_runner.encrypted(), aura_db_connection_info
+        )
+        self._delete_fn = delete_fn
+        super().__init__(self._query_runner, "gds", self._server_version)
+    def run_cypher(
+        self, query: str, params: Optional[Dict[str, Any]] = None, database: Optional[str] = None
+    ) -> DataFrame:
+        """
+        Run a Cypher query against the AuraDB instance.
+        Parameters
+        ----------
+        query: str
+            the Cypher query
+        params: Dict[str, Any]
+            parameters to the query
+        database: str
+            the database on which to run the query
+        Returns:
+            The query result as a DataFrame
+        """
+        # This will avoid calling valid gds procedures through a raw string
+        return self._db_query_runner.run_cypher(query, params, database, False)
+    @property
+    def graph(self) -> GraphRemoteProcRunner:
+        return GraphRemoteProcRunner(self._query_runner, f"{self._namespace}.graph", self._server_version)
+    @property
+    def alpha(self) -> AlphaEndpoints:
+        return AlphaEndpoints(self._query_runner, "gds.alpha", self._server_version)
+    @property
+    def beta(self) -> BetaEndpoints:
+        return BetaEndpoints(self._query_runner, "gds.beta", self._server_version)
+    def __getattr__(self, attr: str) -> IndirectCallBuilder:
+        return IndirectCallBuilder(self._query_runner, f"gds.{attr}", self._server_version)
+    def set_database(self, database: str) -> None:
+        """
+        Set the database which queries are run against.
+        Parameters
+        -------
+        database: str
+            The name of the database to run queries against.
+        """
+        self._db_query_runner.set_database(database)
+    def set_bookmarks(self, bookmarks: Any) -> None:
+        """
+        Set Neo4j bookmarks to require a certain state before the next query gets executed
+        Parameters
+        ----------
+        bookmarks: Bookmark(s)
+            The Neo4j bookmarks defining the required state
+        """
+        self._db_query_runner.set_bookmarks(bookmarks)
+    def database(self) -> Optional[str]:
+        """
+        Get the database which queries are run against.
+        Returns:
+            The name of the database.
+        """
+        return self._db_query_runner.database()
+    def bookmarks(self) -> Optional[Any]:
+        """
+        Get the Neo4j bookmarks defining the currently required states for queries to execute
+        Returns
+        -------
+        The (possibly None) Neo4j bookmarks defining the currently required state
+        """
+        return self._db_query_runner.bookmarks()
+    def last_bookmarks(self) -> Optional[Any]:
+        """
+        Get the Neo4j bookmarks defining the state following the most recently called query
+        Returns
+        -------
+        The (possibly None) Neo4j bookmarks defining the state following the most recently called query
+        """
+        return self._db_query_runner.last_bookmarks()
+    def driver_config(self) -> Dict[str, Any]:
+        """
+        Get the configuration used to create the underlying driver used to make queries to Neo4j.
+        Returns:
+            The configuration as a dictionary.
+        """
+        return self._query_runner.driver_config()
+    def delete(self) -> bool:
+        """
+        Delete a GDS session.
+        """
+        self.close()
+        return self._delete_fn()
+    def close(self) -> None:
+        """
+        Close the GraphDataScience object and release any resources held by it.
+        """
+        self._query_runner.close()

graphdatascience-1.9/graphdatascience/gds_session/dbms_connection_info.py ADDED Viewed

@@ -0,0 +1,14 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Tuple
+@dataclass
+class DbmsConnectionInfo:
+    uri: str
+    username: str
+    password: str
+    def auth(self) -> Tuple[str, str]:
+        return self.username, self.password

graphdatascience 1.8__tar.gz → 1.9__tar.gz

graphdatascience 1.8tar.gz → 1.9tar.gz