PyPI - graphdatascience - Versions diffs - 1.10a1__tar.gz → 1.11a1__tar.gz - Mend

graphdatascience 1.10a1__tar.gz → 1.11a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

{graphdatascience-1.10a1/graphdatascience.egg-info → graphdatascience-1.11a1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphdatascience
-Version: 1.10a1
+Version: 1.11a1
 Summary: A Python client for the Neo4j Graph Data Science (GDS) library
 Home-page: https://neo4j.com/product/graph-data-science/
 Author: Neo4j
@@ -31,7 +31,7 @@ License-File: LICENSE
 Requires-Dist: multimethod<2.0,>=1.0
 Requires-Dist: neo4j<6.0,>=4.4.2
 Requires-Dist: pandas<3.0,>=1.0
-Requires-Dist: pyarrow<15.0,>=10.0
+Requires-Dist: pyarrow<16.0,>=11.0
 Requires-Dist: textdistance<5.0,>=4.0
 Requires-Dist: tqdm<5.0,>=4.0
 Requires-Dist: typing-extensions<5.0,>=4.0

{graphdatascience-1.10a1 → graphdatascience-1.11a1}/graphdatascience/graph/base_graph_proc_runner.py RENAMED Viewed

@@ -18,6 +18,7 @@ from .graph_entity_ops_runner import (
     GraphElementPropertyRunner,
     GraphLabelRunner,
     GraphNodePropertiesRunner,
+    GraphNodePropertyRunner,
     GraphPropertyRunner,
     GraphRelationshipPropertiesRunner,
     GraphRelationshipRunner,
@@ -379,9 +380,9 @@ class BaseGraphProcRunner(UncallableNamespace, IllegalAttrChecker):
         )
     @property
-    def nodeProperty(self) -> GraphElementPropertyRunner:
+    def nodeProperty(self) -> GraphNodePropertyRunner:
         self._namespace += ".nodeProperty"
-        return GraphElementPropertyRunner(self._query_runner, self._namespace, self._server_version)
+        return GraphNodePropertyRunner(self._query_runner, self._namespace, self._server_version)
     @property
     def nodeProperties(self) -> GraphNodePropertiesRunner:
@@ -516,8 +517,7 @@ class BaseGraphProcRunner(UncallableNamespace, IllegalAttrChecker):
         ).squeeze()
     @multimethod
-    def removeNodeProperties(self) -> None:
-        ...
+    def removeNodeProperties(self) -> None: ...
     @removeNodeProperties.register
     @graph_type_check

{graphdatascience-1.10a1 → graphdatascience-1.11a1}/graphdatascience/graph/graph_entity_ops_runner.py RENAMED Viewed

@@ -77,6 +77,26 @@ class GraphElementPropertyRunner(GraphEntityOpsBaseRunner):
         return self._handle_properties(G, node_properties, node_labels, config)
+class GraphNodePropertyRunner(GraphEntityOpsBaseRunner):
+    @compatible_with("stream", min_inclusive=ServerVersion(2, 2, 0))
+    @filter_id_func_deprecation_warning()
+    def stream(
+        self,
+        G: Graph,
+        node_property: str,
+        node_labels: Strings = ["*"],
+        db_node_properties: List[str] = [],
+        **config: Any,
+    ) -> DataFrame:
+        self._namespace += ".stream"
+        result = self._handle_properties(G, node_property, node_labels, config)
+        return GraphNodePropertiesRunner._process_result(
+            self._query_runner, list(node_property), False, db_node_properties, result, config
+        )
 class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
     @compatible_with("stream", min_inclusive=ServerVersion(2, 2, 0))
     @filter_id_func_deprecation_warning()
@@ -93,6 +113,19 @@ class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
         result = self._handle_properties(G, node_properties, node_labels, config)
+        return GraphNodePropertiesRunner._process_result(
+            self._query_runner, node_properties, separate_property_columns, db_node_properties, result, config
+        )
+    @staticmethod
+    def _process_result(
+        query_runner: QueryRunner,
+        node_properties: List[str],
+        separate_property_columns: bool,
+        db_node_properties: List[str],
+        result: DataFrame,
+        config: Dict[str, Any],
+    ) -> DataFrame:
         # new format was requested, but the query was run via Cypher
         if separate_property_columns and "propertyValue" in result.keys():
             wide_result = result.pivot(index=["nodeId"], columns=["nodeProperty"], values="propertyValue")
@@ -106,7 +139,7 @@ class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
         # old format was requested but the query was run via Arrow
         elif not separate_property_columns and "propertyValue" not in result.keys():
             id_vars = ["nodeId", "nodeLabels"] if config.get("listNodeLabels", False) else ["nodeId"]
-            result = result.melt(id_vars=id_vars).rename(columns={"variable": "nodeProperty", "value": "propertyValue"})
+            result = result.melt(id_vars=id_vars, var_name="nodeProperty", value_name="propertyValue")
         if db_node_properties:
             duplicate_properties = set(db_node_properties).intersection(set(node_properties))
@@ -116,16 +149,20 @@ class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
                 )
             unique_node_ids = result["nodeId"].drop_duplicates().tolist()
-            db_properties_df = self._query_runner.run_cypher(
-                self._build_query(db_node_properties), {"ids": unique_node_ids}
+            db_properties_df = query_runner.run_cypher(
+                GraphNodePropertiesRunner._build_query(db_node_properties), {"ids": unique_node_ids}
             )
             if "propertyValue" not in result.keys():
                 result = result.join(db_properties_df.set_index("nodeId"), on="nodeId")
             else:
-                db_properties_df = db_properties_df.melt(id_vars=["nodeId"]).rename(
-                    columns={"variable": "nodeProperty", "value": "propertyValue"}
+                db_properties_df = db_properties_df.melt(
+                    id_vars=["nodeId"], var_name="nodeProperty", value_name="propertyValue"
                 )
+                if "nodeProperty" not in result.keys():
+                    result["nodeProperty"] = node_properties[0]
                 result = pd.concat([result, db_properties_df])
         return result

{graphdatascience-1.10a1 → graphdatascience-1.11a1}/graphdatascience/graph/graph_remote_project_runner.py RENAMED Viewed

@@ -8,13 +8,11 @@ from .graph_object import Graph
 from graphdatascience.call_parameters import CallParameters
 from graphdatascience.graph.graph_create_result import GraphCreateResult
 from graphdatascience.server_version.server_version import ServerVersion
-from graphdatascience.session.schema import (
-    NODE_PROPERTY_SCHEMA,
-    RELATIONSHIP_PROPERTY_SCHEMA,
-)
 class GraphProjectRemoteRunner(IllegalAttrChecker):
+    _SCHEMA_KEYS = ["nodePropertySchema", "relationshipPropertySchema"]
     @compatible_with("project", min_inclusive=ServerVersion(2, 6, 0))
     def __call__(self, graph_name: str, query: str, **config: Any) -> GraphCreateResult:
         placeholder = "<>"  # host and token will be added by query runner
@@ -35,6 +33,6 @@ class GraphProjectRemoteRunner(IllegalAttrChecker):
     @staticmethod
     def map_property_types(config: dict[str, Any]) -> None:
-        for key in [NODE_PROPERTY_SCHEMA, RELATIONSHIP_PROPERTY_SCHEMA]:
+        for key in GraphProjectRemoteRunner._SCHEMA_KEYS:
             if key in config:
                 config[key] = {k: v.value for k, v in config[key].items()}

{graphdatascience-1.10a1 → graphdatascience-1.11a1}/graphdatascience/graph_data_science.py RENAMED Viewed

@@ -23,11 +23,12 @@ class GraphDataScience(DirectEndpoints, UncallableNamespace):
     def __init__(
         self,
+        /,
         endpoint: Union[str, Driver, QueryRunner],
         auth: Optional[Tuple[str, str]] = None,
         aura_ds: bool = False,
         database: Optional[str] = None,
-        arrow: bool = True,
+        arrow: Union[str, bool] = True,
         arrow_disable_server_verification: bool = True,
         arrow_tls_root_certs: Optional[bytes] = None,
         bookmarks: Optional[Any] = None,
@@ -43,19 +44,20 @@ class GraphDataScience(DirectEndpoints, UncallableNamespace):
             A username, password pair for database authentication.
         aura_ds : bool, default False
             A flag that indicates that that the client is used to connect
-            to a Neo4j Aura instance.
+            to a Neo4j AuraDS instance.
         database: Optional[str], default None
             The Neo4j database to query against.
-        arrow : bool, default True
-            A flag that indicates that the client should use Apache Arrow
-            for data streaming if it is available on the server.
+        arrow : Union[str, bool], default True
+            Arrow connection information. This is either a bool or a string.
+            If it is a string, it will be interpreted as a connection URL to a GDS Arrow Server.
+            If it is a bool,
+                True will make the client discover the connection URI to the GDS Arrow server via the Neo4j endpoint,
+                while False will make the client use Bolt for all operations.
         arrow_disable_server_verification : bool, default True
-            A flag that indicates that, if the flight client is connecting with
-            TLS, that it skips server verification. If this is enabled, all
-            other TLS settings are overridden.
+            A flag that overrides other TLS settings and disables server verification for TLS connections.
         arrow_tls_root_certs : Optional[bytes], default None
-            PEM-encoded certificates that are used for the connecting to the
-            Arrow Flight server.
+            PEM-encoded certificates that are used for the connection to the
+            GDS Arrow Flight server.
         bookmarks : Optional[Any], default None
             The Neo4j bookmarks to require a certain state before the next query gets executed.
         """
@@ -76,6 +78,7 @@ class GraphDataScience(DirectEndpoints, UncallableNamespace):
                 self._query_runner.encrypted(),
                 arrow_disable_server_verification,
                 arrow_tls_root_certs,
+                None if arrow is True else arrow,
             )
         super().__init__(self._query_runner, "gds", self._server_version)

{graphdatascience-1.10a1 → graphdatascience-1.11a1}/graphdatascience/query_runner/arrow_graph_constructor.py RENAMED Viewed

@@ -1,9 +1,11 @@
+from __future__ import annotations
 import concurrent
 import json
 import math
 import warnings
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, NoReturn, Optional
 import numpy
 import pyarrow.flight as flight
@@ -93,7 +95,7 @@ class ArrowGraphConstructor(GraphConstructor):
         json.loads(collected_result[0].body.to_pybytes().decode())
-    def _send_df(self, df: DataFrame, entity_type: str, pbar: tqdm) -> None:
+    def _send_df(self, df: DataFrame, entity_type: str, pbar: tqdm[NoReturn]) -> None:
         table = Table.from_pandas(df)
         batches = table.to_batches(self._chunk_size)
         flight_descriptor = {"name": self._graph_name, "entity_type": entity_type}
@@ -108,6 +110,8 @@ class ArrowGraphConstructor(GraphConstructor):
             for partition in batches:
                 writer.write_batch(partition)
                 pbar.update(partition.num_rows)
+        # Force a refresh to avoid the progress bar getting stuck at 0%
+        pbar.refresh()
     def _send_dfs(self, dfs: List[DataFrame], entity_type: str) -> None:
         desc = "Uploading Nodes" if entity_type == "node" else "Uploading Relationships"

{graphdatascience-1.10a1 → graphdatascience-1.11a1}/graphdatascience/query_runner/arrow_query_runner.py RENAMED Viewed

@@ -31,17 +31,22 @@ class ArrowQueryRunner(QueryRunner):
         encrypted: bool = False,
         disable_server_verification: bool = False,
         tls_root_certs: Optional[bytes] = None,
+        connection_string_override: Optional[str] = None,
     ) -> QueryRunner:
         arrow_info = (
             fallback_query_runner.call_procedure(endpoint="gds.debug.arrow", custom_error=False).squeeze().to_dict()
         )
         server_version = fallback_query_runner.server_version()
-        listen_address: str = arrow_info.get("advertisedListenAddress", arrow_info["listenAddress"])
+        connection_string: str
+        if connection_string_override is not None:
+            connection_string = connection_string_override
+        else:
+            connection_string = arrow_info.get("advertisedListenAddress", arrow_info["listenAddress"])
         arrow_endpoint_version = ArrowEndpointVersion.from_arrow_info(arrow_info.get("versions", []))
         if arrow_info["running"]:
             return ArrowQueryRunner(
-                listen_address,
+                connection_string,
                 fallback_query_runner,
                 server_version,
                 auth,
@@ -277,7 +282,7 @@ class ArrowQueryRunner(QueryRunner):
         if self._arrow_endpoint_version == ArrowEndpointVersion.V1:
             payload = {
-                "name": "GET_MESSAGE",
+                "name": "GET_COMMAND",
                 "version": ArrowEndpointVersion.V1.version(),
                 "body": payload,
             }
@@ -320,6 +325,10 @@ class ArrowQueryRunner(QueryRunner):
         )
     def _sanitize_arrow_table(self, arrow_table: Table) -> Table:
+        # empty columns cannot be used to build a chunked_array in pyarrow
+        if len(arrow_table) == 0:
+            return arrow_table
         dict_encoded_fields = [
             (idx, field) for idx, field in enumerate(arrow_table.schema) if is_dictionary(field.type)
         ]
@@ -368,10 +377,17 @@ class AuthMiddleware(ClientMiddleware):  # type: ignore
         self._factory = factory
     def received_headers(self, headers: Dict[str, Any]) -> None:
-        auth_header: str = headers.get("Authorization", None)
+        auth_header = headers.get("authorization", None)
         if not auth_header:
             return
-        [auth_type, token] = auth_header.split(" ", 1)
+        # the result is always a list
+        header_value = auth_header[0]
+        if not isinstance(header_value, str):
+            raise ValueError(f"Incompatible header value received from server: `{header_value}`")
+        auth_type, token = header_value.split(" ", 1)
         if auth_type == "Bearer":
             self._factory.set_token(token)

{graphdatascience-1.10a1 → graphdatascience-1.11a1}/graphdatascience/query_runner/neo4j_query_runner.py RENAMED Viewed

@@ -5,7 +5,7 @@ import re
 import time
 import warnings
 from concurrent.futures import Future, ThreadPoolExecutor, wait
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union
 from uuid import uuid4
 import neo4j
@@ -46,18 +46,20 @@ class Neo4jQueryRunner(QueryRunner):
             driver = neo4j.GraphDatabase.driver(endpoint, auth=auth, **config)
             query_runner = Neo4jQueryRunner(
-                driver, auto_close=True, bookmarks=bookmarks, config=config, server_version=server_version
+                driver,
+                auto_close=True,
+                bookmarks=bookmarks,
+                config=config,
+                server_version=server_version,
+                database=database,
             )
         elif isinstance(endpoint, neo4j.Driver):
-            query_runner = Neo4jQueryRunner(endpoint, auto_close=False, bookmarks=bookmarks)
+            query_runner = Neo4jQueryRunner(endpoint, auto_close=False, bookmarks=bookmarks, database=database)
         else:
             raise ValueError(f"Invalid endpoint type: {type(endpoint)}")
-        if database:
-            query_runner.set_database(database)
         return query_runner
     @staticmethod
@@ -97,7 +99,7 @@ class Neo4jQueryRunner(QueryRunner):
         if database is None:
             database = self._database
-        self._verify_connectivity()
+        self._verify_connectivity(database=database)
         with self._driver.session(database=database, bookmarks=self.bookmarks()) as session:
             try:
@@ -224,14 +226,19 @@ class Neo4jQueryRunner(QueryRunner):
             self._logger.info(notification)
     def _log(self, job_id: str, future: "Future[Any]", database: Optional[str] = None) -> None:
-        pbar = None
+        pbar: Optional[tqdm[NoReturn]] = None
         warn_if_failure = True
         while wait([future], timeout=self._LOG_POLLING_INTERVAL).not_done:
             try:
                 tier = "beta." if self._server_version < ServerVersion(2, 5, 0) else ""
+                # we only retrieve the progress of the root task
                 progress = self.run_cypher(
-                    f"CALL gds.{tier}listProgress('{job_id}') YIELD taskName, progress", database=database
+                    f"CALL gds.{tier}listProgress('{job_id}')"
+                    + " YIELD taskName, progress"
+                    + " RETURN taskName, progress"
+                    + " LIMIT 1",
+                    database=database,
                 )
             except Exception as e:
                 # Do nothing if the procedure either:
@@ -246,17 +253,19 @@ class Neo4jQueryRunner(QueryRunner):
                     continue
             progress_percent = progress["progress"][0]
-            if not progress_percent == "n/a":
-                task_name = progress["taskName"][0].split("|--")[-1][1:]
-                pbar = pbar or tqdm(total=100, unit="%", desc=task_name)
-            else:
+            if progress_percent == "n/a":
                 return
+            root_task_name = progress["taskName"][0].split("|--")[-1][1:]
+            if not pbar:
+                pbar = tqdm(total=100, unit="%", desc=root_task_name, maxinterval=self._LOG_POLLING_INTERVAL)
             parsed_progress = float(progress_percent[:-1])
             pbar.update(parsed_progress - pbar.n)
         if pbar:
             pbar.update(100 - pbar.n)
+            pbar.refresh()
     def set_database(self, database: str) -> None:
         self._database = database
@@ -303,11 +312,14 @@ class Neo4jQueryRunner(QueryRunner):
         raise SyntaxError(generate_suggestive_error_message(requested_endpoint, all_endpoints)) from e
-    def _verify_connectivity(self) -> None:
+    def _verify_connectivity(self, database: Optional[str] = None) -> None:
         WAIT_TIME = 1
         MAX_RETRYS = 10 * 60
         WARN_INTERVAL = 10
+        if database is None:
+            database = self._database
         exception = None
         retrys = 0
         while retrys < MAX_RETRYS:
@@ -318,7 +330,16 @@ class Neo4jQueryRunner(QueryRunner):
                         category=neo4j.ExperimentalWarning,
                         message=r"^The configuration may change in the future.$",
                     )
-                self._driver.verify_connectivity()
+                else:
+                    warnings.filterwarnings(
+                        "ignore",
+                        category=neo4j.ExperimentalWarning,
+                        message=(
+                            r"^All configuration key-word arguments to verify_connectivity\(\) are experimental. "
+                            "They might be changed or removed in any future version without prior notice.$"
+                        ),
+                    )
+                self._driver.verify_connectivity(database=database)
                 break
             except neo4j.exceptions.DriverError as e:
                 exception = e

graphdatascience-1.11a1/graphdatascience/session/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+from .algorithm_category import AlgorithmCategory
+from .dbms_connection_info import DbmsConnectionInfo
+from .gds_sessions import AuraAPICredentials, GdsSessions
+from .schema import GdsPropertyTypes
+from .session_info import SessionInfo
+from .session_sizes import SessionMemory
+__all__ = [
+    "GdsSessions",
+    "SessionInfo",
+    "DbmsConnectionInfo",
+    "AuraAPICredentials",
+    "SessionMemory",
+    "GdsPropertyTypes",
+    "AlgorithmCategory",
+]

graphdatascience-1.11a1/graphdatascience/session/algorithm_category.py ADDED Viewed

@@ -0,0 +1,14 @@
+from enum import Enum
+class AlgorithmCategory(Enum):
+    """
+    Enumeration of supported algorithm categories used for size estimation.
+    """
+    CENTRALITY = "centrality"
+    COMMUNITY_DETECTION = "community-detection"
+    MACHINE_LEARNING = "machine-learning"
+    NODE_EMBEDDING = "node-embedding"
+    PATH_FINDING = "path-finding"
+    SIMILARITY = "similarity"

graphdatascience 1.10a1__tar.gz → 1.11a1__tar.gz

graphdatascience 1.10a1__tar.gz → 1.11a1__tar.gz