PyPI - graphdatascience - Versions diffs - 1.9__tar.gz → 1.10__tar.gz - Mend

graphdatascience 1.9__tar.gz → 1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134)

{graphdatascience-1.9/graphdatascience.egg-info → graphdatascience-1.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: graphdatascience
-Version: 1.9
+Version: 1.10
 Summary: A Python client for the Neo4j Graph Data Science (GDS) library
 Home-page: https://neo4j.com/product/graph-data-science/
 Author: Neo4j
@@ -20,6 +20,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Database
 Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Software Development
@@ -30,7 +31,7 @@ License-File: LICENSE
 Requires-Dist: multimethod<2.0,>=1.0
 Requires-Dist: neo4j<6.0,>=4.4.2
 Requires-Dist: pandas<3.0,>=1.0
-Requires-Dist: pyarrow<15.0,>=10.0
+Requires-Dist: pyarrow<16.0,>=11.0
 Requires-Dist: textdistance<5.0,>=4.0
 Requires-Dist: tqdm<5.0,>=4.0
 Requires-Dist: typing-extensions<5.0,>=4.0

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/__init__.py RENAMED Viewed

@@ -1,4 +1,3 @@
-from .gds_session.gds_sessions import GdsSessions
 from .graph.graph_create_result import GraphCreateResult
 from .graph.graph_object import Graph
 from .graph_data_science import GraphDataScience
@@ -13,6 +12,7 @@ from .pipeline.nc_training_pipeline import NCTrainingPipeline
 from .pipeline.nr_training_pipeline import NRTrainingPipeline
 from .query_runner.query_runner import QueryRunner
 from .server_version.server_version import ServerVersion
+from .session.gds_sessions import GdsSessions
 from .version import __version__
 __all__ = [

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/error/endpoint_suggester.py RENAMED Viewed

@@ -9,7 +9,7 @@ def generate_suggestive_error_message(requested_endpoint: str, all_endpoints: Li
     MIN_SIMILARITY_FOR_SUGGESTION = 0.9
     closest_endpoint = None
-    curr_max_similarity = 0
+    curr_max_similarity = 0.0
     for ep in all_endpoints:
         similarity = textdistance.jaro_winkler(requested_endpoint, ep)
         if similarity >= MIN_SIMILARITY_FOR_SUGGESTION:

graphdatascience-1.9/graphdatascience/graph/graph_proc_runner.py → graphdatascience-1.10/graphdatascience/graph/base_graph_proc_runner.py RENAMED Viewed

@@ -1,6 +1,7 @@
 import os
 import pathlib
 import sys
+import warnings
 from typing import Any, Dict, List, Optional, Union
 import pandas as pd
@@ -17,6 +18,7 @@ from .graph_entity_ops_runner import (
     GraphElementPropertyRunner,
     GraphLabelRunner,
     GraphNodePropertiesRunner,
+    GraphNodePropertyRunner,
     GraphPropertyRunner,
     GraphRelationshipPropertiesRunner,
     GraphRelationshipRunner,
@@ -24,7 +26,6 @@ from .graph_entity_ops_runner import (
 )
 from .graph_export_runner import GraphExportRunner
 from .graph_object import Graph
-from .graph_project_runner import GraphProjectRemoteRunner, GraphProjectRunner
 from .graph_sample_runner import GraphSampleRunner
 from .graph_type_check import (
     from_graph_type_check,
@@ -34,7 +35,6 @@ from .graph_type_check import (
 from .ogb_loader import OGBLLoader, OGBNLoader
 from graphdatascience.call_parameters import CallParameters
 from graphdatascience.graph.graph_create_result import GraphCreateResult
-from graphdatascience.graph.graph_cypher_runner import GraphCypherRunner
 Strings = Union[str, List[str]]
@@ -42,6 +42,15 @@ is_neo4j_4_driver = ServerVersion.from_string(neo4j_driver_version) < ServerVers
 class BaseGraphProcRunner(UncallableNamespace, IllegalAttrChecker):
+    def __init__(self, query_runner: Any, namespace: str, server_version: ServerVersion):
+        super().__init__(query_runner, namespace, server_version)
+        # Pandas 2.2.0 deprecated an API used by ArrowTable.to_pandas() (< pyarrow 14.0)
+        warnings.filterwarnings(
+            "ignore",
+            category=DeprecationWarning,
+            message=r"Passing a BlockManager to DataFrame is deprecated",
+        )
     @staticmethod
     def _path(package: str, resource: str) -> pathlib.Path:
         if sys.version_info >= (3, 9):
@@ -371,9 +380,9 @@ class BaseGraphProcRunner(UncallableNamespace, IllegalAttrChecker):
         )
     @property
-    def nodeProperty(self) -> GraphElementPropertyRunner:
+    def nodeProperty(self) -> GraphNodePropertyRunner:
         self._namespace += ".nodeProperty"
-        return GraphElementPropertyRunner(self._query_runner, self._namespace, self._server_version)
+        return GraphNodePropertyRunner(self._query_runner, self._namespace, self._server_version)
     @property
     def nodeProperties(self) -> GraphNodePropertiesRunner:
@@ -558,22 +567,3 @@ class BaseGraphProcRunner(UncallableNamespace, IllegalAttrChecker):
             endpoint=self._namespace,
             params=params,
         ).squeeze()
-class GraphProcRunner(BaseGraphProcRunner):
-    @property
-    def project(self) -> GraphProjectRunner:
-        self._namespace += ".project"
-        return GraphProjectRunner(self._query_runner, self._namespace, self._server_version)
-    @property
-    def cypher(self) -> GraphCypherRunner:
-        self._namespace += ".project"
-        return GraphCypherRunner(self._query_runner, self._namespace, self._server_version)
-class GraphRemoteProcRunner(BaseGraphProcRunner):
-    @property
-    def project(self) -> GraphProjectRemoteRunner:
-        self._namespace += ".project.remoteDb"
-        return GraphProjectRemoteRunner(self._query_runner, self._namespace, self._server_version)

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/graph/graph_entity_ops_runner.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from functools import reduce
 from typing import Any, Dict, List, Type, Union
+from warnings import filterwarnings
 import pandas as pd
 from pandas import DataFrame, Series
@@ -26,6 +27,13 @@ class TopologyDataFrame(DataFrame):
         return TopologyDataFrame
     def by_rel_type(self) -> Dict[str, List[List[int]]]:
+        # Pandas 2.2.0 deprecated an internal API used by DF.take(indices)
+        filterwarnings(
+            "ignore",
+            category=DeprecationWarning,
+            message=r"Passing a BlockManager to TopologyDataFrame is deprecated",
+        )
         gb = self.groupby("relationshipType", observed=True)
         output = {}
@@ -69,6 +77,26 @@ class GraphElementPropertyRunner(GraphEntityOpsBaseRunner):
         return self._handle_properties(G, node_properties, node_labels, config)
+class GraphNodePropertyRunner(GraphEntityOpsBaseRunner):
+    @compatible_with("stream", min_inclusive=ServerVersion(2, 2, 0))
+    @filter_id_func_deprecation_warning()
+    def stream(
+        self,
+        G: Graph,
+        node_property: str,
+        node_labels: Strings = ["*"],
+        db_node_properties: List[str] = [],
+        **config: Any,
+    ) -> DataFrame:
+        self._namespace += ".stream"
+        result = self._handle_properties(G, node_property, node_labels, config)
+        return GraphNodePropertiesRunner._process_result(
+            self._query_runner, list(node_property), False, db_node_properties, result, config
+        )
 class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
     @compatible_with("stream", min_inclusive=ServerVersion(2, 2, 0))
     @filter_id_func_deprecation_warning()
@@ -85,6 +113,19 @@ class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
         result = self._handle_properties(G, node_properties, node_labels, config)
+        return GraphNodePropertiesRunner._process_result(
+            self._query_runner, node_properties, separate_property_columns, db_node_properties, result, config
+        )
+    @staticmethod
+    def _process_result(
+        query_runner: QueryRunner,
+        node_properties: List[str],
+        separate_property_columns: bool,
+        db_node_properties: List[str],
+        result: DataFrame,
+        config: Dict[str, Any],
+    ) -> DataFrame:
         # new format was requested, but the query was run via Cypher
         if separate_property_columns and "propertyValue" in result.keys():
             wide_result = result.pivot(index=["nodeId"], columns=["nodeProperty"], values="propertyValue")
@@ -98,7 +139,7 @@ class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
         # old format was requested but the query was run via Arrow
         elif not separate_property_columns and "propertyValue" not in result.keys():
             id_vars = ["nodeId", "nodeLabels"] if config.get("listNodeLabels", False) else ["nodeId"]
-            result = result.melt(id_vars=id_vars).rename(columns={"variable": "nodeProperty", "value": "propertyValue"})
+            result = result.melt(id_vars=id_vars, var_name="nodeProperty", value_name="propertyValue")
         if db_node_properties:
             duplicate_properties = set(db_node_properties).intersection(set(node_properties))
@@ -108,16 +149,20 @@ class GraphNodePropertiesRunner(GraphEntityOpsBaseRunner):
                 )
             unique_node_ids = result["nodeId"].drop_duplicates().tolist()
-            db_properties_df = self._query_runner.run_cypher(
-                self._build_query(db_node_properties), {"ids": unique_node_ids}
+            db_properties_df = query_runner.run_cypher(
+                GraphNodePropertiesRunner._build_query(db_node_properties), {"ids": unique_node_ids}
             )
             if "propertyValue" not in result.keys():
                 result = result.join(db_properties_df.set_index("nodeId"), on="nodeId")
             else:
-                db_properties_df = db_properties_df.melt(id_vars=["nodeId"]).rename(
-                    columns={"variable": "nodeProperty", "value": "propertyValue"}
+                db_properties_df = db_properties_df.melt(
+                    id_vars=["nodeId"], var_name="nodeProperty", value_name="propertyValue"
                 )
+                if "nodeProperty" not in result.keys():
+                    result["nodeProperty"] = node_properties[0]
                 result = pd.concat([result, db_properties_df])
         return result
@@ -242,13 +287,13 @@ class ToUndirectedRunner(IllegalAttrChecker):
     @graph_type_check
     def __call__(self, G: Graph, relationship_type: str, mutate_relationship_type: str, **config: Any) -> "Series[Any]":
-        return self._run_procedure(G, relationship_type, mutate_relationship_type)
+        return self._run_procedure(G, relationship_type, mutate_relationship_type, **config)
     @graph_type_check
     @compatible_with("estimate", min_inclusive=ServerVersion(2, 3, 0))
     def estimate(self, G: Graph, relationship_type: str, mutate_relationship_type: str, **config: Any) -> "Series[Any]":
         self._namespace += ".estimate"
-        return self._run_procedure(G, relationship_type, mutate_relationship_type)
+        return self._run_procedure(G, relationship_type, mutate_relationship_type, **config)
 class GraphRelationshipsRunner(GraphEntityOpsBaseRunner):

graphdatascience-1.10/graphdatascience/graph/graph_proc_runner.py ADDED Viewed

@@ -0,0 +1,15 @@
+from .graph_project_runner import GraphProjectRunner
+from graphdatascience.graph.base_graph_proc_runner import BaseGraphProcRunner
+from graphdatascience.graph.graph_cypher_runner import GraphCypherRunner
+class GraphProcRunner(BaseGraphProcRunner):
+    @property
+    def project(self) -> GraphProjectRunner:
+        self._namespace += ".project"
+        return GraphProjectRunner(self._query_runner, self._namespace, self._server_version)
+    @property
+    def cypher(self) -> GraphCypherRunner:
+        self._namespace += ".project"
+        return GraphCypherRunner(self._query_runner, self._namespace, self._server_version)

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/graph/graph_project_runner.py RENAMED Viewed

@@ -5,13 +5,10 @@ from typing import Any
 from pandas import Series
 from ..error.illegal_attr_checker import IllegalAttrChecker
-from ..gds_session.schema import NODE_PROPERTY_SCHEMA, RELATIONSHIP_PROPERTY_SCHEMA
 from .graph_object import Graph
 from .graph_type_check import from_graph_type_check
 from graphdatascience.call_parameters import CallParameters
 from graphdatascience.graph.graph_create_result import GraphCreateResult
-from graphdatascience.server_version.compatible_with import compatible_with
-from graphdatascience.server_version.server_version import ServerVersion
 class GraphProjectRunner(IllegalAttrChecker):
@@ -73,29 +70,3 @@ class GraphProjectBetaRunner(IllegalAttrChecker):
         ).squeeze()
         return GraphCreateResult(Graph(graph_name, self._query_runner, self._server_version), result)
-class GraphProjectRemoteRunner(IllegalAttrChecker):
-    @compatible_with("project", min_inclusive=ServerVersion(2, 6, 0))
-    def __call__(self, graph_name: str, query: str, **config: Any) -> GraphCreateResult:
-        placeholder = "<>"  # host and token will be added by query runner
-        self.map_property_types(config)
-        params = CallParameters(
-            graph_name=graph_name,
-            query=query,
-            token=placeholder,
-            host=placeholder,
-            remote_database=self._query_runner.database(),
-            config=config,
-        )
-        result = self._query_runner.call_procedure(
-            endpoint=self._namespace,
-            params=params,
-        ).squeeze()
-        return GraphCreateResult(Graph(graph_name, self._query_runner, self._server_version), result)
-    @staticmethod
-    def map_property_types(config: dict[str, Any]) -> None:
-        for key in [NODE_PROPERTY_SCHEMA, RELATIONSHIP_PROPERTY_SCHEMA]:
-            if key in config:
-                config[key] = {k: v.value for k, v in config[key].items()}

graphdatascience-1.10/graphdatascience/graph/graph_remote_proc_runner.py ADDED Viewed

@@ -0,0 +1,9 @@
+from graphdatascience.graph.base_graph_proc_runner import BaseGraphProcRunner
+from graphdatascience.graph.graph_remote_project_runner import GraphProjectRemoteRunner
+class GraphRemoteProcRunner(BaseGraphProcRunner):
+    @property
+    def project(self) -> GraphProjectRemoteRunner:
+        self._namespace += ".project.remoteDb"
+        return GraphProjectRemoteRunner(self._query_runner, self._namespace, self._server_version)

graphdatascience-1.10/graphdatascience/graph/graph_remote_project_runner.py ADDED Viewed

@@ -0,0 +1,38 @@
+from __future__ import annotations
+from typing import Any
+from ..error.illegal_attr_checker import IllegalAttrChecker
+from ..server_version.compatible_with import compatible_with
+from .graph_object import Graph
+from graphdatascience.call_parameters import CallParameters
+from graphdatascience.graph.graph_create_result import GraphCreateResult
+from graphdatascience.server_version.server_version import ServerVersion
+class GraphProjectRemoteRunner(IllegalAttrChecker):
+    _SCHEMA_KEYS = ["nodePropertySchema", "relationshipPropertySchema"]
+    @compatible_with("project", min_inclusive=ServerVersion(2, 6, 0))
+    def __call__(self, graph_name: str, query: str, **config: Any) -> GraphCreateResult:
+        placeholder = "<>"  # host and token will be added by query runner
+        self.map_property_types(config)
+        params = CallParameters(
+            graph_name=graph_name,
+            query=query,
+            token=placeholder,
+            host=placeholder,
+            remote_database=self._query_runner.database(),
+            config=config,
+        )
+        result = self._query_runner.call_procedure(
+            endpoint=self._namespace,
+            params=params,
+        ).squeeze()
+        return GraphCreateResult(Graph(graph_name, self._query_runner, self._server_version), result)
+    @staticmethod
+    def map_property_types(config: dict[str, Any]) -> None:
+        for key in GraphProjectRemoteRunner._SCHEMA_KEYS:
+            if key in config:
+                config[key] = {k: v.value for k, v in config[key].items()}

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/graph/ogb_loader.py RENAMED Viewed

@@ -314,8 +314,8 @@ class OGBLLoader(OGBLoader):
                 assert source_labels[i] == source_label
                 assert target_labels[i] == target_label
-                source_ids[i] += node_id_offsets[edges["head_type"][i]]
-                target_ids[i] += node_id_offsets[edges["tail_type"][i]]
+                source_ids[i] += node_id_offsets[edges["head_type"][i]] + edges["head"][i]
+                target_ids[i] += node_id_offsets[edges["tail_type"][i]] + edges["tail"][i]
                 rel_types.append(f"{edge_type}_{set_type.upper()}")

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/graph_data_science.py RENAMED Viewed

@@ -23,11 +23,12 @@ class GraphDataScience(DirectEndpoints, UncallableNamespace):
     def __init__(
         self,
+        /,
         endpoint: Union[str, Driver, QueryRunner],
         auth: Optional[Tuple[str, str]] = None,
         aura_ds: bool = False,
         database: Optional[str] = None,
-        arrow: bool = True,
+        arrow: Union[str, bool] = True,
         arrow_disable_server_verification: bool = True,
         arrow_tls_root_certs: Optional[bytes] = None,
         bookmarks: Optional[Any] = None,
@@ -46,9 +47,10 @@ class GraphDataScience(DirectEndpoints, UncallableNamespace):
             to a Neo4j Aura instance.
         database: Optional[str], default None
             The Neo4j database to query against.
-        arrow : bool, default True
-            A flag that indicates that the client should use Apache Arrow
-            for data streaming if it is available on the server.
+        arrow : Union[str, bool], default True
+            Arrow connection information. Either a flag that indicates whether the client should use Apache Arrow
+            for data streaming if it is available on the server. True means discover the connection URI from the server.
+            A connection URI (str) can also be provided.
         arrow_disable_server_verification : bool, default True
             A flag that indicates that, if the flight client is connecting with
             TLS, that it skips server verification. If this is enabled, all
@@ -76,6 +78,7 @@ class GraphDataScience(DirectEndpoints, UncallableNamespace):
                 self._query_runner.encrypted(),
                 arrow_disable_server_verification,
                 arrow_tls_root_certs,
+                None if arrow is True else arrow,
             )
         super().__init__(self._query_runner, "gds", self._server_version)

graphdatascience-1.10/graphdatascience/query_runner/arrow_endpoint_version.py ADDED Viewed

@@ -0,0 +1,35 @@
+from __future__ import annotations
+from enum import Enum
+from typing import List
+class ArrowEndpointVersion(Enum):
+    ALPHA = ""
+    V1 = "v1/"
+    def version(self) -> str:
+        return self._name_.lower()
+    def prefix(self) -> str:
+        return self._value_
+    @staticmethod
+    def from_arrow_info(supported_arrow_versions: List[str]) -> ArrowEndpointVersion:
+        # Fallback for pre 2.6.0 servers that do not support versions
+        if len(supported_arrow_versions) == 0:
+            return ArrowEndpointVersion.ALPHA
+        # If the server supports versioned endpoints, we try v1 first
+        if ArrowEndpointVersion.V1.version() in supported_arrow_versions:
+            return ArrowEndpointVersion.V1
+        if ArrowEndpointVersion.ALPHA.version() in supported_arrow_versions:
+            return ArrowEndpointVersion.ALPHA
+        raise UnsupportedArrowEndpointVersion(supported_arrow_versions)
+class UnsupportedArrowEndpointVersion(Exception):
+    def __init__(self, server_version: List[str]) -> None:
+        super().__init__(self, f"Unsupported Arrow endpoint versions: {server_version}")

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/query_runner/arrow_graph_constructor.py RENAMED Viewed

@@ -1,9 +1,11 @@
+from __future__ import annotations
 import concurrent
 import json
 import math
 import warnings
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, NoReturn, Optional
 import numpy
 import pyarrow.flight as flight
@@ -11,6 +13,7 @@ from pandas import DataFrame
 from pyarrow import Table
 from tqdm.auto import tqdm
+from .arrow_endpoint_version import ArrowEndpointVersion
 from .graph_constructor import GraphConstructor
@@ -21,6 +24,7 @@ class ArrowGraphConstructor(GraphConstructor):
         graph_name: str,
         flight_client: flight.FlightClient,
         concurrency: int,
+        arrow_endpoint_version: ArrowEndpointVersion,
         undirected_relationship_types: Optional[List[str]],
         chunk_size: int = 10_000,
     ):
@@ -28,6 +32,7 @@ class ArrowGraphConstructor(GraphConstructor):
         self._concurrency = concurrency
         self._graph_name = graph_name
         self._client = flight_client
+        self._arrow_endpoint_version = arrow_endpoint_version
         self._undirected_relationship_types = (
             [] if undirected_relationship_types is None else undirected_relationship_types
         )
@@ -81,6 +86,7 @@ class ArrowGraphConstructor(GraphConstructor):
         return partitioned_dfs
     def _send_action(self, action_type: str, meta_data: Dict[str, Any]) -> None:
+        action_type = self._versioned_action_type(action_type)
         result = self._client.do_action(flight.Action(action_type, json.dumps(meta_data).encode("utf-8")))
         # Consume result fully to sanity check and avoid cancelled streams
@@ -89,10 +95,11 @@ class ArrowGraphConstructor(GraphConstructor):
         json.loads(collected_result[0].body.to_pybytes().decode())
-    def _send_df(self, df: DataFrame, entity_type: str, pbar: tqdm) -> None:
+    def _send_df(self, df: DataFrame, entity_type: str, pbar: tqdm[NoReturn]) -> None:
         table = Table.from_pandas(df)
         batches = table.to_batches(self._chunk_size)
         flight_descriptor = {"name": self._graph_name, "entity_type": entity_type}
+        flight_descriptor = self._versioned_flight_desriptor(flight_descriptor)
         # Write schema
         upload_descriptor = flight.FlightDescriptor.for_command(json.dumps(flight_descriptor).encode("utf-8"))
@@ -103,6 +110,8 @@ class ArrowGraphConstructor(GraphConstructor):
             for partition in batches:
                 writer.write_batch(partition)
                 pbar.update(partition.num_rows)
+        # Force a refresh to avoid the progress bar getting stuck at 0%
+        pbar.refresh()
     def _send_dfs(self, dfs: List[DataFrame], entity_type: str) -> None:
         desc = "Uploading Nodes" if entity_type == "node" else "Uploading Relationships"
@@ -117,3 +126,17 @@ class ArrowGraphConstructor(GraphConstructor):
                 if not future.exception():
                     continue
                 raise future.exception()  # type: ignore
+    def _versioned_action_type(self, action_type: str) -> str:
+        return self._arrow_endpoint_version.prefix() + action_type
+    def _versioned_flight_desriptor(self, flight_descriptor: Dict[str, Any]) -> Dict[str, Any]:
+        return (
+            flight_descriptor
+            if self._arrow_endpoint_version == ArrowEndpointVersion.ALPHA
+            else {
+                "name": "PUT_MESSAGE",
+                "version": ArrowEndpointVersion.V1.version(),
+                "body": flight_descriptor,
+            }
+        )

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/query_runner/arrow_query_runner.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import base64
 import json
 import time
@@ -5,13 +7,14 @@ import warnings
 from typing import Any, Dict, List, Optional, Tuple
 import pyarrow.flight as flight
-from pandas import DataFrame, Series
+from pandas import DataFrame
 from pyarrow import ChunkedArray, Table, chunked_array
 from pyarrow.flight import ClientMiddleware, ClientMiddlewareFactory
 from pyarrow.types import is_dictionary  # type: ignore
 from ..call_parameters import CallParameters
 from ..server_version.server_version import ServerVersion
+from .arrow_endpoint_version import ArrowEndpointVersion
 from .arrow_graph_constructor import ArrowGraphConstructor
 from .graph_constructor import GraphConstructor
 from .query_runner import QueryRunner
@@ -28,27 +31,29 @@ class ArrowQueryRunner(QueryRunner):
         encrypted: bool = False,
         disable_server_verification: bool = False,
         tls_root_certs: Optional[bytes] = None,
-    ) -> "QueryRunner":
+        connection_string_override: Optional[str] = None,
+    ) -> QueryRunner:
+        arrow_info = (
+            fallback_query_runner.call_procedure(endpoint="gds.debug.arrow", custom_error=False).squeeze().to_dict()
+        )
         server_version = fallback_query_runner.server_version()
+        connection_string: str
+        if connection_string_override is not None:
+            connection_string = connection_string_override
+        else:
+            connection_string = arrow_info.get("advertisedListenAddress", arrow_info["listenAddress"])
+        arrow_endpoint_version = ArrowEndpointVersion.from_arrow_info(arrow_info.get("versions", []))
-        yield_fields = (
-            ["running", "listenAddress"]
-            if server_version >= ServerVersion(2, 2, 1)
-            else ["running", "advertisedListenAddress"]
-        )
-        arrow_info: "Series[Any]" = fallback_query_runner.call_procedure(
-            endpoint="gds.debug.arrow", yields=yield_fields, custom_error=False
-        ).squeeze()
-        listen_address: str = arrow_info.get("advertisedListenAddress", arrow_info["listenAddress"])  # type: ignore
         if arrow_info["running"]:
             return ArrowQueryRunner(
-                listen_address,
+                connection_string,
                 fallback_query_runner,
                 server_version,
                 auth,
                 encrypted,
                 disable_server_verification,
                 tls_root_certs,
+                arrow_endpoint_version,
             )
         else:
             return fallback_query_runner
@@ -62,9 +67,11 @@ class ArrowQueryRunner(QueryRunner):
         encrypted: bool = False,
         disable_server_verification: bool = False,
         tls_root_certs: Optional[bytes] = None,
+        arrow_endpoint_version: ArrowEndpointVersion = ArrowEndpointVersion.ALPHA,
     ):
         self._fallback_query_runner = fallback_query_runner
         self._server_version = server_version
+        self._arrow_endpoint_version = arrow_endpoint_version
         host, port_string = uri.split(":")
@@ -272,8 +279,15 @@ class ArrowQueryRunner(QueryRunner):
             "procedure_name": procedure_name,
             "configuration": configuration,
         }
-        ticket = flight.Ticket(json.dumps(payload).encode("utf-8"))
+        if self._arrow_endpoint_version == ArrowEndpointVersion.V1:
+            payload = {
+                "name": "GET_MESSAGE",
+                "version": ArrowEndpointVersion.V1.version(),
+                "body": payload,
+            }
+        ticket = flight.Ticket(json.dumps(payload).encode("utf-8"))
         get = self._flight_client.do_get(ticket)
         arrow_table = get.read_all()
@@ -282,6 +296,13 @@ class ArrowQueryRunner(QueryRunner):
             new_colum_names = ["nodeLabels" if i == "labels" else i for i in arrow_table.column_names]
             arrow_table = arrow_table.rename_columns(new_colum_names)
+        # Pandas 2.2.0 deprecated an API used by ArrowTable.to_pandas() (< pyarrow 15.0)
+        warnings.filterwarnings(
+            "ignore",
+            category=DeprecationWarning,
+            message=r"Passing a BlockManager to DataFrame is deprecated",
+        )
         return self._sanitize_arrow_table(arrow_table).to_pandas()  # type: ignore
     def create_graph_constructor(
@@ -295,10 +316,19 @@ class ArrowQueryRunner(QueryRunner):
             )
         return ArrowGraphConstructor(
-            database, graph_name, self._flight_client, concurrency, undirected_relationship_types
+            database,
+            graph_name,
+            self._flight_client,
+            concurrency,
+            self._arrow_endpoint_version,
+            undirected_relationship_types,
         )
     def _sanitize_arrow_table(self, arrow_table: Table) -> Table:
+        # empty columns cannot be used to build a chunked_array in pyarrow
+        if len(arrow_table) == 0:
+            return arrow_table
         dict_encoded_fields = [
             (idx, field) for idx, field in enumerate(arrow_table.schema) if is_dictionary(field.type)
         ]

{graphdatascience-1.9 → graphdatascience-1.10}/graphdatascience/query_runner/aura_db_arrow_query_runner.py RENAMED Viewed

@@ -5,7 +5,7 @@ from pyarrow import flight
 from pyarrow.flight import ClientMiddleware, ClientMiddlewareFactory
 from ..call_parameters import CallParameters
-from ..gds_session.dbms_connection_info import DbmsConnectionInfo
+from ..session.dbms_connection_info import DbmsConnectionInfo
 from .query_runner import QueryRunner
 from graphdatascience.query_runner.graph_constructor import GraphConstructor
 from graphdatascience.server_version.server_version import ServerVersion

graphdatascience 1.9__tar.gz → 1.10__tar.gz

graphdatascience 1.9__tar.gz → 1.10__tar.gz