PyPI - neo4j-viz - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

neo4j-viz 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (8)

neo4j_viz/gds.py +45 -25
neo4j_viz/neo4j.py +31 -10
neo4j_viz/pandas.py +27 -11
neo4j_viz/resources/nvl_entrypoint/base.js +1 -1
{neo4j_viz-0.4.0.dist-info → neo4j_viz-0.4.1.dist-info}/METADATA +1 -1
{neo4j_viz-0.4.0.dist-info → neo4j_viz-0.4.1.dist-info}/RECORD +8 -8
{neo4j_viz-0.4.0.dist-info → neo4j_viz-0.4.1.dist-info}/WHEEL +0 -0
{neo4j_viz-0.4.0.dist-info → neo4j_viz-0.4.1.dist-info}/top_level.txt +0 -0

neo4j_viz/gds.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import warnings
 from itertools import chain
 from typing import Optional
 from uuid import uuid4
@@ -13,11 +14,11 @@ from .visualization_graph import VisualizationGraph
 def _fetch_node_dfs(
-    gds: GraphDataScience, G: Graph, node_properties: list[str], node_labels: list[str]
+    gds: GraphDataScience, G: Graph, node_properties_by_label: dict[str, list[str]], node_labels: list[str]
 ) -> dict[str, pd.DataFrame]:
     return {
         lbl: gds.graph.nodeProperties.stream(
-            G, node_properties=node_properties, node_labels=[lbl], separate_property_columns=True
+            G, node_properties=node_properties_by_label[lbl], node_labels=[lbl], separate_property_columns=True
         )
         for lbl in node_labels
     }
@@ -56,6 +57,7 @@ def from_gds(
     If the properties are named as the fields of the `Node` class, they will be included as top level fields of the
     created `Node` objects. Otherwise, they will be included in the `properties` dictionary.
     Additionally, a new "labels" node property will be added, containing the node labels of the node.
+    Similarly for relationships, a new "relationshipType" property will be added.
     Parameters
     ----------
@@ -77,27 +79,37 @@ def from_gds(
     """
     node_properties_from_gds = G.node_properties()
     assert isinstance(node_properties_from_gds, pd.Series)
-    actual_node_properties = list(chain.from_iterable(node_properties_from_gds.to_dict().values()))
+    actual_node_properties = node_properties_from_gds.to_dict()
+    all_actual_node_properties = list(chain.from_iterable(actual_node_properties.values()))
-    if size_property is not None and size_property not in actual_node_properties:
-        raise ValueError(f"There is no node property '{size_property}' in graph '{G.name()}'")
+    if size_property is not None:
+        if size_property not in all_actual_node_properties:
+            raise ValueError(f"There is no node property '{size_property}' in graph '{G.name()}'")
     if additional_node_properties is None:
-        additional_node_properties = actual_node_properties
+        node_properties_by_label = {k: set(v) for k, v in actual_node_properties.items()}
     else:
         for prop in additional_node_properties:
-            if prop not in actual_node_properties:
+            if prop not in all_actual_node_properties:
                 raise ValueError(f"There is no node property '{prop}' in graph '{G.name()}'")
-    node_properties = set()
-    if additional_node_properties is not None:
-        node_properties.update(additional_node_properties)
+        node_properties_by_label = {}
+        for label, props in actual_node_properties.items():
+            node_properties_by_label[label] = {
+                prop for prop in actual_node_properties[label] if prop in additional_node_properties
+            }
     if size_property is not None:
-        node_properties.add(size_property)
-    node_properties = list(node_properties)
+        for label, props in node_properties_by_label.items():
+            props.add(size_property)
+    node_properties_by_label = {k: list(v) for k, v in node_properties_by_label.items()}
     node_count = G.node_count()
     if node_count > max_node_count:
+        warnings.warn(
+            f"The '{G.name()}' projection's node count ({G.node_count()}) exceeds `max_node_count` ({max_node_count}), so subsampling will be applied. Increase `max_node_count` if needed"
+        )
         sampling_ratio = float(max_node_count) / node_count
         sample_name = f"neo4j-viz_sample_{uuid4()}"
         G_fetched, _ = gds.graph.sample.rwr(sample_name, G, samplingRatio=sampling_ratio, nodeLabelStratification=True)
@@ -107,13 +119,18 @@ def from_gds(
     property_name = None
     try:
         # Since GDS does not allow us to only fetch node IDs, we add the degree property
-        # as a temporary property to ensure that we have at least one property to fetch
-        if len(actual_node_properties) == 0:
+        # as a temporary property to ensure that we have at least one property for each label to fetch
+        if sum([len(props) == 0 for props in node_properties_by_label.values()]) > 0:
             property_name = f"neo4j-viz_property_{uuid4()}"
             gds.degree.mutate(G_fetched, mutateProperty=property_name)
-            node_properties = [property_name]
+            for props in node_properties_by_label.values():
+                props.append(property_name)
+        node_dfs = _fetch_node_dfs(gds, G_fetched, node_properties_by_label, G_fetched.node_labels())
+        if property_name is not None:
+            for df in node_dfs.values():
+                df.drop(columns=[property_name], inplace=True)
-        node_dfs = _fetch_node_dfs(gds, G_fetched, node_properties, G_fetched.node_labels())
         rel_df = _fetch_rel_df(gds, G_fetched)
     finally:
         if G_fetched.name() != G.name():
@@ -122,32 +139,35 @@ def from_gds(
             gds.graph.nodeProperties.drop(G_fetched, node_properties=[property_name])
     for df in node_dfs.values():
-        df.rename(columns={"nodeId": "id"}, inplace=True)
         if property_name is not None and property_name in df.columns:
             df.drop(columns=[property_name], inplace=True)
-    rel_df.rename(columns={"sourceNodeId": "source", "targetNodeId": "target"}, inplace=True)
     node_props_df = pd.concat(node_dfs.values(), ignore_index=True, axis=0).drop_duplicates()
     if size_property is not None:
-        if "size" in actual_node_properties and size_property != "size":
+        if "size" in all_actual_node_properties and size_property != "size":
             node_props_df.rename(columns={"size": "__size"}, inplace=True)
         node_props_df.rename(columns={size_property: "size"}, inplace=True)
     for lbl, df in node_dfs.items():
-        if "labels" in actual_node_properties:
+        if "labels" in all_actual_node_properties:
             df.rename(columns={"labels": "__labels"}, inplace=True)
         df["labels"] = lbl
-    node_labels_df = pd.concat([df[["id", "labels"]] for df in node_dfs.values()], ignore_index=True, axis=0)
-    node_labels_df = node_labels_df.groupby("id").agg({"labels": list})
+    node_labels_df = pd.concat([df[["nodeId", "labels"]] for df in node_dfs.values()], ignore_index=True, axis=0)
+    node_labels_df = node_labels_df.groupby("nodeId").agg({"labels": list})
-    node_df = node_props_df.merge(node_labels_df, on="id")
+    node_df = node_props_df.merge(node_labels_df, on="nodeId")
-    if "caption" not in actual_node_properties:
+    if "caption" not in all_actual_node_properties:
         node_df["caption"] = node_df["labels"].astype(str)
+    if "caption" not in rel_df.columns:
+        rel_df["caption"] = rel_df["relationshipType"]
     try:
-        return _from_dfs(node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"})
+        return _from_dfs(
+            node_df, rel_df, node_radius_min_max=node_radius_min_max, rename_properties={"__size": "size"}, dropna=True
+        )
     except ValueError as e:
         err_msg = str(e)
         if "column" in err_msg:

neo4j_viz/neo4j.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from __future__ import annotations
+import warnings
 from typing import Optional, Union
 import neo4j.graph
-from neo4j import Result
+from neo4j import Driver, Result, RoutingControl
 from pydantic import BaseModel, ValidationError
 from neo4j_viz.node import Node
@@ -20,14 +21,15 @@ def _parse_validation_error(e: ValidationError, entity_type: type[BaseModel]) ->
 def from_neo4j(
-    result: Union[neo4j.graph.Graph, Result],
+    data: Union[neo4j.graph.Graph, Result, Driver],
     size_property: Optional[str] = None,
     node_caption: Optional[str] = "labels",
     relationship_caption: Optional[str] = "type",
     node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
+    row_limit: int = 10_000,
 ) -> VisualizationGraph:
     """
-    Create a VisualizationGraph from a Neo4j Graph or Neo4j Result object.
+    Create a VisualizationGraph from a Neo4j `Graph`, Neo4j `Result` or Neo4j `Driver`.
     All node and relationship properties will be included in the visualization graph.
     If the properties are named as the fields of the `Node` or `Relationship` classes, they will be included as
@@ -36,8 +38,9 @@ def from_neo4j(
     Parameters
     ----------
-    result : Union[neo4j.graph.Graph, Result]
-        Query result either in shape of a Graph or result.
+    data : Union[neo4j.graph.Graph, neo4j.Result, neo4j.Driver]
+        Either a query result in the shape of a `neo4j.graph.Graph` or `neo4j.Result`, or a `neo4j.Driver` in
+        which case a simple default query will be executed internally to retrieve the graph data.
     size_property : str, optional
         Property to use for node size, by default None.
     node_caption : str, optional
@@ -47,14 +50,32 @@ def from_neo4j(
     node_radius_min_max : tuple[float, float], optional
         Minimum and maximum node radius, by default (3, 60).
         To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range.
+    row_limit : int, optional
+        Maximum number of rows to return from the query, by default 10_000.
+        This is only used if a `neo4j.Driver` is passed as `result` argument, otherwise the limit is ignored.
     """
-    if isinstance(result, Result):
-        graph = result.graph()
-    elif isinstance(result, neo4j.graph.Graph):
-        graph = result
+    if isinstance(data, Result):
+        graph = data.graph()
+    elif isinstance(data, neo4j.graph.Graph):
+        graph = data
+    elif isinstance(data, Driver):
+        rel_count = data.execute_query(
+            "MATCH ()-[r]->() RETURN count(r) as count",
+            routing_=RoutingControl.READ,
+            result_transformer_=Result.single,
+        ).get("count")  # type: ignore[union-attr]
+        if rel_count > row_limit:
+            warnings.warn(
+                f"Database relationship count ({rel_count}) exceeds `row_limit` ({row_limit}), so limiting will be applied. Increase the `row_limit` if needed"
+            )
+        graph = data.execute_query(
+            f"MATCH (n)-[r]->(m) RETURN n,r,m LIMIT {row_limit}",
+            routing_=RoutingControl.READ,
+            result_transformer_=Result.graph,
+        )
     else:
-        raise ValueError(f"Invalid input type `{type(result)}`. Expected `neo4j.Graph` or `neo4j.Result`")
+        raise ValueError(f"Invalid input type `{type(data)}`. Expected `neo4j.Graph`, `neo4j.Result` or `neo4j.Driver`")
     all_node_field_aliases = Node.all_validation_aliases()
     all_rel_field_aliases = Relationship.all_validation_aliases()

neo4j_viz/pandas.py CHANGED Viewed

@@ -27,12 +27,19 @@ def _parse_validation_error(e: ValidationError, entity_type: type[BaseModel]) ->
 def _from_dfs(
-    node_dfs: Optional[DFS_TYPE],
-    rel_dfs: DFS_TYPE,
+    node_dfs: Optional[DFS_TYPE] = None,
+    rel_dfs: Optional[DFS_TYPE] = None,
     node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
     rename_properties: Optional[dict[str, str]] = None,
+    dropna: bool = False,
 ) -> VisualizationGraph:
-    relationships = _parse_relationships(rel_dfs, rename_properties=rename_properties)
+    if node_dfs is None and rel_dfs is None:
+        raise ValueError("At least one of `node_dfs` or `rel_dfs` must be provided")
+    if rel_dfs is None:
+        relationships = []
+    else:
+        relationships = _parse_relationships(rel_dfs, rename_properties=rename_properties, dropna=dropna)
     if node_dfs is None:
         has_size = False
@@ -42,7 +49,7 @@ def _from_dfs(
             node_ids.add(rel.target)
         nodes = [Node(id=id) for id in node_ids]
     else:
-        nodes, has_size = _parse_nodes(node_dfs, rename_properties=rename_properties)
+        nodes, has_size = _parse_nodes(node_dfs, rename_properties=rename_properties, dropna=dropna)
     VG = VisualizationGraph(nodes=nodes, relationships=relationships)
@@ -52,7 +59,9 @@ def _from_dfs(
     return VG
-def _parse_nodes(node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]) -> tuple[list[Node], bool]:
+def _parse_nodes(
+    node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]], dropna: bool = False
+) -> tuple[list[Node], bool]:
     if isinstance(node_dfs, DataFrame):
         node_dfs_iter: Iterable[DataFrame] = [node_dfs]
     elif node_dfs is None:
@@ -67,6 +76,8 @@ def _parse_nodes(node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]
     for node_df in node_dfs_iter:
         has_size &= "size" in node_df.columns
         for _, row in node_df.iterrows():
+            if dropna:
+                row = row.dropna(inplace=False)
             top_level = {}
             properties = {}
             for key, value in row.to_dict().items():
@@ -85,7 +96,9 @@ def _parse_nodes(node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]
     return nodes, has_size
-def _parse_relationships(rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]]) -> list[Relationship]:
+def _parse_relationships(
+    rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]], dropna: bool = False
+) -> list[Relationship]:
     all_rel_field_aliases = Relationship.all_validation_aliases()
     if isinstance(rel_dfs, DataFrame):
@@ -96,6 +109,8 @@ def _parse_relationships(rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str
     for rel_df in rel_dfs_iter:
         for _, row in rel_df.iterrows():
+            if dropna:
+                row = row.dropna(inplace=False)
             top_level = {}
             properties = {}
             for key, value in row.to_dict().items():
@@ -115,8 +130,8 @@ def _parse_relationships(rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str
 def from_dfs(
-    node_dfs: Optional[DFS_TYPE],
-    rel_dfs: DFS_TYPE,
+    node_dfs: Optional[DFS_TYPE] = None,
+    rel_dfs: Optional[DFS_TYPE] = None,
     node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
 ) -> VisualizationGraph:
     """
@@ -128,14 +143,15 @@ def from_dfs(
     Parameters
     ----------
-    node_dfs: Optional[Union[DataFrame, Iterable[DataFrame]]]
+    node_dfs: Optional[Union[DataFrame, Iterable[DataFrame]]], optional
         DataFrame or iterable of DataFrames containing node data.
         If None, the nodes will be created from the source and target node ids in the rel_dfs.
-    rel_dfs: Union[DataFrame, Iterable[DataFrame]]
+    rel_dfs: Optional[Union[DataFrame, Iterable[DataFrame]]], optional
         DataFrame or iterable of DataFrames containing relationship data.
+        If None, no relationships will be created.
     node_radius_min_max : tuple[float, float], optional
         Minimum and maximum node radius.
         To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range.
     """
-    return _from_dfs(node_dfs, rel_dfs, node_radius_min_max)
+    return _from_dfs(node_dfs, rel_dfs, node_radius_min_max, dropna=False)

neo4j-viz 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

neo4j-viz 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl