neo4j-viz 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
neo4j_viz/pandas.py CHANGED
@@ -4,6 +4,7 @@ from collections.abc import Iterable
4
4
  from typing import Optional, Union
5
5
 
6
6
  from pandas import DataFrame
7
+ from pydantic import BaseModel, ValidationError
7
8
 
8
9
  from .node import Node
9
10
  from .relationship import Relationship
@@ -12,25 +13,71 @@ from .visualization_graph import VisualizationGraph
12
13
  DFS_TYPE = Union[DataFrame, Iterable[DataFrame]]
13
14
 
14
15
 
16
+ def _parse_validation_error(e: ValidationError, entity_type: type[BaseModel]) -> None:
17
+ for err in e.errors():
18
+ loc = err["loc"][0]
19
+ if err["type"] == "missing":
20
+ raise ValueError(
21
+ f"Mandatory {entity_type.__name__.lower()} column '{loc}' is missing. Expected one of {entity_type.model_fields[loc].validation_alias.choices} to be present" # type: ignore
22
+ )
23
+ else:
24
+ raise ValueError(
25
+ f"Error for {entity_type.__name__.lower()} column '{loc}' with provided input '{err['input']}'. Reason: {err['msg']}"
26
+ )
27
+
28
+
15
29
  def _from_dfs(
16
- node_dfs: DFS_TYPE,
17
- rel_dfs: DFS_TYPE,
30
+ node_dfs: Optional[DFS_TYPE] = None,
31
+ rel_dfs: Optional[DFS_TYPE] = None,
18
32
  node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
19
33
  rename_properties: Optional[dict[str, str]] = None,
34
+ dropna: bool = False,
20
35
  ) -> VisualizationGraph:
36
+ if node_dfs is None and rel_dfs is None:
37
+ raise ValueError("At least one of `node_dfs` or `rel_dfs` must be provided")
38
+
39
+ if rel_dfs is None:
40
+ relationships = []
41
+ else:
42
+ relationships = _parse_relationships(rel_dfs, rename_properties=rename_properties, dropna=dropna)
43
+
44
+ if node_dfs is None:
45
+ has_size = False
46
+ node_ids = set()
47
+ for rel in relationships:
48
+ node_ids.add(rel.source)
49
+ node_ids.add(rel.target)
50
+ nodes = [Node(id=id) for id in node_ids]
51
+ else:
52
+ nodes, has_size = _parse_nodes(node_dfs, rename_properties=rename_properties, dropna=dropna)
53
+
54
+ VG = VisualizationGraph(nodes=nodes, relationships=relationships)
55
+
56
+ if node_radius_min_max is not None and has_size:
57
+ VG.resize_nodes(node_radius_min_max=node_radius_min_max)
58
+
59
+ return VG
60
+
61
+
62
+ def _parse_nodes(
63
+ node_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]], dropna: bool = False
64
+ ) -> tuple[list[Node], bool]:
21
65
  if isinstance(node_dfs, DataFrame):
22
66
  node_dfs_iter: Iterable[DataFrame] = [node_dfs]
67
+ elif node_dfs is None:
68
+ node_dfs_iter = []
23
69
  else:
24
70
  node_dfs_iter = node_dfs
25
71
 
26
72
  all_node_field_aliases = Node.all_validation_aliases()
27
- all_rel_field_aliases = Relationship.all_validation_aliases()
28
73
 
29
74
  has_size = True
30
75
  nodes = []
31
76
  for node_df in node_dfs_iter:
32
77
  has_size &= "size" in node_df.columns
33
78
  for _, row in node_df.iterrows():
79
+ if dropna:
80
+ row = row.dropna(inplace=False)
34
81
  top_level = {}
35
82
  properties = {}
36
83
  for key, value in row.to_dict().items():
@@ -41,16 +88,29 @@ def _from_dfs(
41
88
  key = rename_properties[key]
42
89
  properties[key] = value
43
90
 
44
- nodes.append(Node(**top_level, properties=properties))
91
+ try:
92
+ nodes.append(Node(**top_level, properties=properties))
93
+ except ValidationError as e:
94
+ _parse_validation_error(e, Node)
95
+
96
+ return nodes, has_size
97
+
98
+
99
+ def _parse_relationships(
100
+ rel_dfs: DFS_TYPE, rename_properties: Optional[dict[str, str]], dropna: bool = False
101
+ ) -> list[Relationship]:
102
+ all_rel_field_aliases = Relationship.all_validation_aliases()
45
103
 
46
104
  if isinstance(rel_dfs, DataFrame):
47
105
  rel_dfs_iter: Iterable[DataFrame] = [rel_dfs]
48
106
  else:
49
107
  rel_dfs_iter = rel_dfs
108
+ relationships: list[Relationship] = []
50
109
 
51
- relationships = []
52
110
  for rel_df in rel_dfs_iter:
53
111
  for _, row in rel_df.iterrows():
112
+ if dropna:
113
+ row = row.dropna(inplace=False)
54
114
  top_level = {}
55
115
  properties = {}
56
116
  for key, value in row.to_dict().items():
@@ -61,19 +121,17 @@ def _from_dfs(
61
121
  key = rename_properties[key]
62
122
  properties[key] = value
63
123
 
64
- relationships.append(Relationship(**top_level, properties=properties))
124
+ try:
125
+ relationships.append(Relationship(**top_level, properties=properties))
126
+ except ValidationError as e:
127
+ _parse_validation_error(e, Relationship)
65
128
 
66
- VG = VisualizationGraph(nodes=nodes, relationships=relationships)
67
-
68
- if node_radius_min_max is not None and has_size:
69
- VG.resize_nodes(node_radius_min_max=node_radius_min_max)
70
-
71
- return VG
129
+ return relationships
72
130
 
73
131
 
74
132
  def from_dfs(
75
- node_dfs: DFS_TYPE,
76
- rel_dfs: DFS_TYPE,
133
+ node_dfs: Optional[DFS_TYPE] = None,
134
+ rel_dfs: Optional[DFS_TYPE] = None,
77
135
  node_radius_min_max: Optional[tuple[float, float]] = (3, 60),
78
136
  ) -> VisualizationGraph:
79
137
  """
@@ -85,13 +143,15 @@ def from_dfs(
85
143
 
86
144
  Parameters
87
145
  ----------
88
- node_dfs: Union[DataFrame, Iterable[DataFrame]]
146
+ node_dfs: Optional[Union[DataFrame, Iterable[DataFrame]]], optional
89
147
  DataFrame or iterable of DataFrames containing node data.
90
- rel_dfs: Union[DataFrame, Iterable[DataFrame]]
148
+ If None, the nodes will be created from the source and target node ids in the rel_dfs.
149
+ rel_dfs: Optional[Union[DataFrame, Iterable[DataFrame]]], optional
91
150
  DataFrame or iterable of DataFrames containing relationship data.
151
+ If None, no relationships will be created.
92
152
  node_radius_min_max : tuple[float, float], optional
93
153
  Minimum and maximum node radius.
94
154
  To avoid tiny or huge nodes in the visualization, the node sizes are scaled to fit in the given range.
95
155
  """
96
156
 
97
- return _from_dfs(node_dfs, rel_dfs, node_radius_min_max)
157
+ return _from_dfs(node_dfs, rel_dfs, node_radius_min_max, dropna=False)