MatplotLibAPI 1.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MatplotLibAPI/Bubble.py +127 -0
- MatplotLibAPI/Composite.py +78 -0
- MatplotLibAPI/Network.py +347 -0
- MatplotLibAPI/Pivot.py +214 -0
- MatplotLibAPI/Style.py +171 -0
- MatplotLibAPI/Table.py +65 -0
- MatplotLibAPI/Timeserie.py +97 -0
- MatplotLibAPI/__init__.py +13 -0
- MatplotLibAPI/pdAccessor.py +125 -0
- {MatplotLibAPI-1.0.dist-info → MatplotLibAPI-3.0.0.dist-info}/METADATA +1 -3
- MatplotLibAPI-3.0.0.dist-info/RECORD +14 -0
- {MatplotLibAPI-1.0.dist-info → MatplotLibAPI-3.0.0.dist-info}/WHEEL +1 -1
- MatplotLibAPI-3.0.0.dist-info/top_level.txt +1 -0
- MatplotLibAPI-1.0.dist-info/RECORD +0 -5
- MatplotLibAPI-1.0.dist-info/top_level.txt +0 -1
- {MatplotLibAPI-1.0.dist-info → MatplotLibAPI-3.0.0.dist-info}/LICENSE +0 -0
MatplotLibAPI/Bubble.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Hint for Visual Code Python Interactive window
|
|
2
|
+
# %%
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
from matplotlib.axes import Axes
|
|
7
|
+
import seaborn as sns
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from .Style import DynamicFuncFormatter, StyleTemplate, generate_ticks, _validate_panda, string_formatter, bmk_formatter, percent_formatter, format_func
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Default look for bubble charts: log-scaled y axis, eight ticks per axis and
# one formatter per logical column role. The original literal listed the
# "label" key twice; the duplicate is dropped (the resulting dict is the same).
BUBBLE_STYLE_TEMPLATE = StyleTemplate(
    format_funcs={
        "label": string_formatter,
        "x": bmk_formatter,
        "y": percent_formatter,
        "z": bmk_formatter,
    },
    yscale="log",
    y_ticks=8,
    x_ticks=8,
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def plot_bubble(
        pd_df: pd.DataFrame,
        label: str,
        x: str,
        y: str,
        z: str,
        title: Optional[str] = "Test",
        style: StyleTemplate = BUBBLE_STYLE_TEMPLATE,
        max_values: int = 50,
        center_to_mean: bool = False,
        sort_by: Optional[str] = None,
        ascending: bool = False,
        ax: Optional[Axes] = None) -> Axes:
    """Draw a labelled bubble chart of ``pd_df`` and return its axes.

    Parameters:
    - pd_df: source data; must contain the ``label``, ``x``, ``y`` and ``z``
      columns (and ``sort_by`` when given).
    - label: column whose values are written on top of each bubble.
    - x, y: columns used for the bubble position.
    - z: column used for the bubble size and for the quintile colouring.
    - title: axes title; skipped when empty or None.
    - style: colours, fonts, scales, tick counts and formatters.
    - max_values: number of rows kept after sorting.
    - center_to_mean: subtract the mean of the plotted ``x`` values.
    - sort_by: column used to rank the rows; defaults to ``z``.
    - ascending: sort direction used for the ranking.
    - ax: axes to draw on; the current axes are used when None.

    Raises:
    - AttributeError: from ``_validate_panda`` when a column is missing.
    """
    _validate_panda(pd_df, cols=[label, x, y, z], sort_by=sort_by)
    # Guard against templates created without formatters (format_funcs=None).
    style.format_funcs = format_func(
        style.format_funcs or {}, label=label, x=x, y=y, z=z)
    formatters = style.format_funcs or {}
    if not sort_by:
        sort_by = z

    # Sort before selecting the columns so that ``sort_by`` may be any column
    # of the frame, then copy so the helper columns below are written to an
    # independent frame rather than to a slice of the caller's data.
    plot_df = pd_df.sort_values(by=sort_by, ascending=ascending)[
        [label, x, y, z]].head(max_values).copy()

    x_offset = 0
    if center_to_mean:
        x_offset = plot_df[x].mean()
        plot_df[x] = plot_df[x] - x_offset
    # duplicates="drop" tolerates repeated quantile edges instead of raising.
    plot_df['quintile'] = pd.qcut(
        plot_df[z], 5, labels=False, duplicates="drop")

    # styling
    plot_df["fontsize"] = plot_df['quintile'].map(style.font_mapping)

    if ax is None:
        ax = plt.gca()

    ax = sns.scatterplot(
        data=plot_df,
        x=x,
        y=y,
        size=z,
        hue='quintile',
        sizes=(100, 2000),
        legend=False,
        palette=sns.color_palette(style.palette, as_cmap=True),
        edgecolor=style.background_color,
        ax=ax)
    ax.set_facecolor(style.background_color)
    if style.xscale:
        ax.set(xscale=style.xscale)
    if style.yscale:
        ax.set(yscale=style.yscale)

    # Axis statistics come from the full frame; the x statistics are shifted
    # by the same offset as the plotted values so ticks and the mean line
    # stay aligned with the bubbles when center_to_mean is set.
    x_values = pd_df[x] - x_offset
    x_min = x_values.min()
    x_max = x_values.max()
    x_mean = x_values.mean()
    ax.set_xticks(generate_ticks(x_min, x_max, num_ticks=style.x_ticks))
    ax.xaxis.grid(True, "major", linewidth=.5, color=style.font_color)
    if formatters.get("x"):
        ax.xaxis.set_major_formatter(
            DynamicFuncFormatter(formatters.get("x")))

    y_min = pd_df[y].min()
    y_max = pd_df[y].max()
    y_mean = pd_df[y].mean()
    ax.set_yticks(generate_ticks(y_min, y_max, num_ticks=style.y_ticks))
    ax.yaxis.grid(True, "major", linewidth=.5, color=style.font_color)
    if formatters.get("y"):
        ax.yaxis.set_major_formatter(
            DynamicFuncFormatter(formatters.get("y")))

    ax.tick_params(axis='both',
                   which='major',
                   colors=style.font_color,
                   labelsize=style.font_size)

    # Dashed cross-hair through the mean of each axis.
    ax.vlines(x=x_mean,
              ymin=y_min,
              ymax=y_max,
              linestyle='--',
              colors=style.font_color)
    ax.hlines(y=y_mean,
              xmin=x_min,
              xmax=x_max,
              linestyle='--',
              colors=style.font_color)

    # One text label per bubble, sized by the quintile of its z value.
    label_formatter = formatters.get("label")
    for x_value, y_value, raw_label, font_size in zip(
            plot_df[x], plot_df[y], plot_df[label], plot_df["fontsize"]):
        s_value = str(raw_label)
        if label_formatter:
            s_value = label_formatter(s_value)
        ax.text(x_value,
                y_value,
                s_value,
                horizontalalignment='center',
                fontdict={'color': style.font_color, 'fontsize': font_size})
    if title:
        ax.set_title(title, color=style.font_color, fontsize=style.font_size*2)
    return ax
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# endregion
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Hint for Visual Code Python Interactive window
|
|
2
|
+
# %%
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
from matplotlib.figure import Figure
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from .Bubble import plot_bubble, BUBBLE_STYLE_TEMPLATE
|
|
7
|
+
from .Table import plot_table
|
|
8
|
+
from typing import Optional, Tuple
|
|
9
|
+
from .Style import StyleTemplate, _validate_panda,format_func
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def plot_composite_bubble(
        pd_df: pd.DataFrame,
        label: str,
        x: str,
        y: str,
        z: str,
        title: Optional[str] = "Test",
        style: StyleTemplate = BUBBLE_STYLE_TEMPLATE,
        max_values: int = 50,
        center_to_mean: bool = False,
        sort_by: Optional[str] = None,
        ascending: bool = False,
        table_rows: int = 10,
        figsize: Tuple[float, float] = (19.2, 10.8)) -> Figure:
    """Build a figure with a bubble chart on top and two ranking tables below.

    The top row spans both grid columns and holds ``plot_bubble``; the bottom
    row holds a "Top N" table (descending sort) and a "Worst N" table
    (ascending sort), each limited to ``table_rows`` rows.

    Parameters:
    - pd_df, label, x, y, z, title, max_values, center_to_mean, sort_by,
      ascending: forwarded to ``plot_bubble``.
    - style: template shared by the bubble chart and both tables.
    - table_rows: number of rows shown in each table.
    - figsize: figure size in inches.

    Returns:
    - the created matplotlib Figure.
    """
    _validate_panda(pd_df, cols=[label, x, y, z], sort_by=sort_by)
    # Register the formatters under the real column names (z included) once,
    # so the tables can look them up by column. ``or {}`` tolerates templates
    # created without formatters.
    style.format_funcs = format_func(
        style.format_funcs or {}, label=label, x=x, y=y, z=z)

    fig = plt.figure(figsize=figsize)
    # Follow the template instead of a hard-coded "black" background.
    fig.patch.set_facecolor(style.background_color)
    grid = plt.GridSpec(2, 2, height_ratios=[2, 1], width_ratios=[1, 1])

    bubble_ax = fig.add_subplot(grid[0, 0:])
    plot_bubble(pd_df=pd_df,
                label=label,
                x=x,
                y=y,
                z=z,
                title=title,
                style=style,
                max_values=max_values,
                center_to_mean=center_to_mean,
                sort_by=sort_by,
                ascending=ascending,
                ax=bubble_ax)

    table_columns = [label, z, y, x]
    top_ax = fig.add_subplot(grid[1, 0])
    plot_table(
        pd_df=pd_df,
        cols=table_columns,
        title=f"Top {table_rows}",
        ax=top_ax,
        sort_by=sort_by,
        ascending=False,
        max_values=table_rows,
        style=style
    )
    worst_ax = fig.add_subplot(grid[1, 1])
    plot_table(
        pd_df=pd_df,
        cols=table_columns,
        title=f"Worst {table_rows}",
        ax=worst_ax,
        sort_by=sort_by,
        ascending=True,
        max_values=table_rows,
        style=style
    )
    fig.tight_layout()
    return fig
|
MatplotLibAPI/Network.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
from matplotlib.axes import Axes
|
|
8
|
+
import seaborn as sns
|
|
9
|
+
import networkx as nx
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from networkx import Graph
|
|
13
|
+
from networkx.classes.graph import Graph
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from .Style import StyleTemplate, string_formatter, _validate_panda,format_func
|
|
17
|
+
|
|
18
|
+
# Network plots use the StyleTemplate defaults unchanged.
NETWORK_STYLE_TEMPLATE = StyleTemplate()

# Truncation limits and size ranges referenced by the Graph helpers below.
DEFAULT = dict(
    MAX_EDGES=100,
    MAX_NODES=30,
    MIN_NODE_SIZE=100,
    MAX_NODE_SIZE=2000,
    MAX_EDGE_WIDTH=10,
    GRAPH_SCALE=2,
    MAX_FONT_SIZE=20,
    MIN_FONT_SIZE=8,
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The maximum is subtracted before exponentiating so large inputs do not
    overflow; the shifted exponentials are computed once and reused for the
    normalising sum.
    """
    shifted_exp = np.exp(x - np.max(x))
    return shifted_exp / shifted_exp.sum()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def scale_weights(weights, scale_min=0, scale_max=1):
    """Map each weight to one of ten evenly spaced levels by decile rank.

    The 10th..90th percentiles of ``weights`` are used as bucket edges; a
    weight's bucket index (0-9) is then scaled linearly so bucket 0 maps to
    ``scale_min`` and bucket 9 maps to ``scale_max``.

    Parameters:
    - weights: sequence of numeric weights.
    - scale_min: value assigned to the lowest bucket.
    - scale_max: value assigned to the highest bucket.

    Returns:
    - list of scaled values, one per input weight (empty for empty input).
    """
    # Percentiles of an empty sequence are undefined; nothing to scale.
    if len(weights) == 0:
        return []
    deciles = np.percentile(weights, [10, 20, 30, 40, 50, 60, 70, 80, 90])
    ranks = np.searchsorted(deciles, weights)
    scaled = ranks * (scale_max - scale_min) / len(deciles) + scale_min
    return scaled.tolist()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class NodeView(nx.classes.reportviews.NodeView):
    """NetworkX node view extended with attribute-based sort and filter."""

    def sort(self,
             attribute: Optional[str] = 'weight',
             reverse: Optional[bool] = True):
        """Return the nodes ordered by ``attribute`` (descending by default)."""
        def attribute_of(node):
            return self[node][attribute]

        return sorted(self, key=attribute_of, reverse=reverse)

    def filter(self, attribute: str, value: str):
        """Return the nodes that carry ``attribute`` with exactly ``value``."""
        matches = []
        for node in self:
            if attribute in self[node] and self[node][attribute] == value:
                matches.append(node)
        return matches
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class AdjacencyView(nx.classes.coreviews.AdjacencyView):
    """NetworkX adjacency view extended with attribute-based sort and filter."""

    def sort(self,
             attribute: Optional[str] = 'weight',
             reverse: Optional[bool] = True):
        """Return the keys ordered by ``self[key][attribute]``."""
        def attribute_of(node):
            return self[node][attribute]

        return sorted(self, key=attribute_of, reverse=reverse)

    def filter(self, attribute: str, value: str):
        """Return the keys whose entry holds ``attribute`` equal to ``value``."""
        matches = []
        for node in self:
            if attribute in self[node] and self[node][attribute] == value:
                matches.append(node)
        return matches
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class EdgeView(nx.classes.reportviews.EdgeView):
    """NetworkX edge view extended with attribute-based sort and filter."""

    def sort(self,
             reverse: Optional[bool] = True,
             attribute: Optional[str] = 'weight'):
        """Return ``{(u, v): data}`` ordered by the edge ``attribute``.

        Edges lacking the attribute are ranked as if its value were 1.
        """
        def attribute_of(edge):
            return edge[2].get(attribute, 1)

        ordered = sorted(self(data=True), key=attribute_of, reverse=reverse)
        result = {}
        for u, v, data in ordered:
            result[(u, v)] = data
        return result

    def filter(self, attribute: str, value: str):
        """Return ``(u, v)`` pairs whose data has ``attribute`` equal to ``value``."""
        matches = []
        for edge in self:
            if attribute in self[edge] and self[edge][attribute] == value:
                matches.append((edge[0], edge[1]))
        return matches
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Graph(nx.Graph):
    """
    Custom graph class based on NetworkX's Graph class.

    Adds sortable/filterable node and edge views, weight-driven sizing
    (``layout``), pruning helpers (``find_connected_subgraph``,
    ``top_k_edges``, ``subgraphX``) and a matplotlib renderer (``plotX``).
    """

    def __init__(self, incoming_graph_data=None, **attr):
        """Create the graph; all arguments are forwarded to ``nx.Graph``."""
        super().__init__(incoming_graph_data, **attr)
        self._scale = 1.0

    @property
    def scale(self) -> float:
        """Scale factor stored on the graph (1.0 by default)."""
        return self._scale

    @scale.setter
    def scale(self, value: float):
        self._scale = value

    # NOTE: the original declared ``@nodes.setter def scale(...)`` here, which
    # rebound the name ``scale`` to a property returning the node view and
    # made ``graph.scale = x`` fail. ``nodes`` is a read-only view, so that
    # setter is removed and ``scale`` keeps its float semantics.
    @property
    def nodes(self):
        """Node view with the extra ``sort`` / ``filter`` helpers."""
        return NodeView(self)

    @property
    def edges(self):
        """Edge view with the extra ``sort`` / ``filter`` helpers."""
        return EdgeView(self)

    @property
    def adjacency(self):
        """Adjacency view over this graph's adjacency mapping."""
        # The view must wrap the node -> neighbours mapping; the original
        # wrapped a plain list of nodes, which cannot be indexed by node.
        return AdjacencyView(self._adj)

    def edge_subgraph(self, edges: Iterable) -> "Graph":
        """Return the subgraph view induced by ``edges``."""
        return nx.edge_subgraph(self, edges)

    def layout(self,
               max_node_size: int = DEFAULT["MAX_NODE_SIZE"],
               min_node_size: int = DEFAULT["MIN_NODE_SIZE"],
               max_edge_width: int = DEFAULT["MAX_EDGE_WIDTH"],
               max_font_size: int = DEFAULT["MAX_FONT_SIZE"],
               min_font_size: int = DEFAULT["MIN_FONT_SIZE"]):
        """
        Calculates the sizes for nodes, edges, and fonts based on node weights and edge weights.

        Parameters:
        - max_node_size (int): Maximum size for nodes.
        - min_node_size (int): Minimum size for nodes.
        - max_edge_width (int): Maximum width for edges.
        - max_font_size (int): Maximum font size for node labels.
        - min_font_size (int): Minimum font size for node labels.

        Returns:
        - Tuple[List, List, Dict[int, List]]: node sizes (one per node),
          edge widths (one per edge) and a mapping from integer font size to
          the nodes drawn with that size.
        """
        # Nodes without a weight count as 1, edges without a weight as 0.
        node_weights = [data.get('weight', 1)
                        for _, data in self.nodes(data=True)]
        node_size = scale_weights(
            weights=node_weights, scale_max=max_node_size, scale_min=min_node_size)

        edge_weights = [data.get('weight', 0)
                        for _, _, data in self.edges(data=True)]
        edges_width = scale_weights(
            weights=edge_weights, scale_max=max_edge_width)

        # Group nodes by their (truncated) font size so labels can be drawn
        # with one call per size.
        node_font_sizes = scale_weights(
            weights=node_weights, scale_max=max_font_size, scale_min=min_font_size)
        fonts_size = defaultdict(list)
        for node, size in zip(self.nodes, node_font_sizes):
            fonts_size[int(size)].append(node)

        return node_size, edges_width, dict(fonts_size)

    def subgraphX(self, node_list=None, max_edges: int = DEFAULT["MAX_EDGES"]):
        """Return a pruned subgraph restricted to ``node_list``.

        When ``node_list`` is None the ``DEFAULT["MAX_NODES"]`` heaviest nodes
        are used. Nodes removed by ``find_connected_subgraph`` are dropped,
        then only each node's five heaviest edges are kept, capped at
        ``max_edges`` edges overall.
        """
        if node_list is None:
            node_list = self.nodes.sort("weight")[:DEFAULT["MAX_NODES"]]
        # A set gives O(1) membership tests for the filter below.
        surviving_nodes = set(self.find_connected_subgraph())
        node_list = [node for node in node_list if node in surviving_nodes]

        subgraph = nx.subgraph(self, nbunch=node_list)
        edges = subgraph.top_k_edges(attribute="weight", k=5).keys()
        return subgraph.edge_subgraph(list(edges)[:max_edges])

    def plotX(self,
              title: str = "Test",
              style: StyleTemplate = NETWORK_STYLE_TEMPLATE,
              ax: Optional[Axes] = None) -> Axes:
        """
        Draws the graph (nodes, edges and node labels) on ``ax``.

        Node sizes/colours, edge widths and label font sizes come from
        ``layout``; positions come from a spring layout. The axes frame is
        hidden and the title uses twice the template font size.

        Parameters:
        - title: axes title.
        - style: palette, colours and font sizes.
        - ax: axes to draw on; the current axes are used when None.

        Returns:
        - the axes that were drawn on.
        """
        sns.set_palette(style.palette)
        if ax is None:
            ax = plt.gca()

        node_sizes, edge_widths, font_sizes = self.layout(
            min_node_size=DEFAULT["MIN_NODE_SIZE"]/5,
            max_node_size=DEFAULT["MAX_NODE_SIZE"],
            max_edge_width=DEFAULT["MAX_EDGE_WIDTH"],
            min_font_size=style.font_mapping.get(0),
            max_font_size=style.font_mapping.get(4))
        pos = nx.spring_layout(self, k=1)
        # plt.get_cmap replaces plt.cm.get_cmap, which newer matplotlib
        # releases no longer provide.
        cmap = plt.get_cmap(style.palette)
        # nodes
        nx.draw_networkx_nodes(self,
                               pos,
                               ax=ax,
                               node_size=list(node_sizes),
                               node_color=node_sizes,
                               cmap=cmap)
        # edges
        nx.draw_networkx_edges(self,
                               pos,
                               ax=ax,
                               edge_color=style.font_color,
                               edge_cmap=cmap,
                               width=edge_widths)
        # labels, one draw call per font size
        for font_size, nodes in font_sizes.items():
            nx.draw_networkx_labels(
                self,
                pos,
                ax=ax,
                font_size=font_size,
                font_color=style.font_color,
                labels={n: string_formatter(n) for n in nodes})
        ax.set_facecolor(style.background_color)
        ax.set_title(title, color=style.font_color, fontsize=style.font_size*2)
        ax.set_axis_off()

        return ax

    def analysis(self, node_list: Optional[List] = None,
                 scale: int = DEFAULT["GRAPH_SCALE"],
                 node_scale: int = DEFAULT["MAX_NODE_SIZE"],
                 edge_scale: float = DEFAULT["MAX_EDGE_WIDTH"],
                 max_nodes: int = DEFAULT["MAX_NODES"],
                 max_edges: int = DEFAULT["MAX_EDGES"],
                 plt_title: Optional[str] = "Top keywords"):
        """Plot every connected component of the pruned graph with more than five nodes.

        ``plt_title`` is used as the plot title when provided. ``scale``,
        ``node_scale``, ``edge_scale`` and ``max_nodes`` are accepted for
        interface compatibility but are not used by this method.
        """
        pruned = self.subgraphX(max_edges=max_edges, node_list=node_list)
        for connected_component in nx.connected_components(pruned):
            if len(connected_component) <= 5:
                continue
            component_graph = self.subgraphX(max_edges=max_edges,
                                             node_list=connected_component)
            if plt_title:
                component_graph.plotX(title=plt_title)
            else:
                component_graph.plotX()

    def find_connected_subgraph(self):
        """Return a copy of the graph with all nodes of degree < 2 removed.

        Removal is repeated until no remaining node has degree below 2, so
        chains of dangling nodes are pruned completely. The original graph is
        not modified.
        """
        logging.info('find_connected_subgraph')
        pruned = self.copy()

        removed_node = True
        while removed_node:
            removed_node = False
            # Sweep a snapshot of the nodes; degrees are re-read on each test,
            # so removals earlier in the sweep are taken into account.
            for node in list(pruned.nodes):
                if pruned.degree(node) < 2:
                    logging.info(
                        f'Removing the {node} node and its incident edges')
                    pruned.remove_node(node)
                    removed_node = True

        return pruned

    def top_k_edges(self, attribute: str, reverse: bool = True, k: int = 5) -> Dict[Tuple[Any, Any], Any]:
        """
        Returns the top k edges per node based on the given attribute.

        Parameters:
            attribute (str): The attribute name to be used for sorting.
            reverse (bool): Flag indicating whether to sort in reverse order (default: True).
            k (int): Number of top edges to keep per node.

        Returns:
            Dict[Tuple[Any, Any], Any]: A dictionary keyed by ``(u, v)`` edge
            tuples whose value is that edge's ``attribute`` value. Edges
            without the attribute are treated as having value 0, both for
            ranking and in the returned mapping.
        """
        top_list = {}
        for node in self.nodes:
            ranked = sorted(self.edges(node, data=True),
                            key=lambda edge: edge[2].get(attribute, 0),
                            reverse=reverse)
            for u, v, data in ranked[:k]:
                # .get keeps this consistent with the sort key above instead
                # of raising KeyError for edges lacking the attribute.
                top_list[(u, v)] = data.get(attribute, 0)
        return top_list

    @staticmethod
    def from_pandas_edgelist(df: pd.DataFrame,
                             source: str = "source",
                             target: str = "target",
                             weight: str = "weight"):
        """
        Build a pruned, weighted Graph from an edge-list DataFrame.

        Nodes of degree < 2 are removed, each node receives a ``weight``-named
        attribute equal to the sum of the weights of its top-10 edges, and
        only each node's top-5 edges are kept in the returned subgraph.

        :param df: DataFrame containing network data.
        :param source: Name of source nodes column in df.
        :param target: Name of target nodes column in df.
        :param weight: Name of edges weight column in df.
        """
        G = nx.from_pandas_edgelist(
            df, source=source, target=target, edge_attr=weight, create_using=Graph())
        G = G.find_connected_subgraph()

        node_aggregates = {}
        for (u, v), weight_value in G.top_k_edges(attribute=weight, k=10).items():
            node_aggregates[u] = node_aggregates.get(u, 0) + weight_value
            node_aggregates[v] = node_aggregates.get(v, 0) + weight_value

        nx.set_node_attributes(G, node_aggregates, name=weight)

        G = G.edge_subgraph(edges=G.top_k_edges(attribute=weight))
        return G
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def plot_network(pd_df: pd.DataFrame,
                 source: str = "source",
                 target: str = "target",
                 weight: str = "weight",
                 title: str = "Test",
                 style: StyleTemplate = NETWORK_STYLE_TEMPLATE,
                 sort_by: Optional[str] = None,
                 ascending: bool = False,
                 ax: Optional[Axes] = None) -> Axes:
    """Validate an edge-list DataFrame, build a Graph from it and plot it.

    ``sort_by`` is only checked for existence and ``ascending`` is not used
    by this function. Returns the axes produced by ``Graph.plotX``.
    """
    required_columns = [source, target, weight]
    _validate_panda(pd_df, cols=required_columns, sort_by=sort_by)

    network = Graph.from_pandas_edgelist(
        pd_df, source=source, target=target, weight=weight)
    drawn_axes = network.plotX(title=title, style=style, ax=ax)
    return drawn_axes
|
MatplotLibAPI/Pivot.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# Hint for Visual Code Python Interactive window
|
|
2
|
+
# %%
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
import matplotlib.dates as mdates
|
|
8
|
+
import matplotlib.pyplot as plt
|
|
9
|
+
from matplotlib.axes import Axes
|
|
10
|
+
from matplotlib.dates import DateFormatter, MonthLocator
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
from .Style import DynamicFuncFormatter, StyleTemplate, generate_ticks, string_formatter, _validate_panda, percent_formatter,format_func
|
|
15
|
+
|
|
16
|
+
# Dark theme used by the pivot bar charts.
PIVOTBARS_STYLE_TEMPLATE = StyleTemplate(
    background_color='black',
    fig_border='darkgrey',
    font_color='white',
    palette='magma',
    format_funcs=dict(y=percent_formatter, label=string_formatter),
)

# Light theme intended for the pivot line charts.
PIVOTLINES_STYLE_TEMPLATE = StyleTemplate(
    background_color='white',
    fig_border='lightgrey',
    palette='viridis',
    format_funcs=dict(y=percent_formatter, label=string_formatter),
)
|
|
30
|
+
|
|
31
|
+
def plot_pivotbar(pd_df: pd.DataFrame,
                  label: str,
                  x: str,
                  y: str,
                  agg: str = "sum",
                  style: StyleTemplate = PIVOTBARS_STYLE_TEMPLATE,
                  title: Optional[str] = None,
                  sort_by: Optional[str] = None,
                  ascending: bool = False,
                  ax: Optional[Axes] = None):
    """Pivot ``y`` by ``x`` and ``label`` and draw one bar series per label value.

    The data is aggregated with ``agg``; every distinct ``label`` value
    becomes a bar series plotted against ``x``. ``sort_by`` is only validated
    and ``ascending`` is not used. Returns the axes drawn on.
    """
    _validate_panda(pd_df, cols=[label, x, y], sort_by=sort_by)
    style.format_funcs = format_func(
        style.format_funcs, label=label, x=x, y=y)

    # Pivot, then turn x back into a regular (first) column.
    pivoted = pd.pivot_table(
        pd_df, values=y, index=[x], columns=[label], aggfunc=agg)
    pivot_df = pivoted.reset_index()

    if not ax:
        ax = plt.gca()

    # Every column after the first is one label value's series.
    series_columns = pivot_df.columns[1:]
    for column in series_columns:
        series_formatter = style.format_funcs.get(column)
        legend_label = series_formatter(column) if series_formatter else column
        ax.bar(x=pivot_df[x],
               height=pivot_df[column],
               label=legend_label,
               alpha=0.7)

    # Axis labels, optional title and legend.
    ax.set_ylabel(string_formatter(y))
    ax.set_xlabel(string_formatter(x))
    if title:
        ax.set_title(f'{title}')
    ax.legend(fontsize=style.font_size-2,
              title_fontsize=style.font_size+2,
              labelcolor='linecolor',
              facecolor=style.background_color)

    ax.tick_params(axis='x', rotation=90)
    return ax
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def plot_lines(
        data: pd.DataFrame,
        label: str,
        x: str,
        y: Union[str, List[str]],
        title: Optional[str] = None,
        style: Optional[StyleTemplate] = PIVOTBARS_STYLE_TEMPLATE,
        max_values: int = 4,
        sort_by: Optional[str] = None,
        ascending: bool = False,
        ax: Optional[Axes] = None
) -> Axes:
    """Plot one line per top ``label`` value of ``y`` against ``x``.

    The ``max_values`` label values with the largest summed ``y`` are kept
    and each is drawn as its own line. The y axis runs from the smallest
    plotted value (at most 0) to the 95th percentile of ``y``.

    Parameters:
    - data: source DataFrame.
    - label: column whose values identify the individual lines.
    - x: column used for the x axis.
    - y: column used for the y axis. NOTE(review): the annotation allows a
      list, but the ranking and limits below look like they expect a single
      column name - confirm before passing a list.
    - title: optional axes title.
    - style: template; ``PIVOTLINES_STYLE_TEMPLATE`` is used when None.
    - max_values: number of label values to plot.
    - sort_by, ascending: accepted for interface parity; not used here.
    - ax: axes to draw on; the current axes are used when None.

    Returns:
    - the axes that were drawn on.
    """
    if style is None:
        style = PIVOTLINES_STYLE_TEMPLATE
    # The original dereferenced ``ax`` before checking for None.
    if ax is None:
        ax = plt.gca()

    # StyleTemplate has no x_formatter / y_formatter fields; formatters live
    # in format_funcs under the "x" / "y" roles. getattr keeps templates that
    # do define the attributes working.
    formatters = style.format_funcs or {}
    x_formatter = getattr(style, "x_formatter", None) or formatters.get("x")
    y_formatter = getattr(style, "y_formatter", None) or formatters.get("y")

    if title is not None:
        ax.set_title(title)
    ax.figure.set_facecolor(style.background_color)
    ax.figure.set_edgecolor(style.fig_border)

    # Keep the label values with the largest total y.
    top_elements = data.groupby(
        label)[y].sum().nlargest(max_values).index.tolist()
    top_elements_df = data[data[label].isin(top_elements)]
    y_min = 0
    for element in top_elements:
        subset = top_elements_df[top_elements_df[label] == element]
        y_min = min(y_min, subset[y].min())
        ax.plot(subset[x], subset[y], label=element)

    # X axis: monthly locator for the year-month formatter, otherwise the
    # configured formatter with evenly generated ticks.
    if x_formatter is not None:
        is_year_month = (
            x_formatter == "year_month_formatter"
            or getattr(x_formatter, "__name__", None) == "year_month_formatter")
        if is_year_month:
            ax.xaxis.set_major_locator(MonthLocator())
        else:
            x_min = data[x].min()
            x_max = data[x].max()
            ax.xaxis.set_major_formatter(DynamicFuncFormatter(x_formatter))
            ax.set_xticks(generate_ticks(
                x_min, x_max, num_ticks=style.x_ticks))

    # Rotating through tick_params avoids re-setting labels on a
    # non-fixed locator.
    ax.tick_params(axis='x', labelrotation=45)

    ax.set_xlabel(x)
    y_max = data[y].dropna().quantile(0.95)

    ax.set_ylim(y_min, y_max)
    ax.set_ylabel(y)
    if y_formatter is not None:
        ax.yaxis.set_major_formatter(DynamicFuncFormatter(y_formatter))
        ax.set_yticks(generate_ticks(
            y_min, y_max, num_ticks=style.y_ticks))
    else:
        # Fallback: show the current ticks as whole percentages. The original
        # called ax.yaxis.set_yticklabels, which does not exist on Axis.
        tick_values = ax.get_yticks()
        ax.set_yticks(tick_values)
        ax.set_yticklabels(
            ['{:,.0f}%'.format(tick) for tick in tick_values * 100])
        # Pinning the ticks can widen the view; restore the intended limits.
        ax.set_ylim(y_min, y_max)

    ax.legend()
    ax.grid(True)

    return ax
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def plot_bars(ax: Axes,
              data: pd.DataFrame,
              x_col: str,
              y_col: Union[str, List[str]],
              style: Optional[StyleTemplate] = None,
              fig_title: Optional[str] = None,
              z_col: str = "browser",
              n=5,
              agg_func: str = 'sum') -> Axes:
    """Draw ``data`` as a stacked bar chart indexed by ``x_col``.

    The axes are cleared first. All columns of ``data`` other than ``x_col``
    are plotted as stacked series; the first entry of ``y_col`` is only used
    as the y-axis label.

    Parameters:
    - ax: axes to draw on (cleared before plotting).
    - data: source DataFrame; it is copied, never modified.
    - x_col: column (or index name) used for the x axis.
    - y_col: column name or list of names; the first one labels the y axis.
    - style: template; ``PIVOTBARS_STYLE_TEMPLATE`` is used when None.
    - fig_title: axes title (empty when None).
    - z_col, n, agg_func: accepted for interface compatibility; not used here.

    Returns:
    - the axes that were drawn on.
    """
    if not isinstance(y_col, list):
        y_col = [y_col]

    ax.clear()
    if style is None:
        style = PIVOTBARS_STYLE_TEMPLATE

    # StyleTemplate has no x_formatter / y_formatter fields; formatters live
    # in format_funcs under the "x" / "y" roles. getattr keeps templates that
    # do define the attributes working.
    formatters = style.format_funcs or {}
    x_formatter = getattr(style, "x_formatter", None) or formatters.get("x")
    y_formatter = getattr(style, "y_formatter", None) or formatters.get("y")

    # Work on a copy indexed by x_col.
    df_plot = data.copy()
    if df_plot.index.name != x_col:
        df_plot.set_index(x_col, inplace=True)

    if x_formatter is None:
        df_plot.plot(kind='bar', stacked=True, ax=ax)
    elif (x_formatter == "year_month_formatter"
          or getattr(x_formatter, "__name__", None) == "year_month_formatter"):
        # x_col is the index at this point, so convert the index itself
        # (the original looked it up as a column and raised KeyError).
        if not pd.api.types.is_datetime64_any_dtype(df_plot.index):
            df_plot.index = pd.to_datetime(df_plot.index)
        df_plot.plot(kind='bar', stacked=True, ax=ax)

        # Bars sit at integer positions, so place ticks explicitly on the
        # rows that fall on the first day of a month and label them YYYY-MM.
        formatted_dates = df_plot.index.strftime('%Y-%m')
        first_of_month_positions = [
            i for i, date in enumerate(df_plot.index) if date.day == 1]
        ax.set_xticks(first_of_month_positions)
        ax.set_xticklabels([formatted_dates[i]
                            for i in first_of_month_positions], rotation=45)

        # Remove the blank space at the beginning
        ax.set_xlim(left=0, right=len(df_plot.index) - 1)
    else:
        x_min = df_plot.index.min()
        x_max = df_plot.index.max()
        df_plot.plot(kind='bar', stacked=True, ax=ax)
        ax.xaxis.set_major_formatter(DynamicFuncFormatter(x_formatter))
        ax.set_xticks(generate_ticks(
            x_min, x_max, num_ticks=style.x_ticks))

    # Apply custom y formatter if provided
    if y_formatter is not None:
        ax.yaxis.set_major_formatter(DynamicFuncFormatter(y_formatter))

    # Set title and labels
    ax.set_title(fig_title if fig_title else "")
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col[0])

    return ax
|
MatplotLibAPI/Style.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
from typing import List, Optional, Union, Dict, Callable
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from matplotlib.dates import num2date
|
|
10
|
+
from matplotlib.ticker import FuncFormatter
|
|
11
|
+
|
|
12
|
+
# region Panda
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _validate_panda(pd_df: pd.DataFrame,
|
|
16
|
+
cols: List[str],
|
|
17
|
+
sort_by: Optional[str] = None):
|
|
18
|
+
_columns = cols.copy()
|
|
19
|
+
if sort_by and sort_by not in _columns:
|
|
20
|
+
_columns.append(sort_by)
|
|
21
|
+
for col in _columns:
|
|
22
|
+
if col not in pd_df.columns:
|
|
23
|
+
raise AttributeError(f"{col} is not a DataFrame's column")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def format_func(
        format_funcs: Optional[Dict[str, Optional[Callable[[Union[int, float, str]], str]]]],
        label: Optional[str] = None,
        x: Optional[str] = None,
        y: Optional[str] = None,
        z: Optional[str] = None):
    """Register the role formatters under the actual column names.

    ``format_funcs`` maps the generic role keys ``"label"``, ``"x"``,
    ``"y"`` and ``"z"`` to formatter callables. For every role whose column
    name is given and whose key is present, the same callable is also
    stored under that column name.

    Args:
        format_funcs: Role-to-formatter mapping, or ``None``.
        label: Column name playing the "label" role.
        x: Column name playing the "x" role.
        y: Column name playing the "y" role.
        z: Column name playing the "z" role.

    Returns:
        The same dict object that was passed in, updated in place, or a new
        empty dict when ``format_funcs`` is ``None``.
    """
    # ``None`` is allowed by the annotation; without this guard the
    # membership tests below raise TypeError.
    if format_funcs is None:
        return {}

    roles = (("label", label), ("x", x), ("y", y), ("z", z))

    # Snapshot the role formatters before writing anything: a column that is
    # itself called "x", "y" or "z" would otherwise overwrite a role key
    # that a later iteration still has to read.
    role_formatters = {role: format_funcs[role]
                       for role, _ in roles if role in format_funcs}

    for role, column in roles:
        if column and role in role_formatters:
            format_funcs[column] = role_formatters[role]
    return format_funcs
|
|
42
|
+
|
|
43
|
+
# endregion
|
|
44
|
+
|
|
45
|
+
# region Style
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Module-level defaults. The colour, palette and font-size constants feed the
# StyleTemplate field defaults below; the remaining ones are not referenced
# in this part of the file.
MAX_RESULTS = 50
X_COL = "index"
Y_COL = "overlap"
Z_COL = "users"
FIG_SIZE = (19.2, 10.8)
BACKGROUND_COLOR = 'black'
TEXT_COLOR = 'white'
PALETTE = "Greys_r"
FONT_SIZE = 14


@dataclass
class StyleTemplate:
    """Bundle of visual settings handed to the plotting functions."""

    # Colours and font.
    background_color: str = BACKGROUND_COLOR
    fig_border: str = BACKGROUND_COLOR
    font_name: str = 'Arial'
    font_size: int = FONT_SIZE
    font_color: str = TEXT_COLOR
    palette: str = PALETTE
    legend: bool = True
    # Axis scale names and tick counts.
    xscale: Optional[str] = None
    x_ticks: int = 10
    yscale: Optional[str] = None
    y_ticks: int = 5
    # Formatter callables keyed by role ("label", "x", "y", "z") or by
    # column name.
    format_funcs: Optional[Dict[str, Optional[Callable[[
        Union[int, float, str]], str]]]] = None
    # Passed as ``colWidths`` when a table is drawn.
    col_widths: Optional[List[float]] = None

    @property
    def font_mapping(self):
        """Return a dict mapping levels 0-4 to font sizes around ``font_size``.

        Level 2 is ``font_size`` itself; the other levels are offset by
        -3, -1, +1 and +3 points.
        """
        offsets = (-3, -1, 0, 1, 3)
        return {level: self.font_size + delta
                for level, delta in enumerate(offsets)}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class DynamicFuncFormatter(FuncFormatter):
    """``FuncFormatter`` subclass that simply wraps the given callable."""

    def __init__(self, func_name):
        # Despite its name, ``func_name`` is the formatter callable itself;
        # it is handed to the base class unchanged.
        FuncFormatter.__init__(self, func_name)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def percent_formatter(x, pos: Optional[int] = None):
    """Format a fraction as a percentage with magnitude-dependent precision.

    This module used to define ``percent_formatter`` twice; the later,
    fixed-precision definition shadowed the tiered one, so every value below
    1% was rendered as "0%" or "1%". The two are merged here: values above
    1% are formatted exactly as the fixed-precision version did, smaller
    ones keep one or two decimals.

    Args:
        x: Fraction to format (``0.25`` means 25%).
        pos: Tick position; unused, accepted so the function can be wrapped
            in ``DynamicFuncFormatter``.

    Returns:
        The value multiplied by 100 with a trailing ``%``.
    """
    percent = x * 100
    if percent == 0:
        return "0%"
    # Pick the precision from the magnitude so negative values get the same
    # number of decimals as their positive counterparts.
    magnitude = abs(percent)
    if magnitude <= 0.1:
        return f"{percent:.2f}%"
    if magnitude <= 1:
        return f"{percent:.1f}%"
    return f"{percent:.0f}%"


def bmk_formatter(val, pos: Optional[int] = None):
    """Abbreviate a number with a ``B``, ``M`` or ``K`` suffix.

    Args:
        val: Number to format.
        pos: Tick position; unused.

    Returns:
        Billions with two decimals, millions and thousands with one decimal,
        anything smaller truncated to an integer.
    """
    # Compare on the magnitude so that e.g. -2500 becomes "-2.5K" instead of
    # falling through to the integer branch.
    magnitude = abs(val)
    if magnitude >= 1_000_000_000:  # Billions
        return f"{val / 1_000_000_000:.2f}B"
    if magnitude >= 1_000_000:  # Millions
        return f"{val / 1_000_000:.1f}M"
    if magnitude >= 1_000:  # Thousands
        return f"{val / 1_000:.1f}K"
    return f"{int(val)}"


def integer_formatter(value, pos: Optional[int] = None):
    """Return ``value`` truncated to an integer, as a string."""
    return f"{int(value)}"


def string_formatter(value, pos: Optional[int] = None):
    """Return ``str(value)`` with ``-``/``_`` turned into spaces, title-cased."""
    return str(value).replace("-", " ").replace("_", " ").title()


def yy_mm__formatter(x, pos: Optional[int] = None):
    """Format a Matplotlib date number as ``YYYY-MM``."""
    return num2date(x).strftime('%Y-%m')


def yy_mm_dd__formatter(x, pos: Optional[int] = None):
    """Format a Matplotlib date number as ``YYYY-MM-DD``."""
    # ``%d`` is the zero-padded day of the month. The previous ``%D`` is a
    # platform-specific shorthand for ``%m/%d/%y`` and is not portable.
    return num2date(x).strftime('%Y-%m-%d')
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def generate_ticks(min_val, max_val, num_ticks=10):
    """Generate tick positions between ``min_val`` and ``max_val``.

    Numeric bounds produce ticks on a "nice" step: the raw step is rounded
    to one significant digit. Bounds that cannot be converted to ``float``
    are treated as dates.

    Args:
        min_val: Lower bound; a number, a numeric string, or anything
            ``pandas.Timestamp`` accepts.
        max_val: Upper bound, same kinds as ``min_val``.
        num_ticks: Requested number of ticks; coerced with ``int`` so
            numeric strings keep working.

    Returns:
        A ``numpy.ndarray`` for numeric bounds, or a
        ``pandas.DatetimeIndex`` with ``num_ticks`` evenly spaced entries
        for date bounds. When fewer than two ticks are requested or the
        range is empty, a single tick at ``min_val`` is returned.
    """
    num_ticks = int(num_ticks)

    try:
        # Convert both bounds before rebinding anything, so a failure on
        # ``max_val`` cannot leave ``min_val`` half-converted.
        low, high = float(min_val), float(max_val)
    except (TypeError, ValueError):
        # ``float`` raises ValueError for date strings and TypeError for
        # Timestamp objects; both mean the bounds are dates.
        start = pd.Timestamp(min_val)
        end = pd.Timestamp(max_val)
        if num_ticks < 2 or end <= start:
            return pd.DatetimeIndex([start])
        # Linear spacing between the bounds avoids building a fractional
        # "<n>D" frequency string.
        return pd.date_range(start=start, end=end, periods=num_ticks)

    data_range = high - low
    # A single tick or an empty/NaN range would divide by zero or take
    # log10 of a non-positive step below.
    if num_ticks < 2 or not data_range > 0:
        return np.array([low])

    step_size = data_range / (num_ticks - 1)

    # Round the step to one significant digit.
    exponent = np.floor(np.log10(step_size))
    fraction = step_size / 10**exponent
    nice_step = round(fraction) * 10**exponent

    # Extend the stop by one step so the last tick can reach ``max_val``.
    return np.arange(low, high + nice_step, nice_step)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# endregion
|
MatplotLibAPI/Table.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
from matplotlib.axes import Axes
|
|
5
|
+
from .Style import StyleTemplate, _validate_panda, string_formatter
|
|
6
|
+
|
|
7
|
+
TABLE_STYLE_TEMPLATE = StyleTemplate(
    background_color='black',
    fig_border='darkgrey',
    font_color='white',
    palette='magma'
)


def plot_table(pd_df: pd.DataFrame,
               cols: List[str],
               title: Optional[str] = None,
               style: StyleTemplate = TABLE_STYLE_TEMPLATE,
               max_values: int = 20,
               sort_by: Optional[str] = None,
               ascending: bool = False,
               ax: Optional[Axes] = None
               ) -> Axes:
    """Render the top rows of a DataFrame as a Matplotlib table.

    Args:
        pd_df: Source data.
        cols: Columns to display, in display order.
        title: Optional title drawn above the table.
        style: Colours, font size, column widths and per-column formatters.
        max_values: Maximum number of rows shown.
        sort_by: Column to sort by; defaults to the first entry of ``cols``.
            It does not have to be one of the displayed columns.
        ascending: Sort direction.
        ax: Axes to draw on; the current axes are used when omitted.

    Returns:
        The axes the table was drawn on.

    Raises:
        AttributeError: From ``_validate_panda`` when a requested column is
            missing from ``pd_df``.
    """
    _validate_panda(pd_df, cols=cols, sort_by=sort_by)

    if not sort_by:
        sort_by = cols[0]

    # Sort before projecting onto ``cols``: ``sort_by`` is validated against
    # the full frame and may not be displayed, in which case sorting the
    # projection raises KeyError. The copy keeps the formatter assignments
    # below from writing into a slice of the caller's frame.
    plot_df = (pd_df.sort_values(by=sort_by, ascending=ascending)[cols]
               .head(max_values)
               .copy())

    if style.format_funcs:
        for col, func in style.format_funcs.items():
            # The mapping's values are Optional; skip unset formatters.
            if func is not None and col in plot_df.columns:
                plot_df[col] = plot_df[col].apply(func)

    def format_table(table):
        """Apply the template's font size and colours to every cell."""
        table.auto_set_font_size(False)
        table.set_fontsize(style.font_size)
        table.scale(1.2, 1.2)

        for cell in table.get_celld().values():
            cell.set_fontsize(style.font_size)
            cell.set_facecolor(style.background_color)
            cell.get_text().set_color(style.font_color)

    if ax is None:
        ax = plt.gca()

    table_plot = ax.table(
        cellText=plot_df.values,
        colLabels=[string_formatter(col_label) for col_label in cols],
        cellLoc='center',
        colWidths=style.col_widths,
        bbox=[0, -0.3, 1, 1.3])
    format_table(table_plot)
    ax.set_facecolor(style.background_color)
    ax.set_axis_off()
    ax.grid(False)
    if title:
        ax.set_title(title, color=style.font_color, fontsize=style.font_size*2)
        ax.title.set_position([0.5, 1.05])
    return ax
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Hint for Visual Code Python Interactive window
|
|
2
|
+
# %%
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
from matplotlib.axes import Axes
|
|
7
|
+
import seaborn as sns
|
|
8
|
+
from .Style import DynamicFuncFormatter, StyleTemplate, string_formatter, _validate_panda, bmk_formatter,format_func
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
TIMESERIE_STYLE_TEMPLATE = StyleTemplate(
    palette='rocket',
    format_funcs={"y": bmk_formatter, "label": string_formatter}
)

# region Line


def plot_timeserie(pd_df: pd.DataFrame,
                   label: str,
                   x: str,
                   y: str,
                   title: Optional[str] = None,
                   style: StyleTemplate = TIMESERIE_STYLE_TEMPLATE,
                   sort_by: Optional[str] = None,
                   ascending: bool = False,
                   ax: Optional[Axes] = None) -> Axes:
    """Plot a rolling mean with a +/- one standard deviation band per label.

    Args:
        pd_df: Source data.
        label: Column whose distinct values each get their own line.
        x: Column with the time values; converted with ``pd.to_datetime``.
        y: Column with the values to average.
        title: Optional axes title.
        style: Palette, colours, font size and formatters.
        sort_by: Only checked for existence; it does not affect the plot.
        ascending: Currently unused.
        ax: Axes to draw on; the current axes are used when omitted.

    Returns:
        The axes that were drawn on.

    Raises:
        AttributeError: From ``_validate_panda`` when a requested column is
            missing from ``pd_df``.
    """
    _validate_panda(pd_df, cols=[label, x, y], sort_by=sort_by)

    # Resolve the formatters on a private copy: ``style`` is by default the
    # module-level template, and writing column names into its mapping would
    # leak them into every later call.
    format_funcs = format_func(dict(style.format_funcs or {}),
                               label=label, x=x, y=y)
    label_formatter = format_funcs.get("label")
    # Fall back to ``str`` so a template without a "y" formatter still
    # produces a legend instead of raising KeyError.
    y_formatter = format_funcs.get(y) or format_funcs.get("y") or str

    df = pd_df[[label, x, y]].sort_values(by=[label, x])
    df[x] = pd.to_datetime(df[x])
    df.set_index(x, inplace=True)

    sns.set_palette(style.palette)
    if ax is None:
        ax = plt.gca()

    # One colour per distinct label value.
    label_types = df[label].unique()
    colors = sns.color_palette(n_colors=len(label_types))

    for label_type, color in zip(label_types, colors):
        temp_df = df[df[label] == label_type].sort_values(by=x)

        if label_formatter:
            label_type = label_formatter(label_type)

        # The window is 7 rows, i.e. 7 observations, not necessarily 7
        # calendar days.
        ma = temp_df[y].rolling(window=7, min_periods=1).mean()
        std_dev = temp_df[y].rolling(window=7, min_periods=1).std()

        # The latest moving-average value is shown in the legend entry.
        last_ma_value = ma.iloc[-1]
        label_str = f"{string_formatter(label_type)} (avg 7d: {y_formatter(last_ma_value)})"

        # Draw on ``ax`` rather than through ``plt``: pyplot targets the
        # current axes, which is not necessarily the one passed in.
        ax.plot(temp_df.index, ma, color=color,
                linestyle='--', label=label_str)
        ax.fill_between(temp_df.index, ma - std_dev, ma +
                        std_dev, color=color, alpha=0.2, label='_nolegend_')

    ax.legend(
        title=label,
        fontsize=style.font_size-4,
        title_fontsize=style.font_size,
        labelcolor='linecolor',
        facecolor=style.background_color)

    ax.set_xlabel(string_formatter(x), color=style.font_color)
    if format_funcs.get("x"):
        ax.xaxis.set_major_formatter(
            DynamicFuncFormatter(format_funcs.get("x")))
    ax.tick_params(axis='x', colors=style.font_color,
                   labelrotation=45, labelsize=style.font_size-4)

    ax.set_ylabel(string_formatter(y), color=style.font_color)
    if format_funcs.get("y"):
        ax.yaxis.set_major_formatter(
            DynamicFuncFormatter(format_funcs.get("y")))
    ax.tick_params(axis='y', colors=style.font_color,
                   labelsize=style.font_size-4)
    ax.set_facecolor(style.background_color)
    ax.grid(True)
    if title:
        ax.set_title(title, color=style.font_color, fontsize=style.font_size+4)
    return ax
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# endregion
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
from .Table import plot_table
|
|
4
|
+
from .Timeserie import plot_timeserie
|
|
5
|
+
from .Bubble import plot_bubble
|
|
6
|
+
from .Network import plot_network
|
|
7
|
+
from .Pivot import plot_pivotbar
|
|
8
|
+
from .Composite import plot_composite_bubble
|
|
9
|
+
from .pdAccessor import MatPlotLibAccessor
|
|
10
|
+
from .Style import StyleTemplate
|
|
11
|
+
|
|
12
|
+
__all__ = ["plot_bubble", "plot_timeserie", "plot_table", "plot_network",
|
|
13
|
+
"plot_pivotbar", "plot_composite_bubble", "StyleTemplate", "MatPlotLibAccessor"]
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
|
|
2
|
+
import logging
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import Optional, List
|
|
5
|
+
from matplotlib.axes import Axes
|
|
6
|
+
from matplotlib.figure import Figure
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from .Style import StyleTemplate
|
|
9
|
+
from .Bubble import plot_bubble, BUBBLE_STYLE_TEMPLATE
|
|
10
|
+
from .Composite import plot_composite_bubble
|
|
11
|
+
from .Timeserie import plot_timeserie,TIMESERIE_STYLE_TEMPLATE
|
|
12
|
+
from .Table import plot_table, TABLE_STYLE_TEMPLATE
|
|
13
|
+
from .Network import (Graph)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# NOTE(review): both statements change process-wide state as soon as this
# module is imported (all warnings silenced, root logger level set). They are
# kept unchanged because callers may rely on them; consider scoping them.
warnings.filterwarnings('ignore')
logging.getLogger().setLevel(logging.WARNING)


@pd.api.extensions.register_dataframe_accessor("mpl")
class MatPlotLibAccessor:
    """DataFrame accessor exposing the package's plots as ``df.mpl.<plot>``."""

    def __init__(self, pd_df: pd.DataFrame):
        self._obj = pd_df

    def plot_bubble(self,
                    label: str,
                    x: str,
                    y: str,
                    z: str,
                    title: str = "Test",
                    style: StyleTemplate = BUBBLE_STYLE_TEMPLATE,
                    max_values: int = 50,
                    center_to_mean: bool = False,
                    sort_by: Optional[str] = None,
                    ascending: bool = False) -> Axes:
        """Forward the wrapped DataFrame and all arguments to ``plot_bubble``.

        NOTE(review): the default title "Test" looks like a leftover
        placeholder; it is kept so existing callers see no change.
        """
        return plot_bubble(pd_df=self._obj,
                           label=label,
                           x=x,
                           y=y,
                           z=z,
                           title=title,
                           style=style,
                           max_values=max_values,
                           center_to_mean=center_to_mean,
                           sort_by=sort_by,
                           ascending=ascending)

    def plot_bubble_composite(self,
                              label: str,
                              x: str,
                              y: str,
                              z: str,
                              title: Optional[str] = None,
                              style: StyleTemplate = BUBBLE_STYLE_TEMPLATE,
                              max_values: int = 50,
                              center_to_mean: bool = False,
                              sort_by: Optional[str] = None,
                              ascending: bool = False) -> Figure:
        """Forward the wrapped DataFrame and all arguments to ``plot_composite_bubble``."""
        return plot_composite_bubble(pd_df=self._obj,
                                     label=label,
                                     x=x,
                                     y=y,
                                     z=z,
                                     title=title,
                                     style=style,
                                     max_values=max_values,
                                     center_to_mean=center_to_mean,
                                     sort_by=sort_by,
                                     ascending=ascending)

    def plot_table(self,
                   cols: List[str],
                   title: Optional[str] = None,
                   style: StyleTemplate = TABLE_STYLE_TEMPLATE,
                   max_values: int = 20,
                   sort_by: Optional[str] = None,
                   ascending: bool = False) -> Axes:
        """Forward the wrapped DataFrame and all arguments to ``plot_table``."""
        return plot_table(pd_df=self._obj,
                          cols=cols,
                          title=title,
                          style=style,
                          max_values=max_values,
                          sort_by=sort_by,
                          ascending=ascending)

    def plot_timeserie(self,
                       label: str,
                       x: str,
                       y: str,
                       title: Optional[str] = None,
                       style: StyleTemplate = TIMESERIE_STYLE_TEMPLATE,
                       max_values: int = 20,
                       sort_by: Optional[str] = None,
                       ascending: bool = False) -> Axes:
        """Forward the wrapped DataFrame to the module-level ``plot_timeserie``.

        ``max_values`` stays in the signature for backward compatibility but
        is not forwarded: the module-level ``plot_timeserie`` has no such
        parameter, so passing it raised TypeError on every call.
        """
        return plot_timeserie(pd_df=self._obj,
                              label=label,
                              x=x,
                              y=y,
                              title=title,
                              style=style,
                              sort_by=sort_by,
                              ascending=ascending)

    def plot_network(self,
                     source: str = "source",
                     target: str = "target",
                     weight: str = "weight",
                     title: Optional[str] = None,
                     style: StyleTemplate = TIMESERIE_STYLE_TEMPLATE,
                     max_values: int = 20,
                     sort_by: Optional[str] = None,
                     ascending: bool = False) -> Axes:
        """Build a ``Graph`` from the wrapped edge list and plot it.

        ``max_values``, ``sort_by`` and ``ascending`` are accepted but not
        used by this method.
        """
        graph = Graph.from_pandas_edgelist(df=self._obj,
                                           source=source,
                                           target=target,
                                           weight=weight)

        return graph.plotX(title, style)
|
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: MatplotLibAPI
|
|
3
|
-
Version:
|
|
3
|
+
Version: 3.0.0
|
|
4
4
|
Description-Content-Type: text/markdown
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Dist: pandas
|
|
7
7
|
Requires-Dist: matplotlib
|
|
8
8
|
Requires-Dist: seaborn
|
|
9
|
-
Requires-Dist: json
|
|
10
|
-
Requires-Dist: logging
|
|
11
9
|
Requires-Dist: scikit-learn
|
|
12
10
|
|
|
13
11
|
# MatplotLibAPI
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
MatplotLibAPI/Bubble.py,sha256=aYw8qcYF_cXJzAU7tIi5Cn7Y-sRfR8Cvm5RivMvVd5E,3860
|
|
2
|
+
MatplotLibAPI/Composite.py,sha256=1qP_9TEz58EDA-oEev21WfJh2mxAEVVkRqYsysvMw7E,2478
|
|
3
|
+
MatplotLibAPI/Network.py,sha256=fq1-LfDKy3oC5JzqU7IyZudgR2plmziigjpm2TzEA3U,13286
|
|
4
|
+
MatplotLibAPI/Pivot.py,sha256=8jRYdlvw8otrJ1hYBAaKQJoN84xe-YfroXmgRLykMmQ,7162
|
|
5
|
+
MatplotLibAPI/Style.py,sha256=xtw71VWHSQEvts1nJdKR0c_9Rqef6EGkyyvgdTK5M7c,4632
|
|
6
|
+
MatplotLibAPI/Table.py,sha256=Az5uX-ViqzJPTBym4QnUzn5PD5LEsVzQ9WohcSDFcK4,1990
|
|
7
|
+
MatplotLibAPI/Timeserie.py,sha256=KOAK6cewddQWLFOFy98Db3BTNjWfv9TfJlPiJDybK84,3386
|
|
8
|
+
MatplotLibAPI/__init__.py,sha256=-i49Ta56gER88f6R_BDOo-JiLc5UUGvxHg2aHdJv0PQ,457
|
|
9
|
+
MatplotLibAPI/pdAccessor.py,sha256=OT03Z8V7ijWux0984IIuQegyobCSg_bhyGhtdejVs9I,4765
|
|
10
|
+
MatplotLibAPI-3.0.0.dist-info/LICENSE,sha256=hMErKLb6YZR3lRR5zr-vxeFkvY69QAaafgSpZ5-P1dQ,1067
|
|
11
|
+
MatplotLibAPI-3.0.0.dist-info/METADATA,sha256=TOXB7fkQGBvJKUd5q-T4k1Il0OUmwLW40Q5iQ2_RQxk,250
|
|
12
|
+
MatplotLibAPI-3.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
13
|
+
MatplotLibAPI-3.0.0.dist-info/top_level.txt,sha256=MrzbBjDEW48Vb6YhQIqpFYGOhHzQnEIM5Qy2xy2iqew,14
|
|
14
|
+
MatplotLibAPI-3.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
MatplotLibAPI
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
MatplotLibAPI-1.0.dist-info/LICENSE,sha256=hMErKLb6YZR3lRR5zr-vxeFkvY69QAaafgSpZ5-P1dQ,1067
|
|
2
|
-
MatplotLibAPI-1.0.dist-info/METADATA,sha256=sdyXoT7TW1GNPfgrKSHF-Wfk2vO1zW58AFkI0rEkk0g,291
|
|
3
|
-
MatplotLibAPI-1.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
4
|
-
MatplotLibAPI-1.0.dist-info/top_level.txt,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
5
|
-
MatplotLibAPI-1.0.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
File without changes
|