MatplotLibAPI 1.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MatplotLibAPI-2.0.2/MatplotLibAPI/Bubble.py +108 -0
- MatplotLibAPI-2.0.2/MatplotLibAPI/Composite.py +109 -0
- MatplotLibAPI-2.0.2/MatplotLibAPI/Network.py +318 -0
- MatplotLibAPI-2.0.2/MatplotLibAPI/Pivot.py +224 -0
- MatplotLibAPI-2.0.2/MatplotLibAPI/Table.py +70 -0
- MatplotLibAPI-2.0.2/MatplotLibAPI/TimeSeries.py +136 -0
- MatplotLibAPI-2.0.2/MatplotLibAPI/Utils.py +204 -0
- MatplotLibAPI-2.0.2/MatplotLibAPI/__init__.py +0 -0
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2/MatplotLibAPI.egg-info}/PKG-INFO +1 -3
- MatplotLibAPI-2.0.2/MatplotLibAPI.egg-info/SOURCES.txt +17 -0
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2}/MatplotLibAPI.egg-info/requires.txt +0 -2
- MatplotLibAPI-2.0.2/MatplotLibAPI.egg-info/top_level.txt +1 -0
- {MatplotLibAPI-1.0/MatplotLibAPI.egg-info → MatplotLibAPI-2.0.2}/PKG-INFO +1 -3
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2}/setup.py +1 -3
- MatplotLibAPI-1.0/MatplotLibAPI.egg-info/SOURCES.txt +0 -9
- MatplotLibAPI-1.0/MatplotLibAPI.egg-info/top_level.txt +0 -1
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2}/LICENSE +0 -0
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2}/MatplotLibAPI.egg-info/dependency_links.txt +0 -0
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2}/README.md +0 -0
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2}/pyproject.toml +0 -0
- {MatplotLibAPI-1.0 → MatplotLibAPI-2.0.2}/setup.cfg +0 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import seaborn as sns
|
|
8
|
+
from sklearn.preprocessing import StandardScaler
|
|
9
|
+
|
|
10
|
+
from matplotlib.axes import Axes
|
|
11
|
+
|
|
12
|
+
from .Utils import (BUBBLE_STYLE_TEMPLATE, DynamicFuncFormatter,
|
|
13
|
+
StyleTemplate, generate_ticks)
|
|
14
|
+
|
|
15
|
+
# region Bubble
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def plot_bubble(ax: Axes,
                data: pd.DataFrame,
                x_col: str,
                y_col: Union[str, List[str]],
                fig_title: Optional[str] = None,
                style: Optional[StyleTemplate] = None,
                legend: bool = False,
                z_col: str = "uniques",
                hue_col: str = "uniques_quintile",
                l_col: str = "dimension",
                normalize_x: bool = True,
                sizes: tuple = (20, 2000), **kwargs) -> Axes:
    """Draw a labelled bubble chart on ``ax`` with a log-scaled y axis.

    Parameters:
    ax: Axes to draw on; it is cleared first.
    data: Frame holding the columns named by the ``*_col`` arguments.
        It is never modified; normalization works on a copy.
    x_col: Column plotted on the x axis.
    y_col: Column plotted on the y axis. When a list is given only its
        first entry is used.
    fig_title: Optional axes title.
    style: Style template; ``BUBBLE_STYLE_TEMPLATE`` when omitted.
    legend: Forwarded to ``seaborn.scatterplot``.
    z_col: Column that drives the bubble size.
    hue_col: Column that drives the bubble colour. Its value also
        multiplies ``style.font_size`` for each point label.
    l_col: Column holding the text label drawn at each point.
    normalize_x: When true, ``x_col`` is standardized, rescaled so the
        largest absolute value becomes 100, and rounded to integers.
    sizes: Bubble size range forwarded to ``seaborn.scatterplot``.
    **kwargs: Accepted for call compatibility and ignored.

    Returns:
    The same ``ax`` that was passed in.
    """
    # Clear the axis before plotting
    ax.clear()

    # Start formatting
    if fig_title is not None:
        ax.set_title(fig_title)
    if style is None:
        style = BUBBLE_STYLE_TEMPLATE
    ax.figure.set_facecolor(style.fig_background_color)
    ax.figure.set_edgecolor(style.fig_border)

    if normalize_x:
        # Work on a copy: the normalized values must not overwrite the
        # caller's column (a second call would re-normalize them).
        data = data.copy()

        # Step 1: standardize to mean=0 and variance=1
        scaled_data = StandardScaler().fit_transform(data[[x_col]])

        # Step 2: stretch into [-100, 100]. A constant column scales to
        # all zeros; skip the stretch then instead of dividing by zero.
        peak = np.max(np.abs(scaled_data))
        if peak > 0:
            scaled_data *= 100 / peak

        # Step 3: round to the nearest integer
        data[x_col] = np.round(scaled_data).astype(int).ravel()

    if isinstance(y_col, list):
        y_col = y_col[0]

    g = sns.scatterplot(data=data,
                        x=x_col,
                        y=y_col,
                        size=z_col,
                        hue=hue_col,
                        palette=style.palette,
                        legend=legend,
                        sizes=sizes,
                        ax=ax)

    g.set(yscale="log")

    # Vertical reference line at x = 0
    g.axes.axvline(x=0, linestyle='--')

    y_min = data[y_col].min()
    y_max = data[y_col].max()
    if style.y_formatter is not None:
        g.axes.yaxis.set_major_formatter(
            DynamicFuncFormatter(style.y_formatter))
        g.set_yticks(generate_ticks(y_min, y_max, num_ticks=style.y_ticks))
    else:
        ylabels = ['{:,.0f}%'.format(y) for y in g.get_yticks()*100]
        g.set_yticklabels(ylabels)

    y_mean = data[y_col].mean()

    # The x range is needed by the mean line below even when no x
    # formatter is configured, so compute it unconditionally.
    x_min = data[x_col].min()
    x_max = data[x_col].max()
    if style.x_formatter is not None:
        g.xaxis.set_major_formatter(
            DynamicFuncFormatter(style.x_formatter))
        g.set_xticks(generate_ticks(x_min, x_max, num_ticks=style.x_ticks))

    g.axes.xaxis.grid(True, "minor", linewidth=.25)
    g.axes.yaxis.grid(True, "minor", linewidth=.25)

    # Horizontal reference line at the mean of the y column
    g.hlines(y=y_mean, xmin=x_min, xmax=x_max,
             linestyle='--', colors=style.font_color)

    # Label every point; the font size grows with the hue column value
    for _, row in data.iterrows():
        g.text(row[x_col], row[y_col], row[l_col],
               horizontalalignment='center',
               fontsize=style.font_size*row[hue_col],
               color=style.font_color)

    return ax
|
|
107
|
+
|
|
108
|
+
# endregion
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Hint for Visual Code Python Interactive window
|
|
2
|
+
# %%
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from .Bubble import plot_bubble
|
|
6
|
+
from .Table import plot_table
|
|
7
|
+
from typing import Callable, Dict, Optional, List
|
|
8
|
+
from matplotlib.axes import Axes
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def plot_composite(data: pd.DataFrame,
                   sort_column: str,
                   mappings: Dict[str, Callable],
                   num_rows: int = 10,
                   font_size: int = 12,
                   fig_title: str = 'Bubble Plot',
                   fig_background_color: str = 'skyblue',
                   fig_border: str = 'steelblue',
                   font_name: str = 'Arial',
                   font_color: str = 'black',
                   x_col: str = 'INDEX',
                   y_col: str = 'audience') -> None:
    """Build a figure with a bubble chart above two ranking tables.

    The figure uses a 5x4 grid: the bubble chart takes the top three
    rows, the ten highest-``INDEX`` rows fill the bottom-left table and
    the ten lowest-``INDEX`` rows fill the bottom-right table.

    Parameters:
    data: Frame with at least ``uniques``, ``audience`` and ``INDEX``
        columns. It is not modified; the quintile helper columns are
        added to an internal copy.
    sort_column: Forwarded to ``plot_table``.
    mappings: Column formatters forwarded to ``plot_table``.
    num_rows: Forwarded to ``plot_table``.
    font_size: Global matplotlib font size; also forwarded to the plots.
    fig_title: Title of the bubble chart.
    fig_background_color, fig_border, font_name, font_color: Styling
        forwarded to the sub-plots.
    x_col, y_col: Columns for the bubble chart axes. ``plot_bubble``
        requires both, and the earlier call omitted them.
        NOTE(review): the defaults are an assumption based on the
        columns this function reads - confirm the intended axes.
    """
    # Derive the helper columns on a copy so the caller's frame is untouched.
    frame = data.copy()
    frame['uniques_quintile'] = pd.qcut(frame['uniques'], 5, labels=False)
    text_size_mapping = {0: 8, 1: 9, 2: 10, 3: 12, 4: 14}
    frame["font_size"] = frame['uniques_quintile'].map(text_size_mapping)

    frame['audience_quintile'] = pd.qcut(frame['audience'], 5, labels=False)
    frame['INDEX_quintile'] = pd.qcut(frame['INDEX'], 5, labels=False)

    # Adjust font size for better readability
    plt.rc('font', size=font_size)

    fig = plt.figure("Graph", figsize=(10, 10))
    axgrid = fig.add_gridspec(5, 4)

    ax0 = fig.add_subplot(axgrid[0:3, :])
    # The bubble chart gets its own copy so that any x normalization it
    # performs cannot leak into the tables built below.
    plot_bubble(ax=ax0,
                data=frame.copy(),
                x_col=x_col,
                y_col=y_col,
                font_size=font_size,
                fig_background_color=fig_background_color,
                fig_border=fig_border,
                font_name=font_name)
    ax0.set_title(fig_title)
    ax0.set_axis_off()

    # (grid slot, ascending sort, panel title) for the two tables
    table_panels = (
        (axgrid[3:, :2], False, 'Top Items'),
        (axgrid[3:, 2:], True, 'Worst Items'),
    )
    for slot, ascending, panel_title in table_panels:
        table_ax = fig.add_subplot(slot)
        ranked = frame.sort_values(by="INDEX", ascending=ascending).head(10)
        plot_table(ax=table_ax,
                   data=ranked,
                   mappings=mappings,
                   sort_column=sort_column,
                   sort_ascending=ascending,
                   num_rows=num_rows,
                   fig_background_color=fig_background_color,
                   fig_border=fig_border,
                   font_name=font_name,
                   font_size=font_size,
                   font_color=font_color)
        table_ax.set_title(panel_title)
        table_ax.set_axis_off()

    fig.tight_layout()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def plot_composite_12(plot_func1, plot_func2, plot_func3,
                      data1, data2, data3,
                      metrics1, metrics2, metrics3,
                      highlights: Optional[List[str]] = None,
                      font_size: int = 12,
                      fig_title: str = 'Bubble Plot',
                      fig_background_color: str = 'skyblue',
                      fig_border: str = 'steelblue',
                      font_name: str = 'Arial',
                      font_color: str = 'black') -> plt.Figure:
    """Lay out three caller-supplied plots in a one-over-two figure.

    The figure uses a 5x4 grid: the first plot spans the top three rows
    and the other two share the bottom two rows side by side.

    Parameters:
    plot_func1, plot_func2, plot_func3: Callables invoked with the
        keyword arguments ``ax``, ``data``, ``metrics``, ``highlights``,
        ``font_size``, ``fig_background_color``, ``fig_border``,
        ``font_name`` and ``font_color``.
    data1, data2, data3: Data passed to the matching plot function.
    metrics1, metrics2, metrics3: Metrics passed to the matching plot
        function.
    highlights: Passed unchanged to all three plot functions.
    font_size, fig_background_color, fig_border, font_name, font_color:
        Styling passed unchanged to all three plot functions.
    fig_title: Used both as the figure identifier and as its suptitle.

    Returns:
    The created figure.
    """
    # Create a new figure and define the grid
    fig = plt.figure(fig_title, figsize=(10, 10))
    axgrid = fig.add_gridspec(5, 4)

    # Create all axes first, in the original order, then fill them
    panel_axes = [
        fig.add_subplot(axgrid[0:3, :]),
        fig.add_subplot(axgrid[3:, :2]),
        fig.add_subplot(axgrid[3:, 2:]),
    ]
    panels = zip((plot_func1, plot_func2, plot_func3),
                 panel_axes,
                 (data1, data2, data3),
                 (metrics1, metrics2, metrics3))

    for plot_func, panel_ax, panel_data, panel_metrics in panels:
        plot_func(ax=panel_ax,
                  data=panel_data,
                  metrics=panel_metrics,
                  highlights=highlights,
                  font_size=font_size,
                  fig_background_color=fig_background_color,
                  fig_border=fig_border,
                  font_name=font_name,
                  font_color=font_color)

    fig.suptitle(fig_title, fontsize=16)
    fig.tight_layout()
    return fig
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
import networkx as nx
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from networkx import Graph
|
|
11
|
+
from networkx.classes.graph import Graph
|
|
12
|
+
|
|
13
|
+
# Module-wide defaults used as parameter defaults by the Graph methods.
DEFAULT = {
    "MAX_EDGES": 100,       # edge cap applied when extracting a subgraph
    "MAX_NODES": 30,        # node cap applied when no node list is given
    "MIN_NODE_SIZE": 100,   # smallest drawn node size
    "MAX_NODE_SIZE": 2000,  # largest drawn node size
    "MAX_EDGE_WIDTH": 10,   # widest drawn edge
    "GRAPH_SCALE": 2,       # default for the `scale` argument of analysis()
    "MAX_FONT_SIZE": 12,    # largest node label font size
    "MIN_FONT_SIZE": 8,     # smallest node label font size
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The maximum of ``x`` is subtracted before exponentiating, which
    leaves the result unchanged but keeps the exponentials from
    overflowing.
    """
    exponentials = np.exp(x - np.max(x))
    return exponentials / exponentials.sum()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def scale_weights(weights, scale_min=0, scale_max=1):
    """Map each weight to a value in ``[scale_min, scale_max]`` by decile.

    The 10th..90th percentiles of ``weights`` are used as nine cut
    points. Each weight is replaced by the number of cut points strictly
    below it (0..9), and that rank is mapped linearly so rank 0 gives
    ``scale_min`` and rank 9 gives ``scale_max``.

    Parameters:
    weights: Sequence of numeric weights.
    scale_min: Output value for the lowest rank.
    scale_max: Output value for the highest rank.

    Returns:
    A list of floats, one per input weight, in input order. An empty
    input yields an empty list (percentiles of no data are undefined).
    """
    values = np.asarray(weights)
    if values.size == 0:
        return []

    deciles = np.percentile(values, [10, 20, 30, 40, 50, 60, 70, 80, 90])
    ranks = np.searchsorted(deciles, values)
    scaled = ranks * (scale_max - scale_min) / len(deciles) + scale_min
    return scaled.tolist()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class NodeView(nx.classes.reportviews.NodeView):
    """NetworkX node view extended with attribute-based sort and filter."""

    def sort(self,
             attribute: Optional[str] = 'weight',
             reverse: Optional[bool] = True):
        """Return the nodes ordered by their ``attribute`` value.

        Every node must carry ``attribute``; a node without it raises
        ``KeyError``. Descending order is the default.
        """
        def attribute_of(node):
            return self[node][attribute]

        ordered = list(self)
        ordered.sort(key=attribute_of, reverse=reverse)
        return ordered

    def filter(self, attribute: str, value: str):
        """Return the nodes whose ``attribute`` exists and equals ``value``."""
        matches = []
        for node in self:
            node_data = self[node]
            if attribute in node_data and node_data[attribute] == value:
                matches.append(node)
        return matches
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class AdjacencyView(nx.classes.coreviews.AdjacencyView):
    """NetworkX adjacency view extended with sort and filter helpers."""

    def sort(self,
             attribute: Optional[str] = 'weight',
             reverse: Optional[bool] = True):
        """Return the view's keys ordered by ``self[key][attribute]``.

        A key whose entry lacks ``attribute`` raises ``KeyError``.
        Descending order is the default.
        """
        keys = list(self)
        keys.sort(key=lambda key: self[key][attribute], reverse=reverse)
        return keys

    def filter(self, attribute: str, value: str):
        """Return the keys whose entry has ``attribute`` equal to ``value``."""
        selected = []
        for key in self:
            entry = self[key]
            if attribute not in entry:
                continue
            if entry[attribute] == value:
                selected.append(key)
        return selected
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class EdgeView(nx.classes.reportviews.EdgeView):
    """NetworkX edge view extended with attribute-based sort and filter."""

    def sort(self,
             reverse: Optional[bool] = True,
             attribute: Optional[str] = 'weight'):
        """Return ``{(u, v): edge_data}`` ordered by ``attribute``.

        Edges that lack ``attribute`` are ranked as if its value were 1.
        Descending order is the default.
        """
        def rank_of(edge_triple):
            return edge_triple[2].get(attribute, 1)

        triples = list(self(data=True))
        triples.sort(key=rank_of, reverse=reverse)

        ordered = {}
        for u, v, edge_data in triples:
            ordered[(u, v)] = edge_data
        return ordered

    def filter(self, attribute: str, value: str):
        """Return ``(u, v)`` for each edge whose ``attribute`` equals ``value``."""
        pairs = []
        for edge in self:
            edge_data = self[edge]
            if attribute in edge_data and edge_data[attribute] == value:
                pairs.append((edge[0], edge[1]))
        return pairs
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class Graph(nx.Graph):
    """
    Custom graph class based on NetworkX's Graph class.

    Adds sortable/filterable node and edge views, helpers that derive
    drawing sizes from ``weight`` attributes, and convenience methods to
    trim the graph and plot it with matplotlib.
    """

    def __init__(self):
        super().__init__()
        self._scale = 1.0

    @property
    def scale(self) -> float:
        """Scale factor stored on the graph (initially 1.0)."""
        return self._scale

    @scale.setter
    def scale(self, value: float):
        self._scale = value

    # A former `@nodes.setter` was declared under the name `scale`. That
    # rebound `scale` to the nodes getter, so reading `scale` returned a
    # NodeView, and its body assigned to the setter-less `nodes`
    # property. It has been dropped; `nodes` stays read-only.

    @property
    def nodes(self):
        """Node view with ``sort`` and ``filter`` helpers."""
        return NodeView(self)

    @property
    def edges(self):
        """Edge view with ``sort`` and ``filter`` helpers."""
        return EdgeView(self)

    @property
    def adjacency(self):
        """Adjacency view with ``sort`` and ``filter`` helpers."""
        # The view must wrap the adjacency mapping, not a list of nodes.
        return AdjacencyView(self._adj)

    def edge_subgraph(self, edges: Iterable) -> "Graph":
        """Return the subgraph view induced by ``edges``."""
        return nx.edge_subgraph(self, edges)

    def layout(self,
               max_node_size: int = DEFAULT["MAX_NODE_SIZE"],
               min_node_size: int = DEFAULT["MIN_NODE_SIZE"],
               max_edge_width: int = DEFAULT["MAX_EDGE_WIDTH"],
               max_font_size: int = DEFAULT["MAX_FONT_SIZE"],
               min_font_size: int = DEFAULT["MIN_FONT_SIZE"]):
        """
        Calculates the sizes for nodes, edges, and fonts based on node weights and edge weights.

        Parameters:
        - max_node_size (int): Size given to the heaviest nodes.
        - min_node_size (int): Size given to the lightest nodes.
        - max_edge_width (int): Width given to the heaviest edges.
        - max_font_size (int): Label font size for the heaviest nodes.
        - min_font_size (int): Label font size for the lightest nodes.

        Returns:
        - Tuple[List[float], List[float], Dict[int, List]]: node sizes in
          node order, edge widths in edge order, and a mapping from
          integer font size to the nodes labelled at that size.
        """
        # Nodes without a weight count as 1, edges without a weight as 0.
        node_weights = [data.get('weight', 1)
                        for node, data in self.nodes(data=True)]
        edge_weights = [data.get('weight', 0)
                        for _, _, data in self.edges(data=True)]

        # An empty weight list has no percentiles to scale against, so
        # it maps straight to an empty result.
        node_size = scale_weights(
            weights=node_weights,
            scale_max=max_node_size,
            scale_min=min_node_size) if node_weights else []
        edges_width = scale_weights(
            weights=edge_weights,
            scale_max=max_edge_width) if edge_weights else []
        font_scale = scale_weights(
            weights=node_weights,
            scale_max=max_font_size,
            scale_min=min_font_size) if node_weights else []

        # Group nodes by truncated font size so that each size needs
        # only one label-drawing call.
        fonts_size = defaultdict(list)
        for node, width in zip(self.nodes, font_scale):
            fonts_size[int(width)].append(node)

        return node_size, edges_width, dict(fonts_size)

    def subgraphX(self, node_list=None, max_edges: int = DEFAULT["MAX_EDGES"]):
        """Return a trimmed subgraph view suitable for plotting.

        Parameters:
        - node_list: Candidate nodes. When omitted, the
          ``DEFAULT["MAX_NODES"]`` heaviest nodes by ``weight`` are used.
        - max_edges (int): Upper bound on the number of edges kept.

        Candidates that do not survive ``find_connected_subgraph`` are
        dropped. From the induced subgraph only each node's top five
        edges by ``weight`` are kept, capped at ``max_edges``.
        """
        if node_list is None:
            node_list = self.nodes.sort("weight")[:DEFAULT["MAX_NODES"]]
        # A set makes the membership test below O(1) per node.
        core_nodes = set(self.find_connected_subgraph())
        node_list = [node for node in node_list if node in core_nodes]

        subgraph = nx.subgraph(self, nbunch=node_list)
        edges = subgraph.top_k_edges(attribute="weight", k=5).keys()
        subgraph = subgraph.edge_subgraph(list(edges)[:max_edges])
        return subgraph

    def plotX(self):
        """
        Draws the graph with a spring layout on a new figure.

        Node sizes, edge widths and label font sizes come from
        ``layout``; the axes title is the graph's ``name``.

        Returns:
        - The matplotlib figure that was created.
        """
        fig, ax = plt.subplots()

        # Keyword arguments keep each value in its intended slot; passed
        # positionally, the edge width landed in `min_node_size` and the
        # font size in `max_edge_width`.
        node_sizes, edge_widths, font_sizes = self.layout(
            max_node_size=DEFAULT["MAX_NODE_SIZE"],
            min_node_size=DEFAULT["MIN_NODE_SIZE"],
            max_edge_width=DEFAULT["MAX_EDGE_WIDTH"],
            max_font_size=14)
        pos = nx.spring_layout(self, k=1)
        # nodes
        nx.draw_networkx_nodes(self,
                               pos,
                               ax=ax,
                               node_size=list(node_sizes),
                               cmap=plt.cm.Blues)
        # edges
        nx.draw_networkx_edges(self,
                               pos,
                               ax=ax,
                               alpha=0.4,
                               width=edge_widths)
        # labels: one call per font size
        for font_size, nodes in font_sizes.items():
            nx.draw_networkx_labels(
                self,
                pos,
                ax=ax,
                font_size=font_size,
                labels={n: n for n in nodes},
                alpha=0.4)

        ax.set_title(self.name)
        ax.set_axis_off()

        fig.tight_layout()
        return fig

    def analysis(self, node_list: Optional[List] = None,
                 scale: int = DEFAULT["GRAPH_SCALE"],
                 node_scale: int = DEFAULT["MAX_NODE_SIZE"],
                 edge_scale: float = DEFAULT["MAX_EDGE_WIDTH"],
                 max_nodes: int = DEFAULT["MAX_NODES"],
                 max_edges: int = DEFAULT["MAX_EDGES"],
                 plt_title: Optional[str] = "Top keywords"):
        """Plot every connected component that has more than five nodes.

        Parameters:
        - node_list: Candidate nodes forwarded to ``subgraphX``.
        - max_edges (int): Edge cap forwarded to ``subgraphX``.
        - scale, node_scale, edge_scale, max_nodes, plt_title: Accepted
          for call compatibility; currently unused.

        Returns:
        - List of the figures created, one per plotted component.
        """
        g = self.subgraphX(max_edges=max_edges, node_list=node_list)
        figures = []
        for connected_component in nx.connected_components(g):
            if len(connected_component) > 5:
                connected_component_graph = self.subgraphX(
                    max_edges=max_edges, node_list=connected_component)
                figures.append(connected_component_graph.plotX())
        return figures

    def find_connected_subgraph(self):
        """Return a copy of the graph with low-degree nodes pruned away.

        Nodes of degree below two are removed repeatedly until none
        remain. The original graph is left untouched.
        """
        logging.info('find_connected_subgraph')
        # Copy the original graph to avoid modifying it
        H = self.copy()

        # Each pass removes every current low-degree node at once; the
        # removals can lower other degrees, so repeat until stable.
        while True:
            low_degree = [node for node in H.nodes if H.degree(node) < 2]
            if not low_degree:
                break
            for node in low_degree:
                logging.info(f'Removing the {node} node and its incident edges')
            H.remove_nodes_from(low_degree)

        return H

    def top_k_edges(self, attribute: str, reverse: bool = True, k: int = 5) -> Dict[Tuple[Any, Any], Any]:
        """
        Returns the top k edges per node based on the given attribute.

        Parameters:
        attribute (str): The attribute name to be used for sorting.
        reverse (bool): Flag indicating whether to sort in reverse order (default: True).
        k (int): Number of top edges to keep per node.

        Returns:
        Dict[Tuple[Any, Any], Any]: A dictionary keyed by ``(u, v)`` edge
        tuples whose value is that edge's ``attribute`` (0 when the edge
        does not carry it). An edge picked from both of its endpoints
        appears under both ``(u, v)`` and ``(v, u)``.
        """
        top_list = {}
        for node in self.nodes:
            ranked = sorted(self.edges(node, data=True),
                            key=lambda edge: edge[2].get(attribute, 0),
                            reverse=reverse)
            for u, v, data in ranked[:k]:
                # Same fallback as the sort key, so an edge without the
                # attribute cannot raise KeyError here.
                top_list[(u, v)] = data.get(attribute, 0)
        return top_list

    @staticmethod
    def from_pandas_edgelist(df,
                             source: Optional[str] = "source",
                             target: Optional[str] = "target",
                             weight: Optional[str] = "weight"):
        """
        Build a Graph from an edge-list DataFrame.

        Low-degree nodes are pruned with ``find_connected_subgraph``.
        Each remaining node then gets a ``weight`` attribute equal to
        the summed weight of the top-10-per-node edges that touch it,
        and the result is restricted to the top-5-per-node edges.

        :param df: DataFrame containing network data.
        :param source: Name of source nodes column in df.
        :param target: Name of target nodes column in df.
        :param weight: Name of edges weight column in df.
        :return: Subgraph view over the weighted, pruned graph.
        """
        G = nx.from_pandas_edgelist(
            df, source=source, target=target, edge_attr=weight,
            create_using=Graph())
        G = G.find_connected_subgraph()

        # Sum the retained edge weights onto both endpoints of each edge
        node_aggregates = {}
        for (u, v), weight_value in G.top_k_edges(attribute=weight, k=10).items():
            node_aggregates[u] = node_aggregates.get(u, 0) + weight_value
            node_aggregates[v] = node_aggregates.get(v, 0) + weight_value

        nx.set_node_attributes(G, node_aggregates, name=weight)

        G = G.edge_subgraph(edges=G.top_k_edges(attribute=weight))
        return G
|
|
310
|
+
|
|
311
|
+
def plot_network(data: pd.DataFrame):
    """Build a graph from an edge-list frame, trim it and plot it.

    ``data`` is passed to ``Graph.from_pandas_edgelist`` with its
    default column names, the result is trimmed with ``subgraphX`` and
    the value returned by ``analysis`` is handed back.
    """
    return Graph.from_pandas_edgelist(data).subgraphX().analysis()
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
import matplotlib.dates as mdates
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
from matplotlib.axes import Axes
|
|
8
|
+
from matplotlib.dates import DateFormatter, MonthLocator
|
|
9
|
+
|
|
10
|
+
from .Utils import (PIVOTBARS_STYLE_TEMPLATE, PIVOTLINES_STYLE_TEMPLATE,
|
|
11
|
+
DynamicFuncFormatter, StyleTemplate, generate_ticks)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def plot_pivotbar(data, metric, n_top, title):
    """Bar-plot the ``n_top`` rows with the highest ``metric``.

    Rows whose ``used`` column equals 1 are drawn in green ("Used") and
    rows where it equals 0 in red ("Not Used"), with the ``tag`` column
    on the x axis. Returns the newly created figure.
    """
    # Highest metric first, then keep the leading rows
    top_rows = data.sort_values(by=metric, ascending=False).head(n_top)

    fig, ax = plt.subplots(figsize=(12, 6))

    # (value of `used`, bar colour, legend label), drawn in this order
    bar_groups = (
        (1, 'green', 'Used'),
        (0, 'red', 'Not Used'),
    )
    for used_flag, bar_color, bar_label in bar_groups:
        group = top_rows[top_rows['used'] == used_flag]
        ax.bar(group.tag, group[metric],
               color=bar_color, label=bar_label, alpha=0.7)

    # Labels, title and legend
    ax.set_ylabel('UVs')
    ax.set_title(f'{title}\nTop {n_top} tags')
    ax.legend()

    ax.tick_params(axis='x', rotation=90)
    return fig
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def plot_lines(ax: Axes,
               data: pd.DataFrame,
               x_col: str,
               y_col: Union[str, List[str]],
               style: Optional[StyleTemplate] = None,
               fig_title: Optional[str] = None,
               n_top: int = 4,
               z_col: str = "browser") -> Axes:
    """
    Plot one line per top-ranked element of ``z_col``.

    Parameters:
    ax (matplotlib.axes.Axes): The ax to plot on; it is cleared first.
    data (pd.DataFrame): The data to plot.
    x_col (str): Column used for the x axis.
    y_col (Union[str, List[str]]): Either one column name, used both for
        display and for ranking, or a list whose first entry is the
        displayed column and whose second entry ranks the elements.
    style (StyleTemplate): Style template; ``PIVOTLINES_STYLE_TEMPLATE``
        when omitted.
    fig_title (str): Optional axes title.
    n_top (int): Number of ``z_col`` elements to plot, ranked by the sum
        of the ranking column.
    z_col (str): Column whose values identify the individual lines.

    Returns:
    ax (matplotlib.axes.Axes): The ax with the plot.

    Raises:
    ValueError: If ``x_col`` is not a column of ``data`` or a list
        ``y_col`` has fewer than two entries.
    TypeError: If ``y_col`` is neither a string nor a list.
    """
    # Validate inputs
    if x_col not in data.columns:
        raise ValueError(f"'{x_col}' column not found in the data")
    if not isinstance(y_col, (list, str)):
        raise TypeError("'y_col' should be a string or a list of strings")

    if isinstance(y_col, str):
        # Indexing a string would pick single characters, so use the
        # one column for both roles.
        display_metric = sort_metric = y_col
    else:
        if len(y_col) < 2:
            raise ValueError(
                "'y_col' list must contain at least two column names: "
                "the displayed metric and the ranking metric")
        display_metric, sort_metric = y_col[0], y_col[1]

    ax.clear()
    if fig_title is not None:
        ax.set_title(fig_title)
    if style is None:
        style = PIVOTLINES_STYLE_TEMPLATE
    ax.figure.set_facecolor(style.fig_background_color)
    ax.figure.set_edgecolor(style.fig_border)

    # Get the top n elements in the specified z
    top_elements = data.groupby(
        z_col)[sort_metric].sum().nlargest(n_top).index.tolist()
    top_elements_df = data[data[z_col].isin(top_elements)]

    # The lower y limit is 0 unless some plotted value is negative
    y_min = 0
    for element in top_elements:
        subset = top_elements_df[top_elements_df[z_col] == element]
        y_min = min(y_min, subset[display_metric].min())
        # NOTE(review): per-browser line styles and colours used to be
        # computed here but were never passed to plot(); they were
        # removed as dead code. Confirm whether they should be applied.
        ax.plot(subset[x_col], subset[display_metric], label=element)

    # Set x-axis format and locator
    if style.x_formatter is not None:
        if style.x_formatter == "year_month_formatter":
            ax.xaxis.set_major_locator(MonthLocator())
        else:
            ax.xaxis.set_major_formatter(
                DynamicFuncFormatter(style.x_formatter))
            ax.set_xticks(generate_ticks(
                data[x_col].min(), data[x_col].max(),
                num_ticks=style.x_ticks))

    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)

    # Set labels and limits; the upper limit is the 95th percentile of
    # the displayed metric.
    ax.set_xlabel(x_col)
    y_max = data[display_metric].dropna().quantile(0.95)

    ax.set_ylim(y_min, y_max)
    ax.set_ylabel(display_metric)
    if style.y_formatter is not None:
        ax.yaxis.set_major_formatter(
            DynamicFuncFormatter(style.y_formatter))
        ax.set_yticks(generate_ticks(
            y_min, y_max, num_ticks=style.y_ticks))
    else:
        ylabels = ['{:,.0f}%'.format(y) for y in ax.get_yticks()*100]
        # set_yticklabels lives on the Axes, not on its yaxis.
        ax.set_yticklabels(ylabels)

    # Add legend and grid
    ax.legend()
    ax.grid(True)

    return ax
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def plot_bars(ax: Axes,
              data: pd.DataFrame,
              x_col: str,
              y_col: Union[str, List[str]],
              style: Optional[StyleTemplate] = None,
              fig_title: Optional[str] = None,
              z_col: str = "browser",
              n=5,
              agg_func: str = 'sum') -> Axes:
    """Draw ``data`` as a stacked bar chart indexed by ``x_col``.

    Every column left after ``x_col`` becomes the index is stacked into
    the bars.

    Parameters:
    ax: Axes to draw on; it is cleared first.
    data: Frame to plot. It is copied, never modified.
    x_col: Column (or existing index name) used for the x axis.
    y_col: Column name or list of names; only the first entry is used,
        as the y-axis label.
    style: Style template; ``PIVOTBARS_STYLE_TEMPLATE`` when omitted.
    fig_title: Axes title; empty when omitted.
    z_col, n, agg_func: Accepted for call compatibility; currently
        unused.

    Returns:
    The same ``ax`` that was passed in.
    """
    if not isinstance(y_col, list):
        y_col = [y_col, y_col]  # Ensure y_col is a list

    # Clear axis
    ax.clear()
    if style is None:
        style = PIVOTBARS_STYLE_TEMPLATE

    # Copy so that re-indexing never touches the caller's frame
    df_plot = data.copy()
    if df_plot.index.name != x_col:
        df_plot.set_index(x_col, inplace=True)

    # From here on x_col is the index, not a column, so all x values
    # are read from df_plot.index.
    if style.x_formatter is None:
        df_plot.plot(kind='bar', stacked=True, ax=ax)
    elif style.x_formatter == "year_month_formatter":
        # The date labels below need a datetime index
        if not pd.api.types.is_datetime64_any_dtype(df_plot.index):
            df_plot.index = pd.to_datetime(df_plot.index)

        df_plot.plot(kind='bar', stacked=True, ax=ax)

        # Label only the bars that fall on the first day of a month.
        # set_xticks/set_xticklabels install fixed ticks and labels, so
        # no date locator or formatter is set beforehand.
        formatted_dates = df_plot.index.strftime('%Y-%m')
        first_of_month_positions = [
            i for i, date in enumerate(df_plot.index) if date.day == 1]
        ax.set_xticks(first_of_month_positions)
        ax.set_xticklabels([formatted_dates[i]
                            for i in first_of_month_positions], rotation=45)

        # Remove the blank space at the beginning
        ax.set_xlim(left=0, right=len(df_plot.index) - 1)
    else:
        x_min = df_plot.index.min()
        x_max = df_plot.index.max()
        df_plot.plot(kind='bar', stacked=True, ax=ax)
        ax.xaxis.set_major_formatter(
            DynamicFuncFormatter(style.x_formatter))
        ax.set_xticks(generate_ticks(
            x_min, x_max, num_ticks=style.x_ticks))

    # Apply custom y_formatter if provided
    if style.y_formatter is not None:
        ax.yaxis.set_major_formatter(DynamicFuncFormatter(style.y_formatter))

    # Set title and labels
    ax.set_title(fig_title if fig_title else "")
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col[0])

    return ax
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import List, Optional, Dict, Callable
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import matplotlib.pyplot as plt
|
|
5
|
+
from matplotlib.axes import Axes
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def plot_table(ax: Axes,
               data: pd.DataFrame,
               mappings: Dict[str, Callable[[pd.Series], pd.Series]],
               sort_column: str = "INDEX",
               sort_ascending: bool = False,
               num_rows: Optional[int] = None,
               fig_background_color: str = 'black',
               fig_border: str = 'white',
               font_name: str = 'Arial',
               font_size: int = 10,
               font_color="black",
               fig_title: Optional[str] = None,
               col_widths: Optional[List[float]] = None) -> Axes:
    """
    Render a DataFrame as a Matplotlib table inside the given axis.

    Parameters:
    ax (Axes): The Matplotlib axis to draw the table in.
    data (pd.DataFrame): Source data. It is not modified.
    mappings (dict): Maps each column to display to a function. The function
        is applied through ``Series.apply``, i.e. it receives one cell value
        at a time. The dictionary order defines the column order.
    sort_column (str, optional): Column (or index level name) to sort by.
        It does not have to be one of the displayed columns. Default is "INDEX".
    sort_ascending (bool, optional): Sort direction. Default is False.
    num_rows (int, optional): Number of rows to display after sorting.
        Default is None, which displays every row.
    fig_background_color (str, optional): Default is 'black'. Accepted for
        interface compatibility; this function does not use it.
    fig_border (str, optional): Default is 'white'. Accepted for interface
        compatibility; this function does not use it.
    font_name (str, optional): Font for the title and table cells. Default is 'Arial'.
    font_size (int, optional): Cell font size; the title uses 1.5x this size. Default is 10.
    font_color (str, optional): Text color for the title and cells. Default is "black".
    fig_title (str, optional): Title drawn above the table. Default is None (no title).
    col_widths (list, optional): Relative column widths passed to ``ax.table``. Default is None.

    Returns:
    Axes: The axis that now holds the table (with its frame and ticks hidden).
    """
    if num_rows is None:
        num_rows = len(data.index)
    cols = list(mappings)

    # Sort the full frame BEFORE narrowing it to the displayed columns, so the
    # sort key may be a column that is not itself shown in the table.
    ordered = data.sort_values(by=sort_column, ascending=sort_ascending)
    plot_data = ordered[cols].head(num_rows).reset_index(drop=True)

    # Turn raw values into display values, column by column.
    for col, func in mappings.items():
        plot_data[col] = plot_data[col].apply(func)

    if fig_title is not None:
        # Axes coordinates: horizontally centred, slightly above the top edge.
        ax.text(0.5, 1.05,
                fig_title,
                va='top',
                ha='center',
                fontsize=font_size*1.5,
                fontname=font_name,
                color=font_color,
                transform=ax.transAxes)

    table = ax.table(cellText=plot_data.values, colLabels=plot_data.columns,
                     cellLoc='center', colWidths=col_widths, loc="center")
    # Disable auto-sizing so the requested font size is honoured.
    table.auto_set_font_size(False)
    table.set_fontsize(font_size)

    for cell in table.get_celld().values():
        cell.get_text().set_fontname(font_name)
        cell.get_text().set_color(font_color)
    table.scale(1, 4)
    table.auto_set_column_width(col=list(range(len(plot_data.columns))))
    ax.axis('off')

    return ax
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import List, Optional, Union
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
from matplotlib.axes import Axes
|
|
9
|
+
|
|
10
|
+
from .Utils import (PIVOTLINES_STYLE_TEMPLATE, DynamicFuncFormatter,
|
|
11
|
+
StyleTemplate, generate_ticks)
|
|
12
|
+
|
|
13
|
+
# region TimeSeries
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def plot_timeseries(ax: Axes,
                    data: pd.DataFrame,
                    x_col: str,
                    y_col: Union[str, List[str]],
                    fig_title: Optional[str] = None,
                    style: Optional[StyleTemplate] = None,

                    rolling_days: int = 30,
                    highlight: Optional[List[datetime]] = None,
                    **kwargs) -> Axes:
    """
    Plot one or more metrics over time with their rolling mean and a
    +/- one rolling standard deviation band.

    The timeline is split at a set of "highlight" dates. For every segment
    between two consecutive highlight dates, the mean of each metric is drawn
    as a dashed horizontal line, and a dashed vertical line marks the start
    of the segment.

    When ``highlight`` is None the dates are chosen automatically: the first
    and last date of the data plus, for each metric, up to 5 dates whose
    rolling z-score exceeds ``Q3 + 1.5 * IQR`` of that metric's z-scores,
    each at least 10% of the total time span away from the ones already
    picked for that metric.

    Parameters:
    ax (Axes): Axis to draw on. It is cleared first.
    data (pd.DataFrame): Data to plot. If ``x_col`` is one of its columns, a
        sorted copy indexed by that column (converted to datetime) is used and
        the caller's frame is left untouched. Otherwise the frame is used as
        is and is assumed to already carry a sorted datetime index.
    x_col (str): Name of the date column.
    y_col (str or List[str]): Column name(s) of the metrics to plot.
    fig_title (str, optional): Title for the plot. Default is None (no title).
    style (StyleTemplate, optional): Colors and formatters. Default is None,
        which selects PIVOTLINES_STYLE_TEMPLATE.
    rolling_days (int, optional): Window size (in rows) for the rolling mean
        and standard deviation. Default is 30.
    highlight (List[datetime], optional): Dates at which to split the
        timeline. The list is not modified. Default is None (automatic).
    **kwargs: Accepted for call compatibility; not used by this function.

    Returns:
    Axes: The axis that was drawn on.
    """
    if isinstance(y_col, str):
        y_col = [y_col]
    # Clear the axis before plotting
    ax.clear()
    if fig_title is not None:
        ax.set_title(fig_title)
    if style is None:
        style = PIVOTLINES_STYLE_TEMPLATE
    ax.figure.set_facecolor(style.fig_background_color)
    ax.figure.set_edgecolor(style.fig_border)

    if x_col in data.columns:
        # assign/sort_values/set_index each return a new frame, so the
        # caller's DataFrame is never mutated and the sort actually applies.
        data = (data.assign(**{x_col: pd.to_datetime(data[x_col])})
                    .sort_values(by=x_col)
                    .set_index(x_col))

    first_date = data.index.min()
    last_date = data.index.max()
    total_days = (last_date - first_date).days
    min_interval = total_days * 0.1

    # Decide once, up front, whether dates are picked automatically; work on
    # a private list so the caller's `highlight` argument is not modified.
    auto_highlight = highlight is None
    if auto_highlight:
        highlight_dates = [first_date, last_date]
    else:
        highlight_dates = sorted(highlight)
        # Extend the user's dates to the data range so the first and last
        # segments are covered as well.
        if not highlight_dates or first_date < highlight_dates[0]:
            highlight_dates.insert(0, first_date)
        if last_date > highlight_dates[-1]:
            highlight_dates.append(last_date)

    for metric in y_col:
        series = data[metric]
        rolling_mean = series.rolling(window=rolling_days).mean()
        rolling_std = series.rolling(window=rolling_days).std()

        if auto_highlight:
            highlight_dates.extend(_spaced_outlier_dates(
                series, rolling_mean, rolling_std, min_interval))

        # Plot the metric, its rolling mean, and standard deviation.
        # Keep the line object so the rolling mean can reuse its color.
        line, = ax.plot(series, label=metric)
        ax.plot(rolling_mean, color=line.get_color(),
                linewidth=line.get_linewidth() * 3, label='_nolegend_')
        ax.fill_between(rolling_std.index,
                        rolling_mean - rolling_std,
                        rolling_mean + rolling_std,
                        alpha=0.2)

    # Sort and deduplicate the highlight dates
    highlight_dates = sorted(set(highlight_dates))

    # Mean of each metric between each pair of consecutive highlight dates
    for start_date, end_date in zip(highlight_dates, highlight_dates[1:]):
        for metric in y_col:
            metric_mean = data.loc[start_date:end_date, metric].mean()
            ax.hlines(y=metric_mean, xmin=start_date, xmax=end_date,
                      linestyle='--', color=style.font_color, alpha=0.5)

        # Vertical marker at the start of the segment
        ax.axvline(x=start_date, color=style.font_color, linestyle='--')

    if style.x_formatter is not None:
        ax.xaxis.set_major_formatter(DynamicFuncFormatter(style.x_formatter))
    ax.xaxis.set_major_locator(plt.matplotlib.dates.MonthLocator())
    # Rotate through tick_params: set_xticklabels() would freeze the label
    # texts and replace the formatter installed above.
    ax.tick_params(axis='x', labelrotation=45)

    if style.y_formatter is None:
        # Default: show values as percentages. A formatter (rather than fixed
        # label strings) keeps labels in sync if the ticks change later.
        ax.yaxis.set_major_formatter(plt.matplotlib.ticker.FuncFormatter(
            lambda y, pos: '{:,.0f}%'.format(y * 100)))
    else:
        ax.yaxis.set_major_formatter(DynamicFuncFormatter(style.y_formatter))
    if style.legend:
        ax.legend(loc='best')
    return ax


def _spaced_outlier_dates(series, rolling_mean, rolling_std, min_interval, limit=5):
    """Return up to `limit` dates with the highest outlying rolling z-scores,
    each at least `min_interval` days away from the dates already picked."""
    z_scores = ((series - rolling_mean) / rolling_std).dropna()
    q1 = z_scores.quantile(0.25)
    q3 = z_scores.quantile(0.75)
    z_threshold = q3 + 1.5 * (q3 - q1)

    picked = []
    for date, value in z_scores.sort_values(ascending=False).items():
        # Scores are visited in descending order, so once one fails the
        # threshold none of the remaining ones can pass it.
        if not value >= z_threshold:
            break
        if all(abs((date - d).days) >= min_interval for d in picked):
            picked.append(date)
            if len(picked) >= limit:
                break
    return picked
|
|
136
|
+
# endregion
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
from typing import List, Optional, Union
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
from matplotlib.axes import Axes
|
|
11
|
+
from matplotlib.dates import num2date
|
|
12
|
+
from matplotlib.ticker import FuncFormatter
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# region Style
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class StyleTemplate:
    """Bundle of visual settings shared by the plotting functions."""

    # Figure colors
    fig_background_color: str = "skyblue"
    fig_border: str = "steelblue"
    # Text appearance
    font_name: str = "Arial"
    font_size: int = 10
    font_color: str = "black"
    # Name of the color palette
    palette: str = "rocket"
    # Whether a legend is drawn
    legend: bool = True
    # Axis formatters are given by function NAME (e.g. "percent_formatter"),
    # not as callables; None means no formatter is configured.
    x_formatter: Optional[str] = None
    x_ticks: int = 10
    y_formatter: Optional[str] = None
    y_ticks: int = 5
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Ready-made style presets. Formatters are referenced by function name.

# Light theme, y axis formatted by "kilo_formatter".
KILO_STYLE_TEMPLATE = StyleTemplate(
    palette='viridis',
    fig_background_color='white',
    fig_border='lightgrey',
    y_formatter="kilo_formatter",
)

# Light theme, y axis formatted by "percent_formatter".
PERCENT_STYLE_TEMPLATE = StyleTemplate(
    palette='viridis',
    fig_background_color='white',
    fig_border='lightgrey',
    y_formatter="percent_formatter",
)

# Light theme for bubble charts: integer x axis, percent y axis.
BUBBLE_STYLE_TEMPLATE = StyleTemplate(
    palette='rocket',
    fig_background_color='white',
    fig_border='lightgrey',
    x_formatter="integer_formatter",
    y_formatter="percent_formatter",
    y_ticks=5,
)

# Light theme for time series: year-month x axis, percent y axis.
TIMESERIES_STYLE_TEMPLATE = StyleTemplate(
    palette='viridis',
    fig_background_color='white',
    fig_border='lightgrey',
    x_formatter="year_month_formatter",
    y_formatter="percent_formatter",
)

# Light theme for pivoted line charts (same settings as the time-series one).
PIVOTLINES_STYLE_TEMPLATE = StyleTemplate(
    palette='viridis',
    fig_background_color='white',
    fig_border='lightgrey',
    x_formatter="year_month_formatter",
    y_formatter="percent_formatter",
)

# Light theme for pivoted bar charts: year-month x axis, kilo y axis.
PIVOTBARS_STYLE_TEMPLATE = StyleTemplate(
    palette='viridis',
    fig_background_color='white',
    fig_border='lightgrey',
    x_formatter="year_month_formatter",
    y_formatter="kilo_formatter",
)


# Dark theme; no axis formatters are configured.
DARK_STYLE_TEMPLATE = StyleTemplate(
    palette='magma',
    fig_background_color='black',
    fig_border='darkgrey',
    font_color='white',
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def generate_ticks(min_val, max_val, num_ticks="10"):
    """
    Build evenly spaced tick positions covering ``[min_val, max_val]``.

    Numeric bounds produce a NumPy array whose step is the raw step
    ``(max - min) / (num_ticks - 1)`` rounded to one significant digit, so
    the actual number of ticks may differ slightly from ``num_ticks``.
    Bounds that cannot be converted to ``float`` (date strings, Timestamps,
    datetimes, ...) are treated as dates and produce a ``DatetimeIndex`` of
    exactly ``num_ticks`` ticks from ``min_val`` to ``max_val``.

    Parameters:
    min_val: Lower bound (number, numeric string, or date-like).
    max_val: Upper bound (same kind as ``min_val``).
    num_ticks (int or str, optional): Desired number of ticks. Default is "10".

    Returns:
    np.ndarray or pd.DatetimeIndex: The tick positions. A single tick at
    ``min_val`` is returned when ``num_ticks`` is below 2 or the range is empty.

    Raises:
    ValueError: If numeric ``max_val`` is smaller than ``min_val``.
    """
    num_ticks = int(num_ticks)

    # Identify the type of the input. float() raises ValueError for a date
    # string but TypeError for Timestamp/datetime objects, so catch both.
    try:
        low, high = float(min_val), float(max_val)
        is_date = False
    except (TypeError, ValueError):
        is_date = True

    if is_date:
        start = pd.Timestamp(min_val)
        end = pd.Timestamp(max_val)
        if num_ticks < 2 or start == end:
            return pd.DatetimeIndex([start])
        # Linear spacing between the two bounds; avoids building a
        # fractional-day frequency string.
        return pd.date_range(start=start, end=end, periods=num_ticks)

    data_range = high - low
    if data_range < 0:
        raise ValueError(
            f"max_val ({max_val!r}) must not be smaller than min_val ({min_val!r})")
    # Degenerate cases would otherwise divide by zero or take log10(0).
    if num_ticks < 2 or data_range == 0:
        return np.array([low])

    step_size = data_range / (num_ticks - 1)

    # Round the step size to a "nice" number: one significant digit
    exponent = np.floor(np.log10(step_size))
    fraction = step_size / 10**exponent
    nice_step = round(fraction) * 10**exponent

    # Upper bound is pushed one step out so max_val itself can be included
    return np.arange(low, high + nice_step, nice_step)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class DynamicFuncFormatter(FuncFormatter):
    """FuncFormatter whose formatting function is selected by name.

    ``func_name`` is looked up among this module's global names (for example
    "percent_formatter"). A callable may also be passed directly and is then
    used as is.

    Raises:
        KeyError: If ``func_name`` is not the name of a module-level object.
        TypeError: If the object found under ``func_name`` is not callable.
    """

    def __init__(self, func_name):
        if callable(func_name):
            func = func_name
        else:
            try:
                func = globals()[func_name]
            except KeyError:
                # Same exception type as a plain failed lookup, but with a
                # message that says what went wrong.
                raise KeyError(
                    f"Unknown formatter name: {func_name!r}") from None
            if not callable(func):
                raise TypeError(
                    f"{func_name!r} does not refer to a callable formatter")
        self.func = func
        super().__init__(func)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def kilo_formatter(value, pos):
    """Format a tick value with an "M" or "K" suffix for readability.

    Parameters:
    value: The tick value.
    pos: The tick position (required by the formatter signature; unused).

    Returns:
    str: Millions with two decimals and "M" for values >= 1,000,000,
    thousands with two decimals and "K" for values >= 10,000, and the plain
    ``str(value)`` otherwise.
    """
    if value >= 1000000:
        return f"{value/1000000:.2f}M"
    elif value >= 10000:
        # "K" means thousands, so the divisor must be 1000.
        return f"{value/1000:.2f}K"
    else:
        return str(value)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def percent_formatter(x, pos):
    """Render a fraction as a whole-number percentage, e.g. 0.5 -> "50%".

    ``pos`` is required by the formatter signature and is not used.
    """
    percentage = x * 100
    return "{:.0f}%".format(percentage)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def integer_formatter(x, pos):
    """Render a number with no decimal places, e.g. 3.7 -> "4".

    ``pos`` is required by the formatter signature and is not used.
    """
    return format(x, ".0f")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def year_month_formatter(x, pos):
    """Render a Matplotlib date number as "YYYY-MM".

    ``pos`` is required by the formatter signature and is not used.
    """
    as_datetime = num2date(x)
    return as_datetime.strftime('%Y-%m')
|
|
152
|
+
# endregion
|
|
153
|
+
|
|
154
|
+
# region Wrapper
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def plot_func(plot_type, ax: Axes,
              data: pd.DataFrame,
              x_col: str,
              y_col: Union[str, List[str]],
              fig_title: Optional[str] = None,
              style: Optional[StyleTemplate] = None,
              legend: bool = False,
              **kwargs):
    """
    Dispatch to the plotting function selected by ``plot_type``.

    Parameters:
    plot_type (str): One of "bubble", "timeseries", "lines" or "bars".
    ax (Axes): Axis to draw on.
    data (pd.DataFrame): Data to plot.
    x_col (str): Name of the x column.
    y_col (str or List[str]): Name(s) of the y column(s).
    fig_title (str, optional): Title for the plot. Default is None.
    style (StyleTemplate, optional): Style forwarded to the plotter. Default is None.
    legend (bool, optional): Forwarded to the "bubble" and "timeseries"
        plotters only. Default is False.
    **kwargs: Extra keyword arguments forwarded unchanged to the plotter.

    Returns:
    Whatever the selected plotting function returns (previously this wrapper
    discarded that value and returned None).

    Raises:
    ValueError: If ``plot_type`` is not one of the supported names.
    """
    # Imported here rather than at module level: those modules import from
    # this one, so a top-level import would be circular.
    from .Bubble import plot_bubble
    from .Pivot import plot_bars, plot_lines
    from .TimeSeries import plot_timeseries

    # Arguments every plotter receives
    common = dict(ax=ax,
                  data=data,
                  x_col=x_col,
                  y_col=y_col,
                  fig_title=fig_title,
                  style=style)

    if plot_type == "bubble":
        return plot_bubble(**common, legend=legend, **kwargs)
    if plot_type == "timeseries":
        return plot_timeseries(**common, legend=legend, **kwargs)
    if plot_type == "lines":
        return plot_lines(**common, **kwargs)
    if plot_type == "bars":
        return plot_bars(**common, **kwargs)
    raise ValueError(f"Unknown plot type: {plot_type}")
|
|
204
|
+
# endregion
|
|
File without changes
|
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: MatplotLibAPI
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Description-Content-Type: text/markdown
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Dist: pandas
|
|
7
7
|
Requires-Dist: matplotlib
|
|
8
8
|
Requires-Dist: seaborn
|
|
9
|
-
Requires-Dist: json
|
|
10
|
-
Requires-Dist: logging
|
|
11
9
|
Requires-Dist: scikit-learn
|
|
12
10
|
|
|
13
11
|
# MatplotLibAPI
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
MatplotLibAPI/Bubble.py
|
|
6
|
+
MatplotLibAPI/Composite.py
|
|
7
|
+
MatplotLibAPI/Network.py
|
|
8
|
+
MatplotLibAPI/Pivot.py
|
|
9
|
+
MatplotLibAPI/Table.py
|
|
10
|
+
MatplotLibAPI/TimeSeries.py
|
|
11
|
+
MatplotLibAPI/Utils.py
|
|
12
|
+
MatplotLibAPI/__init__.py
|
|
13
|
+
MatplotLibAPI.egg-info/PKG-INFO
|
|
14
|
+
MatplotLibAPI.egg-info/SOURCES.txt
|
|
15
|
+
MatplotLibAPI.egg-info/dependency_links.txt
|
|
16
|
+
MatplotLibAPI.egg-info/requires.txt
|
|
17
|
+
MatplotLibAPI.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
MatplotLibAPI
|
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: MatplotLibAPI
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Description-Content-Type: text/markdown
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Dist: pandas
|
|
7
7
|
Requires-Dist: matplotlib
|
|
8
8
|
Requires-Dist: seaborn
|
|
9
|
-
Requires-Dist: json
|
|
10
|
-
Requires-Dist: logging
|
|
11
9
|
Requires-Dist: scikit-learn
|
|
12
10
|
|
|
13
11
|
# MatplotLibAPI
|
|
@@ -2,14 +2,12 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name='MatplotLibAPI',
|
|
5
|
-
version='
|
|
5
|
+
version='v2.0.2',
|
|
6
6
|
packages=find_packages(),
|
|
7
7
|
install_requires=[
|
|
8
8
|
"pandas",
|
|
9
9
|
"matplotlib",
|
|
10
10
|
"seaborn",
|
|
11
|
-
"json",
|
|
12
|
-
"logging",
|
|
13
11
|
"scikit-learn"
|
|
14
12
|
],
|
|
15
13
|
long_description=open('README.md').read(),
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|