MatplotLibAPI 3.2.13__py3-none-any.whl → 3.2.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MatplotLibAPI/Area.py +76 -0
- MatplotLibAPI/Bar.py +79 -0
- MatplotLibAPI/BoxViolin.py +69 -0
- MatplotLibAPI/Bubble.py +413 -136
- MatplotLibAPI/Composite.py +139 -140
- MatplotLibAPI/Heatmap.py +113 -0
- MatplotLibAPI/Histogram.py +69 -0
- MatplotLibAPI/Network.py +818 -338
- MatplotLibAPI/Pie.py +66 -0
- MatplotLibAPI/Pivot.py +115 -194
- MatplotLibAPI/Sankey.py +39 -0
- MatplotLibAPI/StyleTemplate.py +242 -296
- MatplotLibAPI/Sunburst.py +83 -0
- MatplotLibAPI/Table.py +185 -56
- MatplotLibAPI/Timeserie.py +292 -78
- MatplotLibAPI/Treemap.py +133 -75
- MatplotLibAPI/Waffle.py +82 -0
- MatplotLibAPI/Wordcloud.py +314 -0
- MatplotLibAPI/__init__.py +76 -325
- MatplotLibAPI/_visualization_utils.py +38 -0
- MatplotLibAPI/accessor.py +1647 -0
- matplotlibapi-3.2.15.dist-info/METADATA +269 -0
- matplotlibapi-3.2.15.dist-info/RECORD +25 -0
- {matplotlibapi-3.2.13.dist-info → matplotlibapi-3.2.15.dist-info}/WHEEL +1 -2
- matplotlibapi-3.2.13.dist-info/METADATA +0 -24
- matplotlibapi-3.2.13.dist-info/RECORD +0 -14
- matplotlibapi-3.2.13.dist-info/top_level.txt +0 -1
- {matplotlibapi-3.2.13.dist-info → matplotlibapi-3.2.15.dist-info}/licenses/LICENSE +0 -0
MatplotLibAPI/Waffle.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Waffle chart helpers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import seaborn as sns
|
|
7
|
+
from matplotlib.axes import Axes
|
|
8
|
+
from matplotlib.figure import Figure
|
|
9
|
+
from matplotlib.patches import Rectangle
|
|
10
|
+
import matplotlib.pyplot as plt
|
|
11
|
+
|
|
12
|
+
from .StyleTemplate import PIE_STYLE_TEMPLATE, StyleTemplate, validate_dataframe
|
|
13
|
+
from ._visualization_utils import _get_axis, _wrap_aplot
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _allocate_waffle_squares(values: Any, squares: int) -> Tuple[int, ...]:
    """Split ``squares`` grid cells among ``values`` proportionally.

    Uses the largest-remainder method: every value first receives the floor
    of its exact share, then the cells still unassigned go one by one to the
    values with the largest fractional remainders (ties favour earlier
    values). The returned counts therefore always add up to ``squares``.

    Parameters
    ----------
    values : iterable of numbers
        Raw category values. Negative and NaN entries are treated as zero.
    squares : int
        Total number of cells to distribute.

    Returns
    -------
    tuple of int
        One cell count per input value; empty when ``values`` is empty.

    Raises
    ------
    ValueError
        If ``values`` is non-empty but does not sum to a positive, finite
        number.
    """
    # ``v > 0`` is False for NaN as well as for negatives, so both become 0.
    weights = [float(v) if v > 0 else 0.0 for v in values]
    if not weights:
        return ()

    total = sum(weights)
    if not 0 < total < float("inf"):
        raise ValueError(
            "Waffle chart values must sum to a positive, finite number."
        )

    exact_shares = [weight / total * squares for weight in weights]
    counts = [int(share) for share in exact_shares]  # floor: shares are >= 0
    leftover = max(squares - sum(counts), 0)

    # ``sorted`` is stable, so equal remainders keep their original order.
    by_remainder = sorted(
        range(len(exact_shares)),
        key=lambda i: exact_shares[i] - counts[i],
        reverse=True,
    )
    for i in by_remainder[:leftover]:
        counts[i] += 1
    return tuple(counts)


def aplot_waffle(
    pd_df: pd.DataFrame,
    category: str,
    value: str,
    rows: int = 10,
    title: Optional[str] = None,
    style: StyleTemplate = PIE_STYLE_TEMPLATE,
    ax: Optional[Axes] = None,
    **kwargs: Any,
) -> Axes:
    """Plot a simple waffle chart as a grid of proportional squares.

    The chart is a ``rows`` x ``rows`` grid filled left to right, top to
    bottom. Each row of ``pd_df`` receives a number of cells proportional to
    its ``value`` (largest-remainder allocation, so the grid is always
    completely filled) and its own colour from ``style.palette``.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        Data to plot; one grid segment is drawn per row.
    category : str
        Column holding the legend label of each segment.
    value : str
        Column holding the numeric size of each segment.
    rows : int, optional
        Number of grid rows and columns. Defaults to ``10``.
    title : str, optional
        Axes title. Defaults to ``None`` (no title).
    style : StyleTemplate, optional
        Supplies ``palette`` and ``background_color`` (used as the cell
        edge colour). Defaults to ``PIE_STYLE_TEMPLATE``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on; resolved through ``_get_axis``.
    **kwargs : Any
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the waffle chart.

    Raises
    ------
    ValueError
        If ``rows`` is smaller than 1, or if the values of a non-empty frame
        do not sum to a positive, finite number.
    """
    validate_dataframe(pd_df, cols=[category, value])
    if rows < 1:
        raise ValueError("rows must be a positive integer.")

    plot_ax = _get_axis(ax)
    squares = rows * rows
    colors = sns.color_palette(style.palette, n_colors=len(pd_df))
    counts = _allocate_waffle_squares(pd_df[value], squares)
    plot_ax.set_aspect("equal")

    start = 0
    for color, count in zip(colors, counts):
        # ``min`` is purely defensive: the counts already sum to ``squares``.
        for square in range(start, min(start + count, squares)):
            row, col = divmod(square, rows)
            # NOTE(review): cells span y in [1, rows + 1] while the y-limits
            # start at 0, leaving an empty strip at the bottom - confirm that
            # this is intended.
            plot_ax.add_patch(
                Rectangle(
                    (col, rows - row),
                    1,
                    1,
                    facecolor=color,
                    edgecolor=style.background_color,
                )
            )
        start += count

    plot_ax.set_xlim(0, rows)
    plot_ax.set_ylim(0, rows + 1)
    plot_ax.axis("off")
    if title:
        plot_ax.set_title(title)

    # Proxy rectangles: the legend needs one handle per category, not per cell.
    legend_handles = [Rectangle((0, 0), 1, 1, color=color) for color in colors]
    plot_ax.legend(
        legend_handles, pd_df[category], loc="upper center", ncol=3, frameon=False
    )
    return plot_ax
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def fplot_waffle(
    pd_df: pd.DataFrame,
    category: str,
    value: str,
    rows: int = 10,
    title: Optional[str] = None,
    style: StyleTemplate = PIE_STYLE_TEMPLATE,
    figsize: Tuple[float, float] = (8, 8),
) -> Figure:
    """Plot a waffle chart on a new figure.

    Hands ``aplot_waffle`` to ``_wrap_aplot`` together with the data frame,
    the requested ``figsize`` and the chart options, and returns the result.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        Data to plot.
    category : str
        Column holding the segment labels.
    value : str
        Column holding the segment sizes.
    rows : int, optional
        Number of grid rows and columns. Defaults to ``10``.
    title : str, optional
        Chart title. Defaults to ``None``.
    style : StyleTemplate, optional
        Styling options. Defaults to ``PIE_STYLE_TEMPLATE``.
    figsize : tuple of float, optional
        Figure size forwarded to ``_wrap_aplot``. Defaults to ``(8, 8)``.

    Returns
    -------
    matplotlib.figure.Figure
        The value returned by ``_wrap_aplot``.
    """
    chart_options = {
        "category": category,
        "value": value,
        "rows": rows,
        "title": title,
        "style": style,
    }
    return _wrap_aplot(aplot_waffle, pd_df=pd_df, figsize=figsize, **chart_options)
|
|
MatplotLibAPI/Wordcloud.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
"""Word cloud plotting utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Iterable, Optional, Sequence, Tuple, cast
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import matplotlib.pyplot as plt
|
|
10
|
+
from matplotlib import colormaps
|
|
11
|
+
from matplotlib.axes import Axes
|
|
12
|
+
from matplotlib.figure import Figure
|
|
13
|
+
|
|
14
|
+
from .StyleTemplate import (
|
|
15
|
+
FIG_SIZE,
|
|
16
|
+
MAX_RESULTS,
|
|
17
|
+
StyleTemplate,
|
|
18
|
+
string_formatter,
|
|
19
|
+
validate_dataframe,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Default word-cloud look: white text and the "plasma" palette on black.
WORDCLOUD_STYLE_TEMPLATE = StyleTemplate(
    background_color="black",
    font_color="white",
    palette="plasma",
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _normalize_weights(
    weights: Sequence[float], base_size: int, max_scale: float = 4.0
) -> np.ndarray:
    """Map weights linearly onto a range of font sizes.

    The smallest finite weight maps to ``base_size`` and the largest finite
    weight to ``base_size * max_scale``; everything in between is linearly
    interpolated.

    Parameters
    ----------
    weights : Sequence[float]
        Sequence of weights representing word importance.
    base_size : int
        Font size assigned to the smallest weight.
    max_scale : float, optional
        Multiple of ``base_size`` assigned to the largest weight.
        Defaults to ``4.0``.

    Returns
    -------
    numpy.ndarray
        Float array of font sizes, one per weight. It is empty for empty
        input and filled with ``base_size`` when all finite weights are equal
        (or none is finite). NaN weights always receive ``base_size``;
        infinite weights fall outside the interpolation range and are
        clamped to the nearest bound by ``numpy.interp``.
    """
    numeric_weights = np.asarray(weights, dtype=float)
    if numeric_weights.size == 0:
        return np.array([], dtype=float)

    # Derive the range from finite values only, so a single NaN/inf entry
    # cannot turn every interpolated size into NaN.
    finite = np.isfinite(numeric_weights)
    if not finite.any():
        return np.full_like(numeric_weights, fill_value=base_size, dtype=float)

    min_weight = numeric_weights[finite].min()
    max_weight = numeric_weights[finite].max()
    if min_weight == max_weight:
        return np.full_like(numeric_weights, fill_value=base_size, dtype=float)

    font_sizes = np.interp(
        numeric_weights,
        (min_weight, max_weight),
        (base_size, base_size * max_scale),
    )
    # ``np.interp`` propagates NaN inputs; give those words the base size.
    font_sizes[np.isnan(numeric_weights)] = base_size
    return font_sizes
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _filter_stopwords(
    words: Iterable[str], stopwords: Optional[Iterable[str]]
) -> np.ndarray:
    """Drop every word that matches a stopword, ignoring case.

    Parameters
    ----------
    words : Iterable[str]
        Candidate words, kept in their original order.
    stopwords : Iterable[str], optional
        Words to exclude. ``None`` disables filtering.

    Returns
    -------
    numpy.ndarray
        Array of the surviving words (all words when ``stopwords`` is
        ``None``).
    """
    if stopwords is not None:
        # Lower-case both sides so the comparison is case-insensitive.
        blocked = {entry.lower() for entry in stopwords}
        survivors = [
            candidate for candidate in words if candidate.lower() not in blocked
        ]
        return np.array(survivors)

    return np.array(list(words))
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _prepare_word_frequencies(
    pd_df: pd.DataFrame,
    text_column: str,
    weight_column: Optional[str],
    max_words: int,
    stopwords: Optional[Iterable[str]],
) -> Tuple[list[str], list[float]]:
    """Aggregate, filter and rank word frequencies.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        Input DataFrame containing word data.
    text_column : str
        Column containing words or phrases.
    weight_column : str, optional
        Column containing numeric weights, summed per distinct word. When
        ``None`` each word is weighted by its number of occurrences.
    max_words : int
        Maximum number of words to return. Values below zero are treated as
        zero.
    stopwords : Iterable[str], optional
        Words to exclude (case-insensitive). Defaults to ``None``.

    Returns
    -------
    tuple of list
        The words and their weights, in matching order, sorted by descending
        weight. Words with equal weight keep the order of the aggregated
        series.

    Raises
    ------
    AttributeError
        Documented contract of ``validate_dataframe`` for missing columns
        (its implementation is not visible from this module).
    """
    validate_dataframe(pd_df, cols=[text_column], sort_by=weight_column)

    if weight_column is None:
        freq_series: pd.Series = pd_df[text_column].value_counts()
    else:
        weight_col = cast(str, weight_column)
        freq_series = cast(pd.Series, pd_df.groupby(text_column)[weight_col].sum())

    all_words = freq_series.index.to_numpy()
    all_weights = freq_series.to_numpy(dtype=float)

    # Apply one mask to both arrays so words and weights stay aligned by
    # construction.
    kept_words = _filter_stopwords(all_words, stopwords)
    keep_mask = np.isin(all_words, kept_words)
    words = all_words[keep_mask]
    weights = all_weights[keep_mask]

    # A stable sort on the negated weights gives descending order with a
    # deterministic tie order; no prior sort of the series is needed.
    ranking = np.argsort(-weights, kind="stable")
    # A negative slice bound would drop the tail instead of limiting the size.
    top = ranking[: max(int(max_words), 0)]

    return words[top].tolist(), weights[top].tolist()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _plot_words(
    ax: Axes,
    words: Sequence[str],
    weights: Sequence[float],
    style: StyleTemplate,
    title: Optional[str],
    random_state: Optional[int],
) -> Axes:
    """Scatter words over the axes, sized according to their weights.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes on which to draw.
    words : Sequence[str]
        Words to render.
    weights : Sequence[float]
        Weight of each word; converted to font sizes by
        ``_normalize_weights`` using ``style.font_size`` as the base size.
    style : StyleTemplate
        Supplies the background colour, palette, font colour and font size.
    title : str, optional
        Title of the plot. Nothing is set when it is ``None`` or empty.
    random_state : int, optional
        Seed of the random generator that picks positions and colours.

    Returns
    -------
    matplotlib.axes.Axes
        The same axes, now containing the words.
    """
    generator = np.random.default_rng(seed=random_state)
    sizes = _normalize_weights(weights, base_size=style.font_size)
    palette = colormaps.get_cmap(style.palette)

    ax.set_facecolor(style.background_color)
    ax.axis("off")

    # Positions are drawn up front, in axes-fraction coordinates, before any
    # colour is sampled; this keeps the random stream order unchanged.
    word_count = len(words)
    xs = generator.uniform(0.05, 0.95, size=word_count)
    ys = generator.uniform(0.05, 0.95, size=word_count)

    # Zipping with ``weights`` keeps the loop bounded by the shorter sequence.
    for word, _weight, size, x, y in zip(words, weights, sizes, xs, ys):
        tint = palette(generator.random())
        ax.text(
            x,
            y,
            string_formatter(word),
            ha="center",
            va="center",
            fontsize=size,
            color=tint,
            transform=ax.transAxes,
        )

    if not title:
        return ax

    ax.set_title(title, color=style.font_color, fontsize=style.font_size * 1.5)
    return ax
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def aplot_wordcloud(
    pd_df: pd.DataFrame,
    text_column: str,
    weight_column: Optional[str] = None,
    title: Optional[str] = None,
    style: StyleTemplate = WORDCLOUD_STYLE_TEMPLATE,
    max_words: int = MAX_RESULTS,
    stopwords: Optional[Iterable[str]] = None,
    random_state: Optional[int] = None,
    ax: Optional[Axes] = None,
) -> Axes:
    """Draw a word cloud on the given (or current) axes.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        DataFrame containing the words to visualize.
    text_column : str
        Column containing words or phrases.
    weight_column : str, optional
        Column containing numeric weights. With the default ``None`` a word
        is weighted by how often it occurs in ``text_column``.
    title : str, optional
        Plot title. Defaults to ``None``.
    style : StyleTemplate, optional
        Styling options. Defaults to ``WORDCLOUD_STYLE_TEMPLATE``.
    max_words : int, optional
        Maximum number of words to display. Defaults to ``MAX_RESULTS``.
    stopwords : Iterable[str], optional
        Words to exclude from the visualization. Defaults to ``None``.
    random_state : int, optional
        Seed for word placement. Defaults to ``None``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. ``None`` selects the current axes via ``plt.gca()``.

    Returns
    -------
    matplotlib.axes.Axes
        Axes containing the rendered word cloud.

    Raises
    ------
    AttributeError
        Documented contract of the frequency preparation step when required
        columns are missing from the DataFrame.
    """
    target_ax = cast(Axes, plt.gca()) if ax is None else ax

    frequencies = _prepare_word_frequencies(
        pd_df=pd_df,
        text_column=text_column,
        weight_column=weight_column,
        max_words=max_words,
        stopwords=stopwords,
    )
    cloud_words, cloud_weights = frequencies

    return _plot_words(
        target_ax,
        cloud_words,
        cloud_weights,
        style=style,
        title=title,
        random_state=random_state,
    )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def fplot_wordcloud(
    pd_df: pd.DataFrame,
    text_column: str,
    weight_column: Optional[str] = None,
    title: Optional[str] = None,
    style: StyleTemplate = WORDCLOUD_STYLE_TEMPLATE,
    max_words: int = MAX_RESULTS,
    stopwords: Optional[Iterable[str]] = None,
    random_state: Optional[int] = None,
    figsize: Tuple[float, float] = FIG_SIZE,
) -> Figure:
    """Build a new figure that holds a word cloud.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        DataFrame containing the words to visualize.
    text_column : str
        Column containing words or phrases.
    weight_column : str, optional
        Column containing numeric weights. With the default ``None`` a word
        is weighted by how often it occurs in ``text_column``.
    title : str, optional
        Plot title. Defaults to ``None``.
    style : StyleTemplate, optional
        Styling options. Defaults to ``WORDCLOUD_STYLE_TEMPLATE``.
    max_words : int, optional
        Maximum number of words to display. Defaults to ``MAX_RESULTS``.
    stopwords : Iterable[str], optional
        Words to exclude from the visualization. Defaults to ``None``.
    random_state : int, optional
        Seed for word placement. Defaults to ``None``.
    figsize : tuple of float, optional
        Figure size. Defaults to ``FIG_SIZE``.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the rendered word cloud.

    Raises
    ------
    AttributeError
        Documented contract of the frequency preparation step when required
        columns are missing from the DataFrame.
    """
    created_fig, created_ax = plt.subplots(figsize=figsize)
    figure = cast(Figure, created_fig)

    # With an explicit ``ax`` the axes-level function performs the same
    # prepare-then-draw sequence on the freshly created axes.
    aplot_wordcloud(
        pd_df=pd_df,
        text_column=text_column,
        weight_column=weight_column,
        title=title,
        style=style,
        max_words=max_words,
        stopwords=stopwords,
        random_state=random_state,
        ax=cast(Axes, created_ax),
    )

    # Paint the figure in the same colour as the axes background.
    figure.patch.set_facecolor(style.background_color)
    figure.tight_layout()
    return figure
|