MatplotLibAPI 3.2.13__py3-none-any.whl → 3.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ """Waffle chart helpers."""
2
+
3
+ from typing import Any, Optional, Tuple
4
+
5
+ import pandas as pd
6
+ import seaborn as sns
7
+ from matplotlib.axes import Axes
8
+ from matplotlib.figure import Figure
9
+ from matplotlib.patches import Rectangle
10
+ import matplotlib.pyplot as plt
11
+
12
+ from .StyleTemplate import PIE_STYLE_TEMPLATE, StyleTemplate, validate_dataframe
13
+ from ._visualization_utils import _get_axis, _wrap_aplot
14
+
15
+
16
def aplot_waffle(
    pd_df: pd.DataFrame,
    category: str,
    value: str,
    rows: int = 10,
    title: Optional[str] = None,
    style: StyleTemplate = PIE_STYLE_TEMPLATE,
    ax: Optional[Axes] = None,
    **kwargs: Any,
) -> Axes:
    """Plot a simple waffle chart as a grid of proportional squares.

    Each row of ``pd_df`` is drawn as its own category: it receives one
    palette color, a share of the ``rows * rows`` squares proportional to its
    value, and one legend entry.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        Data to plot.
    category : str
        Column holding the labels shown in the legend.
    value : str
        Column holding the numeric value of each category.
    rows : int, optional
        Side length of the grid; ``rows * rows`` squares are available.
        Defaults to ``10``.
    title : str, optional
        Axes title. Defaults to ``None`` (no title).
    style : StyleTemplate, optional
        Supplies ``palette`` for the square colors and ``background_color``
        for the square outlines. Defaults to ``PIE_STYLE_TEMPLATE``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on; resolved through ``_get_axis``.
    **kwargs : Any
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    matplotlib.axes.Axes
        Axes containing the waffle chart.

    Raises
    ------
    ValueError
        If ``rows`` is smaller than 1, or if the values do not sum to a
        positive, finite total (shares cannot be computed in that case).
    """
    validate_dataframe(pd_df, cols=[category, value])
    if rows < 1:
        raise ValueError(f"rows must be a positive integer, got {rows!r}")

    total = float(pd_df[value].sum())
    # Rejects zero, negative, NaN and infinite totals in one comparison; any
    # of them would turn the share computation below into NaN/inf.
    if not 0 < total < float("inf"):
        raise ValueError(
            f"Column {value!r} must sum to a positive, finite total, got {total!r}"
        )

    plot_ax = _get_axis(ax)
    squares = rows * rows
    colors = sns.color_palette(style.palette, n_colors=len(pd_df))
    plot_ax.set_aspect("equal")

    # Largest-remainder allocation: give every category the whole part of its
    # exact share, then hand the squares still unassigned to the categories
    # with the biggest fractional parts. Rounding each share independently can
    # leave the grid with gaps or cut the last category short.
    shares = [(float(val) / total) * squares for val in pd_df[value]]
    counts = [int(share) for share in shares]
    leftover = squares - sum(counts)
    if leftover > 0:
        by_remainder = sorted(
            range(len(shares)),
            key=lambda pos: shares[pos] - counts[pos],
            reverse=True,
        )
        for pos in by_remainder[:leftover]:
            counts[pos] += 1

    start = 0
    for color, count in zip(colors, counts):
        # The upper bound is clamped so no square is drawn outside the grid.
        for square in range(start, min(start + count, squares)):
            row, col = divmod(square, rows)
            # Grid row 0 is anchored at y == rows, so the squares occupy
            # y in [1, rows + 1]; the y-limits set below match that placement.
            plot_ax.add_patch(
                Rectangle(
                    (col, rows - row),
                    1,
                    1,
                    facecolor=color,
                    edgecolor=style.background_color,
                )
            )
        start += count

    plot_ax.set_xlim(0, rows)
    plot_ax.set_ylim(0, rows + 1)
    plot_ax.axis("off")
    if title:
        plot_ax.set_title(title)
    legend_handles = [Rectangle((0, 0), 1, 1, color=color) for color in colors]
    plot_ax.legend(
        legend_handles, pd_df[category], loc="upper center", ncol=3, frameon=False
    )
    return plot_ax
61
+
62
+
63
def fplot_waffle(
    pd_df: pd.DataFrame,
    category: str,
    value: str,
    rows: int = 10,
    title: Optional[str] = None,
    style: StyleTemplate = PIE_STYLE_TEMPLATE,
    figsize: Tuple[float, float] = (8, 8),
) -> Figure:
    """Plot a waffle chart on a new figure.

    Thin wrapper that forwards every argument, by keyword, to ``_wrap_aplot``
    together with ``aplot_waffle`` as the plotting callable.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        Data to plot.
    category : str
        Column holding the category labels.
    value : str
        Column holding the numeric value of each category.
    rows : int, optional
        Side length of the waffle grid. Defaults to ``10``.
    title : str, optional
        Plot title. Defaults to ``None``.
    style : StyleTemplate, optional
        Styling options. Defaults to ``PIE_STYLE_TEMPLATE``.
    figsize : tuple of float, optional
        Figure size handed to ``_wrap_aplot``. Defaults to ``(8, 8)``.

    Returns
    -------
    matplotlib.figure.Figure
        Whatever ``_wrap_aplot`` returns for the rendered chart.
    """
    # Keyword order mirrors the order in which the arguments are forwarded.
    plot_options = {
        "pd_df": pd_df,
        "figsize": figsize,
        "category": category,
        "value": value,
        "rows": rows,
        "title": title,
        "style": style,
    }
    return _wrap_aplot(aplot_waffle, **plot_options)
@@ -0,0 +1,314 @@
1
+ """Word cloud plotting utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Iterable, Optional, Sequence, Tuple, cast
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+ from matplotlib import colormaps
11
+ from matplotlib.axes import Axes
12
+ from matplotlib.figure import Figure
13
+
14
+ from .StyleTemplate import (
15
+ FIG_SIZE,
16
+ MAX_RESULTS,
17
+ StyleTemplate,
18
+ string_formatter,
19
+ validate_dataframe,
20
+ )
21
+
22
# Default style for the word cloud helpers in this module: white text on a
# black background, colored from the "plasma" palette.
WORDCLOUD_STYLE_TEMPLATE = StyleTemplate(
    palette="plasma",
    font_color="white",
    background_color="black",
)
25
+
26
+
27
+ def _normalize_weights(weights: Sequence[float], base_size: int) -> np.ndarray:
28
+ """Normalize weights to a range of font sizes.
29
+
30
+ Parameters
31
+ ----------
32
+ weights : Sequence[float]
33
+ Sequence of weights representing word importance.
34
+ base_size : int
35
+ Base font size used as the lower bound for scaling.
36
+
37
+ Returns
38
+ -------
39
+ numpy.ndarray
40
+ Array of font sizes corresponding to the provided weights.
41
+ """
42
+ numeric_weights = np.asarray(weights, dtype=float)
43
+ if numeric_weights.size == 0:
44
+ return np.array([], dtype=float)
45
+ min_weight = numeric_weights.min()
46
+ max_weight = numeric_weights.max()
47
+ if min_weight == max_weight:
48
+ return np.full_like(numeric_weights, fill_value=base_size, dtype=float)
49
+
50
+ min_size, max_size = base_size, base_size * 4
51
+ return np.interp(numeric_weights, (min_weight, max_weight), (min_size, max_size))
52
+
53
+
54
+ def _filter_stopwords(
55
+ words: Iterable[str], stopwords: Optional[Iterable[str]]
56
+ ) -> np.ndarray:
57
+ """Remove stopwords from a sequence of words.
58
+
59
+ Parameters
60
+ ----------
61
+ words : Iterable[str]
62
+ Words to filter.
63
+ stopwords : Iterable[str], optional
64
+ Collection of stopwords to exclude. Defaults to ``None``.
65
+
66
+ Returns
67
+ -------
68
+ numpy.ndarray
69
+ Filtered words.
70
+ """
71
+ if stopwords is None:
72
+ return np.array(list(words))
73
+
74
+ stop_set = {word.lower() for word in stopwords}
75
+ return np.array([word for word in words if word.lower() not in stop_set])
76
+
77
+
78
def _prepare_word_frequencies(
    pd_df: pd.DataFrame,
    text_column: str,
    weight_column: Optional[str],
    max_words: int,
    stopwords: Optional[Iterable[str]],
) -> Tuple[list[str], list[float]]:
    """Aggregate and filter word frequencies.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        Input DataFrame containing word data.
    text_column : str
        Column containing words or phrases.
    weight_column : str, optional
        Column containing numeric weights, summed per distinct word. When
        ``None`` each word is weighted by its number of occurrences.
    max_words : int
        Maximum number of words to include.
    stopwords : Iterable[str], optional
        Words to exclude from the visualization. Defaults to ``None``.

    Returns
    -------
    tuple of list
        Words and their corresponding weights, ordered by decreasing weight
        and truncated to ``max_words`` entries.

    Raises
    ------
    AttributeError
        If required columns are missing from the DataFrame (as reported by
        ``validate_dataframe``, which is defined outside this module).
    """
    validate_dataframe(pd_df, cols=[text_column], sort_by=weight_column)

    if weight_column is None:
        freq_series: pd.Series = pd_df[text_column].value_counts()
    else:
        freq_series = cast(pd.Series, pd_df.groupby(text_column)[weight_column].sum())
        freq_series = freq_series.sort_values(ascending=False)

    words = freq_series.index.to_numpy()
    weights = freq_series.to_numpy(dtype=float)

    # Only filter when there is something to filter. Pushing the labels
    # through ``np.array(list(...))`` without stopwords can coerce mixed-type
    # labels to strings, after which the membership mask no longer lines up
    # with the weights.
    if stopwords is not None:
        kept_words = _filter_stopwords(words, stopwords)
        keep_mask = np.isin(words, kept_words)
        # The same mask is applied to both arrays so they stay aligned.
        words, weights = words[keep_mask], weights[keep_mask]

    descending = np.argsort(weights)[::-1]
    top_words = words[descending][:max_words].tolist()
    top_weights = weights[descending][:max_words].tolist()

    return top_words, top_weights
131
+
132
+
133
def _plot_words(
    ax: Axes,
    words: Sequence[str],
    weights: Sequence[float],
    style: StyleTemplate,
    title: Optional[str],
    random_state: Optional[int],
) -> Axes:
    """Draw words on the axes with font sizes scaled by their weights.

    Words are placed at uniformly random positions in axes coordinates and
    colored with random samples from the style's palette colormap.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes on which to draw.
    words : Sequence[str]
        Words to render.
    weights : Sequence[float]
        Weights used to size the words.
    style : StyleTemplate
        Provides ``font_size``, ``palette``, ``background_color`` and
        ``font_color``.
    title : str, optional
        Title of the plot. ``None`` or an empty string leaves it untitled.
    random_state : int, optional
        Seed for reproducible placement and coloring. Defaults to ``None``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that was passed in, with the words drawn on it.
    """
    rng = np.random.default_rng(seed=random_state)
    font_sizes = _normalize_weights(weights, base_size=style.font_size)
    cmap = colormaps.get_cmap(style.palette)

    ax.set_facecolor(style.background_color)
    ax.axis("off")

    # Positions are drawn up front (all x, then all y); colors are sampled one
    # per word inside the loop, which keeps the random stream order fixed.
    word_count = len(words)
    xs = rng.uniform(0.05, 0.95, size=word_count)
    ys = rng.uniform(0.05, 0.95, size=word_count)

    for word, size, x_pos, y_pos in zip(words, font_sizes, xs, ys):
        color = cmap(rng.random())
        ax.text(
            x_pos,
            y_pos,
            string_formatter(word),
            ha="center",
            va="center",
            fontsize=size,
            color=color,
            transform=ax.transAxes,
        )

    if title:
        ax.set_title(title, color=style.font_color, fontsize=style.font_size * 1.5)
    return ax
190
+
191
+
192
def aplot_wordcloud(
    pd_df: pd.DataFrame,
    text_column: str,
    weight_column: Optional[str] = None,
    title: Optional[str] = None,
    style: StyleTemplate = WORDCLOUD_STYLE_TEMPLATE,
    max_words: int = MAX_RESULTS,
    stopwords: Optional[Iterable[str]] = None,
    random_state: Optional[int] = None,
    ax: Optional[Axes] = None,
) -> Axes:
    """Plot a word cloud on the provided axes.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        DataFrame containing the words to visualize.
    text_column : str
        Column containing words or phrases.
    weight_column : str, optional
        Column containing numeric weights. Defaults to ``None``, in which
        case words are weighted by how often they occur.
    title : str, optional
        Plot title. Defaults to ``None``.
    style : StyleTemplate, optional
        Styling options. Defaults to ``WORDCLOUD_STYLE_TEMPLATE``.
    max_words : int, optional
        Maximum number of words to display. Defaults to ``MAX_RESULTS``.
    stopwords : Iterable[str], optional
        Words to exclude from the visualization. Defaults to ``None``.
    random_state : int, optional
        Seed for word placement. Defaults to ``None``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. Defaults to ``None`` which uses the current axes.

    Returns
    -------
    matplotlib.axes.Axes
        Axes containing the rendered word cloud.

    Raises
    ------
    AttributeError
        If required columns are missing from the DataFrame.
    """
    # Validate and aggregate before touching pyplot: ``plt.gca()`` creates a
    # figure and axes when none exist, so resolving the axes first would leave
    # a stray figure behind whenever the input is rejected.
    words, weights = _prepare_word_frequencies(
        pd_df=pd_df,
        text_column=text_column,
        weight_column=weight_column,
        max_words=max_words,
        stopwords=stopwords,
    )

    target_ax = ax if ax is not None else cast(Axes, plt.gca())
    return _plot_words(
        target_ax, words, weights, style=style, title=title, random_state=random_state
    )
249
+
250
+
251
def fplot_wordcloud(
    pd_df: pd.DataFrame,
    text_column: str,
    weight_column: Optional[str] = None,
    title: Optional[str] = None,
    style: StyleTemplate = WORDCLOUD_STYLE_TEMPLATE,
    max_words: int = MAX_RESULTS,
    stopwords: Optional[Iterable[str]] = None,
    random_state: Optional[int] = None,
    figsize: Tuple[float, float] = FIG_SIZE,
) -> Figure:
    """Create a new figure with a word cloud.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        DataFrame containing the words to visualize.
    text_column : str
        Column containing words or phrases.
    weight_column : str, optional
        Column containing numeric weights. Defaults to ``None``, in which
        case words are weighted by how often they occur.
    title : str, optional
        Plot title. Defaults to ``None``.
    style : StyleTemplate, optional
        Styling options. Defaults to ``WORDCLOUD_STYLE_TEMPLATE``.
    max_words : int, optional
        Maximum number of words to display. Defaults to ``MAX_RESULTS``.
    stopwords : Iterable[str], optional
        Words to exclude from the visualization. Defaults to ``None``.
    random_state : int, optional
        Seed for word placement. Defaults to ``None``.
    figsize : tuple of float, optional
        Figure size. Defaults to ``FIG_SIZE``.

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the rendered word cloud.

    Raises
    ------
    AttributeError
        If required columns are missing from the DataFrame.
    """
    # Prepare the data before creating the figure so that rejected input does
    # not leave an empty figure registered with pyplot.
    words, weights = _prepare_word_frequencies(
        pd_df=pd_df,
        text_column=text_column,
        weight_column=weight_column,
        max_words=max_words,
        stopwords=stopwords,
    )

    fig_raw, ax_raw = plt.subplots(figsize=figsize)
    fig = cast(Figure, fig_raw)
    ax = cast(Axes, ax_raw)

    _plot_words(
        ax,
        words,
        weights,
        style=style,
        title=title,
        random_state=random_state,
    )
    # The figure patch is painted to match the axes background set by
    # ``_plot_words``.
    fig.patch.set_facecolor(style.background_color)
    fig.tight_layout()
    return fig