pandas-plots 0.12.7__tar.gz → 0.12.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas_plots-0.12.7/src/pandas_plots.egg-info → pandas_plots-0.12.9}/PKG-INFO +1 -1
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/setup.cfg +1 -1
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots/pls.py +254 -122
- {pandas_plots-0.12.7 → pandas_plots-0.12.9/src/pandas_plots.egg-info}/PKG-INFO +1 -1
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/LICENSE +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/README.md +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/pyproject.toml +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots/hlp.py +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots/pii.py +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots/tbl.py +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots/ven.py +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots.egg-info/SOURCES.txt +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots.egg-info/dependency_links.txt +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots.egg-info/pii.py +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots.egg-info/requires.txt +0 -0
- {pandas_plots-0.12.7 → pandas_plots-0.12.9}/src/pandas_plots.egg-info/top_level.txt +0 -0
@@ -12,51 +12,118 @@ from matplotlib import pyplot as plt
|
|
12
12
|
from plotly import express as px
|
13
13
|
import plotly.graph_objects as go
|
14
14
|
from plotly.subplots import make_subplots
|
15
|
-
import plotly
|
15
|
+
import plotly # needed for return types
|
16
16
|
|
17
17
|
from .hlp import *
|
18
18
|
from .tbl import print_summary
|
19
19
|
|
20
20
|
### helper functions
|
21
21
|
|
22
|
+
|
22
23
|
def _set_caption(caption: str) -> str:
|
23
24
|
return f"#️⃣{'-'.join(caption.split())}, " if caption else ""
|
24
25
|
|
25
26
|
|
26
|
-
def aggregate_data(
|
27
|
+
def aggregate_data(
|
28
|
+
df: pd.DataFrame,
|
29
|
+
top_n_index: int,
|
30
|
+
top_n_color: int,
|
31
|
+
top_n_facet: int,
|
32
|
+
null_label: str,
|
33
|
+
show_other: bool = False,
|
34
|
+
sort_values_index: bool = False,
|
35
|
+
sort_values_color: bool = False,
|
36
|
+
sort_values_facet: bool = False,
|
37
|
+
) -> pd.DataFrame:
|
27
38
|
"""
|
28
39
|
Aggregates the data, ensuring each combination of 'index', 'col', and 'facet' is unique with summed 'value'.
|
29
|
-
|
40
|
+
|
30
41
|
Args:
|
31
42
|
df (pd.DataFrame): Input DataFrame.
|
32
43
|
top_n_index (int): top N values of the first column to keep. 0 means take all.
|
33
|
-
|
44
|
+
top_n_color (int): top N values of the second column to keep. 0 means take all.
|
34
45
|
top_n_facet (int): top N values of the third column to keep. 0 means take all.
|
35
46
|
null_label (str): Label for null values.
|
47
|
+
show_other (bool): Whether to sum all "col" values outside the selected top colors into a single "<other>" category. Only takes effect when top_n_color > 0. Defaults to False.
|
48
|
+
sort_values_index, sort_values_color, sort_values_facet (bool): Whether to select and order the respective categories by descending group sum instead of ascending sorted order. Each defaults to False.
|
36
49
|
|
37
50
|
Returns:
|
38
51
|
pd.DataFrame: Aggregated and filtered dataset.
|
39
52
|
"""
|
40
|
-
|
53
|
+
|
54
|
+
for col in ["index", "col", "facet"]: # Skip 'value' column (numeric)
|
41
55
|
df[col] = df[col].fillna(null_label)
|
42
56
|
|
43
57
|
# Aggregate data to ensure unique combinations
|
44
|
-
aggregated_df = df.groupby([
|
58
|
+
aggregated_df = df.groupby(["index", "col", "facet"], as_index=False)["value"].sum()
|
45
59
|
|
46
|
-
# Reduce data based on top_n parameters
|
47
|
-
if
|
48
|
-
top_indexes =
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
60
|
+
# * Reduce data based on top_n parameters
|
61
|
+
if sort_values_index:
|
62
|
+
top_indexes = (
|
63
|
+
aggregated_df.groupby("index")["value"]
|
64
|
+
.sum()
|
65
|
+
.sort_values(ascending=False)[:top_n_index or None]
|
66
|
+
.index
|
67
|
+
)
|
68
|
+
else:
|
69
|
+
top_indexes = aggregated_df["index"].sort_values().unique()[:top_n_index or None]
|
70
|
+
|
71
|
+
aggregated_df = aggregated_df[aggregated_df["index"].isin(top_indexes)]
|
72
|
+
|
73
|
+
if sort_values_color:
|
74
|
+
top_colors = (
|
75
|
+
aggregated_df.groupby("col")["value"]
|
76
|
+
.sum()
|
77
|
+
.sort_values(ascending=False)[:top_n_color or None]
|
78
|
+
.index
|
79
|
+
)
|
80
|
+
else:
|
81
|
+
top_colors = aggregated_df["col"].sort_values().unique()[:top_n_color or None]
|
82
|
+
|
83
|
+
others_df = df[~df["col"].isin(top_colors)]
|
84
|
+
aggregated_df = aggregated_df[aggregated_df["col"].isin(top_colors)]
|
85
|
+
if show_other and top_n_color > 0 and not others_df.empty:
|
86
|
+
other_agg = others_df.groupby(["index", "facet"], as_index=False)[
|
87
|
+
"value"
|
88
|
+
].sum()
|
89
|
+
other_agg["col"] = "<other>"
|
90
|
+
other_agg = other_agg[["index", "col", "facet", "value"]]
|
91
|
+
aggregated_df = pd.concat([aggregated_df, other_agg], ignore_index=True)
|
92
|
+
top_colors = [*top_colors, "<other>"]
|
93
|
+
|
94
|
+
if sort_values_facet:
|
95
|
+
top_facets = (
|
96
|
+
aggregated_df.groupby("facet")["value"]
|
97
|
+
.sum()
|
98
|
+
.sort_values(ascending=False)[:top_n_facet or None]
|
99
|
+
.index
|
100
|
+
)
|
101
|
+
else:
|
102
|
+
top_facets = aggregated_df["facet"].sort_values().unique()[:top_n_facet or None]
|
103
|
+
|
104
|
+
aggregated_df = aggregated_df[aggregated_df["facet"].isin(top_facets)]
|
105
|
+
|
106
|
+
# * Make facet an ordered categorical that follows the order of top_facets (by group sum if sort_values_facet, otherwise ascending)
|
107
|
+
aggregated_df["facet"] = pd.Categorical(
|
108
|
+
values=aggregated_df["facet"],
|
109
|
+
categories=top_facets,
|
110
|
+
ordered=True,
|
111
|
+
)
|
56
112
|
|
57
|
-
|
58
|
-
|
59
|
-
|
113
|
+
aggregated_df["index"] = pd.Categorical(
|
114
|
+
values=aggregated_df["index"],
|
115
|
+
categories=top_indexes,
|
116
|
+
ordered=True,
|
117
|
+
)
|
118
|
+
|
119
|
+
aggregated_df["col"] = pd.Categorical(
|
120
|
+
values=aggregated_df["col"],
|
121
|
+
categories=top_colors,
|
122
|
+
ordered=True,
|
123
|
+
)
|
124
|
+
|
125
|
+
|
126
|
+
# aggregated_df = aggregated_df.sort_values(by="facet")
|
60
127
|
|
61
128
|
return aggregated_df
|
62
129
|
|
@@ -77,13 +144,15 @@ def assign_column_colors(columns, color_palette, null_label):
|
|
77
144
|
palette = getattr(px.colors.qualitative, color_palette)
|
78
145
|
else:
|
79
146
|
raise ValueError(f"Invalid color palette: {color_palette}")
|
80
|
-
|
147
|
+
|
81
148
|
colors = {col: palette[i % len(palette)] for i, col in enumerate(sorted(columns))}
|
82
149
|
colors[null_label] = "lightgray"
|
83
150
|
return colors
|
84
151
|
|
152
|
+
|
85
153
|
### main functions
|
86
154
|
|
155
|
+
|
87
156
|
def plot_quadrants(
|
88
157
|
df: pd.DataFrame,
|
89
158
|
title: str = None,
|
@@ -163,7 +232,7 @@ def plot_quadrants(
|
|
163
232
|
|
164
233
|
# * save to png if path is provided
|
165
234
|
if png_path is not None:
|
166
|
-
plt.savefig(Path(png_path).as_posix(), format=
|
235
|
+
plt.savefig(Path(png_path).as_posix(), format="png")
|
167
236
|
|
168
237
|
return q1, q2, q3, q4, n
|
169
238
|
# * plotly express is not used for the heatmap, although it does not need the derived wide format.
|
@@ -185,11 +254,14 @@ def plot_stacked_bars(
|
|
185
254
|
renderer: Literal["png", "svg", None] = "png",
|
186
255
|
caption: str = None,
|
187
256
|
sort_values: bool = False,
|
257
|
+
sort_values_index: bool = False,
|
258
|
+
sort_values_color: bool = False,
|
188
259
|
show_total: bool = False,
|
189
260
|
precision: int = 0,
|
190
261
|
png_path: Path | str = None,
|
191
262
|
color_palette: str = "Plotly",
|
192
263
|
null_label: str = "<NA>",
|
264
|
+
show_other: bool = False,
|
193
265
|
) -> plotly.graph_objects:
|
194
266
|
"""
|
195
267
|
Generates a stacked bar plot using the provided DataFrame.
|
@@ -208,7 +280,7 @@ def plot_stacked_bars(
|
|
208
280
|
- title (str): Custom title for the plot.
|
209
281
|
- renderer (Literal["png", "svg", None]): Defines the output format.
|
210
282
|
- caption (str): Optional caption for additional context.
|
211
|
-
- sort_values (bool):
|
283
|
+
- sort_values (bool):
|
212
284
|
- If True, sorts bars by the sum of their values (descending).
|
213
285
|
- If False, sorts bars alphabetically.
|
214
286
|
- show_total (bool): If True, adds a row with the total sum of all categories.
|
@@ -216,20 +288,33 @@ def plot_stacked_bars(
|
|
216
288
|
- png_path (Path | str): If specified, saves the plot as a PNG file.
|
217
289
|
- color_palette (str): Name of the color palette to use.
|
218
290
|
- null_label (str): Label for null values.
|
219
|
-
|
291
|
+
- show_other (bool): If True, groups the color values outside the top_n_color selection into a single "<other>" category.
|
292
|
+
- sort_values_index (bool): If True, sorts the index categories by group sum
|
293
|
+
- sort_values_color (bool): If True, sorts the color categories by descending group sum
|
294
|
+
|
220
295
|
Returns:
|
221
296
|
- A Plotly figure object representing the stacked bar chart.
|
222
297
|
"""
|
223
298
|
BAR_LENGTH_MULTIPLIER = 1.05
|
224
|
-
|
299
|
+
|
225
300
|
# * 2 axis means at least 2 columns
|
226
301
|
if len(df.columns) < 2 or len(df.columns) > 3:
|
227
302
|
print("❌ df must have exactly 2 or 3 columns")
|
228
303
|
return
|
229
304
|
|
230
|
-
#
|
231
|
-
|
232
|
-
|
305
|
+
# ! do not enforce str columns anymore
|
306
|
+
# # * check if first 2 columns are str
|
307
|
+
# dtypes = set(df.iloc[:, [0, 1]].dtypes)
|
308
|
+
# dtypes_kind = [i.kind for i in dtypes]
|
309
|
+
|
310
|
+
# if set(dtypes_kind) - set(["O", "b"]):
|
311
|
+
# print("❌ first 2 columns must be str")
|
312
|
+
# # * overkill ^^
|
313
|
+
# df.iloc[:, [0, 1]] = df.iloc[:, [0, 1]].astype(str)
|
314
|
+
|
315
|
+
# * but last col must be numeric
|
316
|
+
if df.iloc[:, -1].dtype.kind not in ("f", "i"):
|
317
|
+
print("❌ last column must be numeric")
|
233
318
|
return
|
234
319
|
|
235
320
|
df = df.copy() # Copy the input DataFrame to avoid modifying the original
|
@@ -253,87 +338,103 @@ def plot_stacked_bars(
|
|
253
338
|
# * apply precision
|
254
339
|
df.iloc[:, 2] = df.iloc[:, 2].round(precision)
|
255
340
|
|
256
|
-
# * set index + color col
|
341
|
+
# * set index + color col
|
257
342
|
col_index = df.columns[0] if not swap else df.columns[1]
|
258
343
|
col_color = df.columns[1] if not swap else df.columns[0]
|
259
344
|
|
260
345
|
# * ensure df is grouped to prevent false aggregations
|
261
|
-
df = (
|
262
|
-
df.groupby([df.columns[0], df.columns[1]])
|
263
|
-
[df.columns[2]]
|
264
|
-
.sum()
|
265
|
-
.reset_index()
|
266
|
-
)
|
346
|
+
df = df.groupby([df.columns[0], df.columns[1]])[df.columns[2]].sum().reset_index()
|
267
347
|
|
268
348
|
# * add total as aggregation of df
|
269
349
|
if show_total:
|
270
|
-
df_total = df.groupby(df.columns[1], observed=True, as_index=False)[
|
350
|
+
df_total = df.groupby(df.columns[1], observed=True, as_index=False)[
|
351
|
+
df.columns[2]
|
352
|
+
].sum()
|
271
353
|
df_total[df.columns[0]] = " Total"
|
272
354
|
df = pd.concat([df, df_total], ignore_index=True)
|
273
355
|
|
274
|
-
|
275
|
-
# * apply top_n, reduce df
|
276
|
-
n_col = top_n_color if top_n_color > 0 else None
|
277
|
-
n_idx = top_n_index if top_n_index > 0 else None
|
278
|
-
|
279
|
-
unique_colors = sorted(
|
280
|
-
df.groupby(col_color)[df.columns[2]]
|
281
|
-
.sum()
|
282
|
-
.sort_values(ascending=False)
|
283
|
-
.index.tolist()[:n_col]
|
284
|
-
)
|
285
|
-
|
286
|
-
unique_idx = df[col_index].sort_values().unique()[:n_idx]
|
287
|
-
|
288
|
-
df = df[df[col_color].isin(unique_colors)]#.sort_values(by=[col_index, col_color])
|
289
|
-
df = df[df[col_index].isin(unique_idx)]#.sort_values(by=[col_index, col_color])
|
290
|
-
|
291
|
-
|
292
|
-
# # * Sorting logic based on sort_values
|
293
|
-
if sort_values:
|
294
|
-
sort_order = (
|
295
|
-
df.groupby(col_index)[df.columns[2]].sum().sort_values(ascending=False).index
|
296
|
-
)
|
297
|
-
else:
|
298
|
-
sort_order = sorted(df[col_index].unique()) # Alphabetical order
|
299
|
-
|
300
|
-
# # * Convert to categorical with explicit ordering
|
301
|
-
df[col_index] = pd.Categorical(df[col_index], categories=sort_order, ordered=True)
|
302
|
-
|
303
|
-
column_colors = assign_column_colors(
|
304
|
-
columns=unique_colors,
|
305
|
-
color_palette=color_palette,
|
306
|
-
null_label=null_label
|
307
|
-
)
|
308
|
-
|
309
356
|
# * calculate n
|
310
357
|
divider = 2 if show_total else 1
|
311
|
-
n = int(df
|
358
|
+
n = int(df.iloc[:, 2].sum() / divider)
|
312
359
|
|
313
360
|
# * title str
|
314
361
|
_title_str_top_index = f"TOP{top_n_index} " if top_n_index > 0 else ""
|
315
362
|
_title_str_top_color = f"TOP{top_n_color} " if top_n_color > 0 else ""
|
316
363
|
_title_str_null = f", NULL excluded" if dropna else ""
|
317
364
|
_title_str_n = f", n={n:_}"
|
365
|
+
|
366
|
+
_df = df.copy().assign(facet=None)
|
367
|
+
_df.columns = (
|
368
|
+
["index", "col", "value", "facet"]
|
369
|
+
if not swap
|
370
|
+
else ["col", "index", "value", "facet"]
|
371
|
+
)
|
372
|
+
|
373
|
+
aggregated_df = aggregate_data(
|
374
|
+
df=_df,
|
375
|
+
top_n_index=top_n_index,
|
376
|
+
top_n_color=top_n_color,
|
377
|
+
top_n_facet=0,
|
378
|
+
null_label=null_label,
|
379
|
+
show_other=show_other,
|
380
|
+
sort_values_index=sort_values_index,
|
381
|
+
sort_values_color=sort_values_color,
|
382
|
+
sort_values_facet=False, # just a placeholder
|
383
|
+
)
|
384
|
+
|
385
|
+
df = aggregated_df.copy()
|
386
|
+
|
387
|
+
columns = sorted(
|
388
|
+
df.groupby("col", observed=True)["value"]
|
389
|
+
.sum()
|
390
|
+
.sort_values(ascending=False)
|
391
|
+
.index.tolist()
|
392
|
+
)
|
393
|
+
column_colors = assign_column_colors(columns, color_palette, null_label)
|
394
|
+
|
318
395
|
caption = _set_caption(caption)
|
319
396
|
|
320
|
-
|
321
|
-
df["
|
397
|
+
# * after grouping add cols for pct and formatting
|
398
|
+
df["cnt_pct_only"] = df["value"].apply(lambda x: f"{(x / n) * 100:.{precision}f}%")
|
322
399
|
|
323
400
|
# * format output
|
324
|
-
df["cnt_str"] = df[
|
401
|
+
df["cnt_str"] = df["value"].apply(lambda x: f"{x:_.{precision}f}")
|
325
402
|
|
326
403
|
divider2 = "<br>" if orientation == "v" else " "
|
327
404
|
df["cnt_pct_str"] = df.apply(
|
328
|
-
lambda row: f"{row['cnt_str']}{divider2}({row['
|
405
|
+
lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_only']})", axis=1
|
329
406
|
)
|
330
407
|
|
408
|
+
# # # * Sorting logic based on sort_values
|
409
|
+
# if sort_values_index:
|
410
|
+
# sort_order = (
|
411
|
+
# df.groupby("index")["value"].sum().sort_values(ascending=False).index
|
412
|
+
# )
|
413
|
+
# else:
|
414
|
+
# sort_order = sorted(df["index"].unique(), reverse=False) # Alphabetical order
|
415
|
+
|
416
|
+
# display(sort_order)
|
417
|
+
|
418
|
+
# df["index"] = pd.Categorical(
|
419
|
+
# values=df["index"],
|
420
|
+
# # categories=sort_order,
|
421
|
+
# ordered=True,
|
422
|
+
# )
|
423
|
+
|
424
|
+
df = (
|
425
|
+
df.sort_values(by=["col","index"], ascending=[True, False])
|
426
|
+
if orientation == "h"
|
427
|
+
else df.sort_values(by=["index","col"], ascending=[True, True])
|
428
|
+
)
|
429
|
+
|
430
|
+
# display(df)
|
431
|
+
|
331
432
|
# * plot
|
332
433
|
fig = px.bar(
|
333
434
|
df,
|
334
|
-
x=
|
335
|
-
y=
|
336
|
-
color=
|
435
|
+
x="index" if orientation == "v" else "value",
|
436
|
+
y="value" if orientation == "v" else "index",
|
437
|
+
color="col",
|
337
438
|
text="cnt_pct_str" if normalize else "cnt_str",
|
338
439
|
orientation=orientation,
|
339
440
|
title=title
|
@@ -342,13 +443,15 @@ def plot_stacked_bars(
|
|
342
443
|
width=width,
|
343
444
|
height=height,
|
344
445
|
color_discrete_map=column_colors, # Use assigned colors
|
345
|
-
category_orders={
|
346
|
-
|
446
|
+
category_orders={
|
447
|
+
col_index: list(df["index"].cat.categories)
|
448
|
+
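}, # NOTE(review): the key is the original column name, while the plotted column is "index" — confirm this ordering is applied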
|
347
449
|
)
|
348
|
-
|
349
|
-
|
450
|
+
|
451
|
+
|
452
|
+
# * get longest bar
|
350
453
|
bar_max = (
|
351
|
-
df.groupby(
|
454
|
+
df.groupby("index")["value"].sum().sort_values(ascending=False).iloc[0]
|
352
455
|
* BAR_LENGTH_MULTIPLIER
|
353
456
|
)
|
354
457
|
# * ignore if bar mode is on
|
@@ -372,7 +475,7 @@ def plot_stacked_bars(
|
|
372
475
|
},
|
373
476
|
},
|
374
477
|
)
|
375
|
-
|
478
|
+
|
376
479
|
# * set dtick
|
377
480
|
if orientation == "h":
|
378
481
|
if relative:
|
@@ -692,7 +795,7 @@ def plot_histogram(
|
|
692
795
|
caption (str): The caption for the plot. Default is None.
|
693
796
|
title (str): The title of the plot. Default is None.
|
694
797
|
png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
|
695
|
-
|
798
|
+
|
696
799
|
|
697
800
|
Returns:
|
698
801
|
plot object
|
@@ -744,7 +847,7 @@ def plot_histogram(
|
|
744
847
|
)
|
745
848
|
|
746
849
|
fig.show(renderer)
|
747
|
-
|
850
|
+
|
748
851
|
# * save to png if path is provided
|
749
852
|
if png_path is not None:
|
750
853
|
fig.write_image(Path(png_path).as_posix())
|
@@ -1156,12 +1259,11 @@ def plot_boxes(
|
|
1156
1259
|
return fig
|
1157
1260
|
|
1158
1261
|
|
1159
|
-
|
1160
1262
|
def plot_facet_stacked_bars(
|
1161
1263
|
df: pd.DataFrame,
|
1162
1264
|
subplots_per_row: int = 4,
|
1163
1265
|
top_n_index: int = 0,
|
1164
|
-
|
1266
|
+
top_n_color: int = 0,
|
1165
1267
|
top_n_facet: int = 0,
|
1166
1268
|
null_label: str = "<NA>",
|
1167
1269
|
subplot_size: int = 300,
|
@@ -1171,6 +1273,12 @@ def plot_facet_stacked_bars(
|
|
1171
1273
|
annotations: bool = False,
|
1172
1274
|
precision: int = 0,
|
1173
1275
|
png_path: Optional[Path] = None,
|
1276
|
+
show_other: bool = False,
|
1277
|
+
sort_values: bool = True,
|
1278
|
+
sort_values_index: bool = False,
|
1279
|
+
sort_values_color: bool = False,
|
1280
|
+
sort_values_facet: bool = False,
|
1281
|
+
|
1174
1282
|
) -> object:
|
1175
1283
|
"""
|
1176
1284
|
Create a grid of stacked bar charts.
|
@@ -1179,7 +1287,7 @@ def plot_facet_stacked_bars(
|
|
1179
1287
|
df (pd.DataFrame): DataFrame with 3 or 4 columns.
|
1180
1288
|
subplots_per_row (int): Number of subplots per row.
|
1181
1289
|
top_n_index (int): top N index values to keep.
|
1182
|
-
|
1290
|
+
top_n_color (int): top N column values to keep.
|
1183
1291
|
top_n_facet (int): top N facet values to keep.
|
1184
1292
|
null_label (str): Label for null values.
|
1185
1293
|
subplot_size (int): Size of each subplot.
|
@@ -1189,47 +1297,57 @@ def plot_facet_stacked_bars(
|
|
1189
1297
|
annotations (bool): Whether to show annotations in the subplots.
|
1190
1298
|
precision (int): Decimal precision for annotations.
|
1191
1299
|
png_path (Optional[Path]): Path to save the image.
|
1300
|
+
show_other (bool): If True, adds an "<other>" bar for columns not in top_n_color.
|
1301
|
+
sort_values_index (bool): If True, sorts index by group sum.
|
1302
|
+
sort_values_color (bool): If True, sorts columns by group sum.
|
1303
|
+
sort_values_facet (bool): If True, sorts facet by group sum.
|
1304
|
+
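sort_values (bool): DEPRECATED; it is not forwarded to aggregate_data. Use sort_values_index, sort_values_color and sort_values_facet instead.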
|
1305
|
+
|
1192
1306
|
|
1193
1307
|
Returns:
|
1194
1308
|
plot object
|
1195
|
-
|
1309
|
+
|
1196
1310
|
Remarks:
|
1197
1311
|
If you need to include facets that have no data, fill up like this beforehand:
|
1198
1312
|
df.loc[len(df)]=[None, None, 12]
|
1199
1313
|
"""
|
1200
|
-
|
1314
|
+
|
1201
1315
|
df = df.copy() # Copy the input DataFrame to avoid modifying the original
|
1202
1316
|
|
1203
1317
|
if not (df.shape[1] == 3 or df.shape[1] == 4):
|
1204
1318
|
raise ValueError("Input DataFrame must have 3 or 4 columns.")
|
1205
|
-
|
1319
|
+
|
1206
1320
|
original_column_names = df.columns.tolist()
|
1321
|
+
original_rows = len(df)
|
1207
1322
|
|
1208
1323
|
if df.shape[1] == 3:
|
1209
|
-
df.columns = [
|
1210
|
-
df[
|
1324
|
+
df.columns = ["index", "col", "facet"]
|
1325
|
+
df["value"] = 1
|
1211
1326
|
elif df.shape[1] == 4:
|
1212
|
-
df.columns = [
|
1213
|
-
|
1214
|
-
aggregated_df = aggregate_data(df, top_n_index, top_n_columns, top_n_facet, null_label)
|
1215
|
-
|
1216
|
-
# facets = aggregated_df['facet'].unique()
|
1217
|
-
facets = sorted(aggregated_df['facet'].unique()) # Ensure facets are sorted consistently
|
1327
|
+
df.columns = ["index", "col", "facet", "value"]
|
1218
1328
|
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1329
|
+
aggregated_df = aggregate_data(
|
1330
|
+
df,
|
1331
|
+
top_n_index,
|
1332
|
+
top_n_color,
|
1333
|
+
top_n_facet,
|
1334
|
+
null_label,
|
1335
|
+
show_other=show_other,
|
1336
|
+
sort_values_index=sort_values_index,
|
1337
|
+
sort_values_color=sort_values_color,
|
1338
|
+
sort_values_facet=sort_values_facet,
|
1339
|
+
)
|
1229
1340
|
|
1341
|
+
facets = sorted(
|
1342
|
+
aggregated_df["facet"].unique()
|
1343
|
+
) # Ensure facets are sorted consistently
|
1230
1344
|
|
1231
|
-
|
1232
|
-
|
1345
|
+
columns = sorted(
|
1346
|
+
aggregated_df.groupby("col", observed=True)["value"]
|
1347
|
+
.sum()
|
1348
|
+
.sort_values(ascending=False)
|
1349
|
+
.index.tolist()
|
1350
|
+
)
|
1233
1351
|
column_colors = assign_column_colors(columns, color_palette, null_label)
|
1234
1352
|
|
1235
1353
|
fig = make_subplots(
|
@@ -1238,25 +1356,39 @@ def plot_facet_stacked_bars(
|
|
1238
1356
|
subplot_titles=facets,
|
1239
1357
|
)
|
1240
1358
|
|
1359
|
+
# * Ensure all categories appear in the legend by adding an invisible trace
|
1360
|
+
for column in columns:
|
1361
|
+
fig.add_trace(
|
1362
|
+
go.Bar(
|
1363
|
+
x=[None], # Invisible bar
|
1364
|
+
y=[None],
|
1365
|
+
name=column,
|
1366
|
+
marker=dict(color=column_colors[column]),
|
1367
|
+
showlegend=True, # Ensure it appears in the legend
|
1368
|
+
)
|
1369
|
+
)
|
1370
|
+
|
1241
1371
|
added_to_legend = set()
|
1242
1372
|
for i, facet in enumerate(facets):
|
1243
|
-
facet_data = aggregated_df[aggregated_df[
|
1373
|
+
facet_data = aggregated_df[aggregated_df["facet"] == facet]
|
1244
1374
|
row = (i // subplots_per_row) + 1
|
1245
1375
|
col = (i % subplots_per_row) + 1
|
1246
1376
|
|
1247
1377
|
for column in columns:
|
1248
|
-
column_data = facet_data[facet_data[
|
1378
|
+
column_data = facet_data[facet_data["col"] == column]
|
1379
|
+
|
1249
1380
|
show_legend = column not in added_to_legend
|
1250
1381
|
if show_legend:
|
1251
1382
|
added_to_legend.add(column)
|
1252
1383
|
|
1253
1384
|
fig.add_trace(
|
1254
1385
|
go.Bar(
|
1255
|
-
x=column_data[
|
1256
|
-
y=column_data[
|
1386
|
+
x=column_data["index"],
|
1387
|
+
y=column_data["value"],
|
1257
1388
|
name=column,
|
1258
1389
|
marker=dict(color=column_colors[column]),
|
1259
|
-
|
1390
|
+
legendgroup=column, # Ensures multiple traces use the same legend entry
|
1391
|
+
showlegend=False, # suppress further legend items
|
1260
1392
|
),
|
1261
1393
|
row=row,
|
1262
1394
|
col=col,
|
@@ -1265,8 +1397,8 @@ def plot_facet_stacked_bars(
|
|
1265
1397
|
if annotations:
|
1266
1398
|
for _, row_data in column_data.iterrows():
|
1267
1399
|
fig.add_annotation(
|
1268
|
-
x=row_data[
|
1269
|
-
y=row_data[
|
1400
|
+
x=row_data["index"],
|
1401
|
+
y=row_data["value"],
|
1270
1402
|
text=f"{row_data['value']:.{precision}f}",
|
1271
1403
|
showarrow=False,
|
1272
1404
|
row=row,
|
@@ -1280,8 +1412,8 @@ def plot_facet_stacked_bars(
|
|
1280
1412
|
else:
|
1281
1413
|
axis_details.append(f"[{original_column_names[0]}]")
|
1282
1414
|
|
1283
|
-
if
|
1284
|
-
axis_details.append(f"TOP {
|
1415
|
+
if top_n_color > 0:
|
1416
|
+
axis_details.append(f"TOP {top_n_color} [{original_column_names[1]}]")
|
1285
1417
|
else:
|
1286
1418
|
axis_details.append(f"[{original_column_names[1]}]")
|
1287
1419
|
|
@@ -1290,7 +1422,7 @@ def plot_facet_stacked_bars(
|
|
1290
1422
|
else:
|
1291
1423
|
axis_details.append(f"[{original_column_names[2]}]")
|
1292
1424
|
|
1293
|
-
title = f"{caption} {', '.join(axis_details)}, n = {
|
1425
|
+
title = f"{caption} {', '.join(axis_details)}, n = {original_rows:_}"
|
1294
1426
|
template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
|
1295
1427
|
fig.update_layout(
|
1296
1428
|
title=title,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|