pandas-plots 0.12.7__tar.gz → 0.12.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pandas-plots
3
- Version: 0.12.7
3
+ Version: 0.12.9
4
4
  Summary: A collection of helper for table handling and visualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = pandas-plots
3
- version = 0.12.7
3
+ version = 0.12.9
4
4
  author = smeisegeier
5
5
  author_email = dexterDSDo@googlemail.com
6
6
  description = A collection of helper for table handling and visualization
@@ -12,51 +12,118 @@ from matplotlib import pyplot as plt
12
12
  from plotly import express as px
13
13
  import plotly.graph_objects as go
14
14
  from plotly.subplots import make_subplots
15
- import plotly # needed for return types
15
+ import plotly # needed for return types
16
16
 
17
17
  from .hlp import *
18
18
  from .tbl import print_summary
19
19
 
20
20
  ### helper functions
21
21
 
22
+
22
23
  def _set_caption(caption: str) -> str:
23
24
  return f"#️⃣{'-'.join(caption.split())}, " if caption else ""
24
25
 
25
26
 
26
- def aggregate_data(df: pd.DataFrame, top_n_index: int, top_n_columns: int, top_n_facet: int, null_label: str) -> pd.DataFrame:
27
+ def aggregate_data(
28
+ df: pd.DataFrame,
29
+ top_n_index: int,
30
+ top_n_color: int,
31
+ top_n_facet: int,
32
+ null_label: str,
33
+ show_other: bool = False,
34
+ sort_values_index: bool = False,
35
+ sort_values_color: bool = False,
36
+ sort_values_facet: bool = False,
37
+ ) -> pd.DataFrame:
27
38
  """
28
39
  Aggregates the data, ensuring each combination of 'index', 'col', and 'facet' is unique with summed 'value'.
29
-
40
+
30
41
  Args:
31
42
  df (pd.DataFrame): Input DataFrame.
32
43
  top_n_index (int): top N values of the first column to keep. 0 means take all.
33
- top_n_columns (int): top N values of the second column to keep. 0 means take all.
44
+ top_n_color (int): top N values of the second column to keep. 0 means take all.
34
45
  top_n_facet (int): top N values of the third column to keep. 0 means take all.
35
46
  null_label (str): Label for null values.
47
+ show_other (bool): Whether to include "<other>" for columns not in top_n_color. Defaults to False.
48
+ sort_values (bool): Whether to sort values in descending order based on group sum. Defaults to False.
36
49
 
37
50
  Returns:
38
51
  pd.DataFrame: Aggregated and filtered dataset.
39
52
  """
40
- for col in ['index', 'col', 'facet']: # Skip 'value' column (numeric)
53
+
54
+ for col in ["index", "col", "facet"]: # Skip 'value' column (numeric)
41
55
  df[col] = df[col].fillna(null_label)
42
56
 
43
57
  # Aggregate data to ensure unique combinations
44
- aggregated_df = df.groupby(['index', 'col', 'facet'], as_index=False)['value'].sum()
58
+ aggregated_df = df.groupby(["index", "col", "facet"], as_index=False)["value"].sum()
45
59
 
46
- # Reduce data based on top_n parameters
47
- if top_n_index > 0:
48
- top_indexes = aggregated_df.groupby('index')['value'].sum().nlargest(top_n_index).index
49
- aggregated_df = aggregated_df[aggregated_df['index'].isin(top_indexes)]
50
- if top_n_columns > 0:
51
- top_columns = aggregated_df.groupby('col')['value'].sum().nlargest(top_n_columns).index
52
- aggregated_df = aggregated_df[aggregated_df['col'].isin(top_columns)]
53
- if top_n_facet > 0:
54
- top_facets = aggregated_df.groupby('facet')['value'].sum().nlargest(top_n_facet).index
55
- aggregated_df = aggregated_df[aggregated_df['facet'].isin(top_facets)]
60
+ # * Reduce data based on top_n parameters
61
+ if sort_values_index:
62
+ top_indexes = (
63
+ aggregated_df.groupby("index")["value"]
64
+ .sum()
65
+ .sort_values(ascending=False)[:top_n_index or None]
66
+ .index
67
+ )
68
+ else:
69
+ top_indexes = aggregated_df["index"].sort_values().unique()[:top_n_index or None]
70
+
71
+ aggregated_df = aggregated_df[aggregated_df["index"].isin(top_indexes)]
72
+
73
+ if sort_values_color:
74
+ top_colors = (
75
+ aggregated_df.groupby("col")["value"]
76
+ .sum()
77
+ .sort_values(ascending=False)[:top_n_color or None]
78
+ .index
79
+ )
80
+ else:
81
+ top_colors = aggregated_df["col"].sort_values().unique()[:top_n_color or None]
82
+
83
+ others_df = df[~df["col"].isin(top_colors)]
84
+ aggregated_df = aggregated_df[aggregated_df["col"].isin(top_colors)]
85
+ if show_other and top_n_color > 0 and not others_df.empty:
86
+ other_agg = others_df.groupby(["index", "facet"], as_index=False)[
87
+ "value"
88
+ ].sum()
89
+ other_agg["col"] = "<other>"
90
+ other_agg = other_agg[["index", "col", "facet", "value"]]
91
+ aggregated_df = pd.concat([aggregated_df, other_agg], ignore_index=True)
92
+ top_colors = [*top_colors, "<other>"]
93
+
94
+ if sort_values_facet:
95
+ top_facets = (
96
+ aggregated_df.groupby("facet")["value"]
97
+ .sum()
98
+ .sort_values(ascending=False)[:top_n_facet or None]
99
+ .index
100
+ )
101
+ else:
102
+ top_facets = aggregated_df["facet"].sort_values().unique()[:top_n_facet or None]
103
+
104
+ aggregated_df = aggregated_df[aggregated_df["facet"].isin(top_facets)]
105
+
106
+ # * Ensure facets are sorted alphabetically
107
+ aggregated_df["facet"] = pd.Categorical(
108
+ values=aggregated_df["facet"],
109
+ categories=top_facets,
110
+ ordered=True,
111
+ )
56
112
 
57
- # Ensure facets are sorted alphabetically
58
- aggregated_df['facet'] = pd.Categorical(aggregated_df['facet'], sorted(aggregated_df['facet'].unique()))
59
- aggregated_df = aggregated_df.sort_values(by='facet')
113
+ aggregated_df["index"] = pd.Categorical(
114
+ values=aggregated_df["index"],
115
+ categories=top_indexes,
116
+ ordered=True,
117
+ )
118
+
119
+ aggregated_df["col"] = pd.Categorical(
120
+ values=aggregated_df["col"],
121
+ categories=top_colors,
122
+ ordered=True,
123
+ )
124
+
125
+
126
+ # aggregated_df = aggregated_df.sort_values(by="facet")
60
127
 
61
128
  return aggregated_df
62
129
 
@@ -77,13 +144,15 @@ def assign_column_colors(columns, color_palette, null_label):
77
144
  palette = getattr(px.colors.qualitative, color_palette)
78
145
  else:
79
146
  raise ValueError(f"Invalid color palette: {color_palette}")
80
-
147
+
81
148
  colors = {col: palette[i % len(palette)] for i, col in enumerate(sorted(columns))}
82
149
  colors[null_label] = "lightgray"
83
150
  return colors
84
151
 
152
+
85
153
  ### main functions
86
154
 
155
+
87
156
  def plot_quadrants(
88
157
  df: pd.DataFrame,
89
158
  title: str = None,
@@ -163,7 +232,7 @@ def plot_quadrants(
163
232
 
164
233
  # * save to png if path is provided
165
234
  if png_path is not None:
166
- plt.savefig(Path(png_path).as_posix(), format='png')
235
+ plt.savefig(Path(png_path).as_posix(), format="png")
167
236
 
168
237
  return q1, q2, q3, q4, n
169
238
  # * plotly express is not used for the heatmap, although it does not need the derived wide format.
@@ -185,11 +254,14 @@ def plot_stacked_bars(
185
254
  renderer: Literal["png", "svg", None] = "png",
186
255
  caption: str = None,
187
256
  sort_values: bool = False,
257
+ sort_values_index: bool = False,
258
+ sort_values_color: bool = False,
188
259
  show_total: bool = False,
189
260
  precision: int = 0,
190
261
  png_path: Path | str = None,
191
262
  color_palette: str = "Plotly",
192
263
  null_label: str = "<NA>",
264
+ show_other: bool = False,
193
265
  ) -> plotly.graph_objects:
194
266
  """
195
267
  Generates a stacked bar plot using the provided DataFrame.
@@ -208,7 +280,7 @@ def plot_stacked_bars(
208
280
  - title (str): Custom title for the plot.
209
281
  - renderer (Literal["png", "svg", None]): Defines the output format.
210
282
  - caption (str): Optional caption for additional context.
211
- - sort_values (bool):
283
+ - sort_values (bool):
212
284
  - If True, sorts bars by the sum of their values (descending).
213
285
  - If False, sorts bars alphabetically.
214
286
  - show_total (bool): If True, adds a row with the total sum of all categories.
@@ -216,20 +288,33 @@ def plot_stacked_bars(
216
288
  - png_path (Path | str): If specified, saves the plot as a PNG file.
217
289
  - color_palette (str): Name of the color palette to use.
218
290
  - null_label (str): Label for null values.
219
-
291
+ - show_other (bool): If True, shows the "Other" category in the legend.
292
+ - sort_values_index (bool): If True, sorts the index categories by group sum
293
+ - sort_values_color (bool): If True, sorts the columns categories by group sum
294
+
220
295
  Returns:
221
296
  - A Plotly figure object representing the stacked bar chart.
222
297
  """
223
298
  BAR_LENGTH_MULTIPLIER = 1.05
224
-
299
+
225
300
  # * 2 axis means at least 2 columns
226
301
  if len(df.columns) < 2 or len(df.columns) > 3:
227
302
  print("❌ df must have exactly 2 or 3 columns")
228
303
  return
229
304
 
230
- # * check if first 2 columns are str
231
- if list(set((df.iloc[:, [0, 1]].dtypes)))[0].kind not in ["O", "b"]:
232
- print("❌ first 2 columns must be str")
305
+ # ! do not enforce str columns anymore
306
+ # # * check if first 2 columns are str
307
+ # dtypes = set(df.iloc[:, [0, 1]].dtypes)
308
+ # dtypes_kind = [i.kind for i in dtypes]
309
+
310
+ # if set(dtypes_kind) - set(["O", "b"]):
311
+ # print("❌ first 2 columns must be str")
312
+ # # * overkill ^^
313
+ # df.iloc[:, [0, 1]] = df.iloc[:, [0, 1]].astype(str)
314
+
315
+ # * but last col must be numeric
316
+ if df.iloc[:, -1].dtype.kind not in ("f", "i"):
317
+ print("❌ last column must be numeric")
233
318
  return
234
319
 
235
320
  df = df.copy() # Copy the input DataFrame to avoid modifying the original
@@ -253,87 +338,103 @@ def plot_stacked_bars(
253
338
  # * apply precision
254
339
  df.iloc[:, 2] = df.iloc[:, 2].round(precision)
255
340
 
256
- # * set index + color col
341
+ # # * set index + color col
257
342
  col_index = df.columns[0] if not swap else df.columns[1]
258
343
  col_color = df.columns[1] if not swap else df.columns[0]
259
344
 
260
345
  # * ensure df is grouped to prevent false aggregations
261
- df = (
262
- df.groupby([df.columns[0], df.columns[1]])
263
- [df.columns[2]]
264
- .sum()
265
- .reset_index()
266
- )
346
+ df = df.groupby([df.columns[0], df.columns[1]])[df.columns[2]].sum().reset_index()
267
347
 
268
348
  # * add total as aggregation of df
269
349
  if show_total:
270
- df_total = df.groupby(df.columns[1], observed=True, as_index=False)[df.columns[2]].sum()
350
+ df_total = df.groupby(df.columns[1], observed=True, as_index=False)[
351
+ df.columns[2]
352
+ ].sum()
271
353
  df_total[df.columns[0]] = " Total"
272
354
  df = pd.concat([df, df_total], ignore_index=True)
273
355
 
274
-
275
- # * apply top_n, reduce df
276
- n_col = top_n_color if top_n_color > 0 else None
277
- n_idx = top_n_index if top_n_index > 0 else None
278
-
279
- unique_colors = sorted(
280
- df.groupby(col_color)[df.columns[2]]
281
- .sum()
282
- .sort_values(ascending=False)
283
- .index.tolist()[:n_col]
284
- )
285
-
286
- unique_idx = df[col_index].sort_values().unique()[:n_idx]
287
-
288
- df = df[df[col_color].isin(unique_colors)]#.sort_values(by=[col_index, col_color])
289
- df = df[df[col_index].isin(unique_idx)]#.sort_values(by=[col_index, col_color])
290
-
291
-
292
- # # * Sorting logic based on sort_values
293
- if sort_values:
294
- sort_order = (
295
- df.groupby(col_index)[df.columns[2]].sum().sort_values(ascending=False).index
296
- )
297
- else:
298
- sort_order = sorted(df[col_index].unique()) # Alphabetical order
299
-
300
- # # * Convert to categorical with explicit ordering
301
- df[col_index] = pd.Categorical(df[col_index], categories=sort_order, ordered=True)
302
-
303
- column_colors = assign_column_colors(
304
- columns=unique_colors,
305
- color_palette=color_palette,
306
- null_label=null_label
307
- )
308
-
309
356
  # * calculate n
310
357
  divider = 2 if show_total else 1
311
- n = int(df[df.columns[2]].sum() / divider)
358
+ n = int(df.iloc[:, 2].sum() / divider)
312
359
 
313
360
  # * title str
314
361
  _title_str_top_index = f"TOP{top_n_index} " if top_n_index > 0 else ""
315
362
  _title_str_top_color = f"TOP{top_n_color} " if top_n_color > 0 else ""
316
363
  _title_str_null = f", NULL excluded" if dropna else ""
317
364
  _title_str_n = f", n={n:_}"
365
+
366
+ _df = df.copy().assign(facet=None)
367
+ _df.columns = (
368
+ ["index", "col", "value", "facet"]
369
+ if not swap
370
+ else ["col", "index", "value", "facet"]
371
+ )
372
+
373
+ aggregated_df = aggregate_data(
374
+ df=_df,
375
+ top_n_index=top_n_index,
376
+ top_n_color=top_n_color,
377
+ top_n_facet=0,
378
+ null_label=null_label,
379
+ show_other=show_other,
380
+ sort_values_index=sort_values_index,
381
+ sort_values_color=sort_values_color,
382
+ sort_values_facet=False, # just a placeholder
383
+ )
384
+
385
+ df = aggregated_df.copy()
386
+
387
+ columns = sorted(
388
+ df.groupby("col", observed=True)["value"]
389
+ .sum()
390
+ .sort_values(ascending=False)
391
+ .index.tolist()
392
+ )
393
+ column_colors = assign_column_colors(columns, color_palette, null_label)
394
+
318
395
  caption = _set_caption(caption)
319
396
 
320
- # * after grouping add cols for pct and formatting
321
- df["pct"] = df[df.columns[2]].apply(lambda x: f"{(x / n) * 100:.{precision}f}%")
397
+ # * after grouping add cols for pct and formatting
398
+ df["cnt_pct_only"] = df["value"].apply(lambda x: f"{(x / n) * 100:.{precision}f}%")
322
399
 
323
400
  # * format output
324
- df["cnt_str"] = df[df.columns[2]].apply(lambda x: f"{x:_.{precision}f}")
401
+ df["cnt_str"] = df["value"].apply(lambda x: f"{x:_.{precision}f}")
325
402
 
326
403
  divider2 = "<br>" if orientation == "v" else " "
327
404
  df["cnt_pct_str"] = df.apply(
328
- lambda row: f"{row['cnt_str']}{divider2}({row['pct']})", axis=1
405
+ lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_only']})", axis=1
329
406
  )
330
407
 
408
+ # # # * Sorting logic based on sort_values
409
+ # if sort_values_index:
410
+ # sort_order = (
411
+ # df.groupby("index")["value"].sum().sort_values(ascending=False).index
412
+ # )
413
+ # else:
414
+ # sort_order = sorted(df["index"].unique(), reverse=False) # Alphabetical order
415
+
416
+ # display(sort_order)
417
+
418
+ # df["index"] = pd.Categorical(
419
+ # values=df["index"],
420
+ # # categories=sort_order,
421
+ # ordered=True,
422
+ # )
423
+
424
+ df = (
425
+ df.sort_values(by=["col","index"], ascending=[True, False])
426
+ if orientation == "h"
427
+ else df.sort_values(by=["index","col"], ascending=[True, True])
428
+ )
429
+
430
+ # display(df)
431
+
331
432
  # * plot
332
433
  fig = px.bar(
333
434
  df,
334
- x=col_index if orientation == "v" else df.columns[2],
335
- y=df.columns[2] if orientation == "v" else col_index,
336
- color=col_color,
435
+ x="index" if orientation == "v" else "value",
436
+ y="value" if orientation == "v" else "index",
437
+ color="col",
337
438
  text="cnt_pct_str" if normalize else "cnt_str",
338
439
  orientation=orientation,
339
440
  title=title
@@ -342,13 +443,15 @@ def plot_stacked_bars(
342
443
  width=width,
343
444
  height=height,
344
445
  color_discrete_map=column_colors, # Use assigned colors
345
- category_orders={col_index: list(df[col_index].cat.categories)}, # <- Add this line
346
-
446
+ category_orders={
447
+ col_index: list(df["index"].cat.categories)
448
+ }, # <- Add this line
347
449
  )
348
-
349
- # * get longest bar
450
+
451
+
452
+ # * get longest bar
350
453
  bar_max = (
351
- df.groupby(col_index)[df.columns[2]].sum().sort_values(ascending=False).iloc[0]
454
+ df.groupby("index")["value"].sum().sort_values(ascending=False).iloc[0]
352
455
  * BAR_LENGTH_MULTIPLIER
353
456
  )
354
457
  # * ignore if bar mode is on
@@ -372,7 +475,7 @@ def plot_stacked_bars(
372
475
  },
373
476
  },
374
477
  )
375
-
478
+
376
479
  # * set dtick
377
480
  if orientation == "h":
378
481
  if relative:
@@ -692,7 +795,7 @@ def plot_histogram(
692
795
  caption (str): The caption for the plot. Default is None.
693
796
  title (str): The title of the plot. Default is None.
694
797
  png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
695
-
798
+
696
799
 
697
800
  Returns:
698
801
  plot object
@@ -744,7 +847,7 @@ def plot_histogram(
744
847
  )
745
848
 
746
849
  fig.show(renderer)
747
-
850
+
748
851
  # * save to png if path is provided
749
852
  if png_path is not None:
750
853
  fig.write_image(Path(png_path).as_posix())
@@ -1156,12 +1259,11 @@ def plot_boxes(
1156
1259
  return fig
1157
1260
 
1158
1261
 
1159
-
1160
1262
  def plot_facet_stacked_bars(
1161
1263
  df: pd.DataFrame,
1162
1264
  subplots_per_row: int = 4,
1163
1265
  top_n_index: int = 0,
1164
- top_n_columns: int = 0,
1266
+ top_n_color: int = 0,
1165
1267
  top_n_facet: int = 0,
1166
1268
  null_label: str = "<NA>",
1167
1269
  subplot_size: int = 300,
@@ -1171,6 +1273,12 @@ def plot_facet_stacked_bars(
1171
1273
  annotations: bool = False,
1172
1274
  precision: int = 0,
1173
1275
  png_path: Optional[Path] = None,
1276
+ show_other: bool = False,
1277
+ sort_values: bool = True,
1278
+ sort_values_index: bool = False,
1279
+ sort_values_color: bool = False,
1280
+ sort_values_facet: bool = False,
1281
+
1174
1282
  ) -> object:
1175
1283
  """
1176
1284
  Create a grid of stacked bar charts.
@@ -1179,7 +1287,7 @@ def plot_facet_stacked_bars(
1179
1287
  df (pd.DataFrame): DataFrame with 3 or 4 columns.
1180
1288
  subplots_per_row (int): Number of subplots per row.
1181
1289
  top_n_index (int): top N index values to keep.
1182
- top_n_columns (int): top N column values to keep.
1290
+ top_n_color (int): top N column values to keep.
1183
1291
  top_n_facet (int): top N facet values to keep.
1184
1292
  null_label (str): Label for null values.
1185
1293
  subplot_size (int): Size of each subplot.
@@ -1189,47 +1297,57 @@ def plot_facet_stacked_bars(
1189
1297
  annotations (bool): Whether to show annotations in the subplots.
1190
1298
  precision (int): Decimal precision for annotations.
1191
1299
  png_path (Optional[Path]): Path to save the image.
1300
+ show_other (bool): If True, adds an "<other>" bar for columns not in top_n_color.
1301
+ sort_values_index (bool): If True, sorts index by group sum.
1302
+ sort_values_color (bool): If True, sorts columns by group sum.
1303
+ sort_values_facet (bool): If True, sorts facet by group sum.
1304
+ sort_values (bool): DEPRECATED
1305
+
1192
1306
 
1193
1307
  Returns:
1194
1308
  plot object
1195
-
1309
+
1196
1310
  Remarks:
1197
1311
  If you need to include facets that have no data, fill up like this beforehand:
1198
1312
  df.loc[len(df)]=[None, None, 12]
1199
1313
  """
1200
-
1314
+
1201
1315
  df = df.copy() # Copy the input DataFrame to avoid modifying the original
1202
1316
 
1203
1317
  if not (df.shape[1] == 3 or df.shape[1] == 4):
1204
1318
  raise ValueError("Input DataFrame must have 3 or 4 columns.")
1205
-
1319
+
1206
1320
  original_column_names = df.columns.tolist()
1321
+ original_rows = len(df)
1207
1322
 
1208
1323
  if df.shape[1] == 3:
1209
- df.columns = ['index', 'col', 'facet']
1210
- df['value'] = 1
1324
+ df.columns = ["index", "col", "facet"]
1325
+ df["value"] = 1
1211
1326
  elif df.shape[1] == 4:
1212
- df.columns = ['index', 'col', 'facet', 'value']
1213
-
1214
- aggregated_df = aggregate_data(df, top_n_index, top_n_columns, top_n_facet, null_label)
1215
-
1216
- # facets = aggregated_df['facet'].unique()
1217
- facets = sorted(aggregated_df['facet'].unique()) # Ensure facets are sorted consistently
1327
+ df.columns = ["index", "col", "facet", "value"]
1218
1328
 
1219
- if top_n_columns > 0:
1220
- top_columns = aggregated_df.groupby('col', observed=True)['value'].sum().nlargest(top_n_columns).index.tolist()
1221
- # aggregated_df['col'] = aggregated_df['col'].apply(lambda x: x if x in top_columns else "<other>")
1222
- # aggregated_df['col'] = pd.Categorical(aggregated_df['col'], categories=top_columns + ["<other>"], ordered=True)
1223
- # aggregated_df['col'] = pd.Categorical(
1224
- # aggregated_df['col'].map(lambda x: x if x in top_columns else "<other>"),
1225
- # categories=top_columns + ["<other>"],
1226
- # ordered=True
1227
- # )
1228
- aggregated_df['col'] = aggregated_df['col'].apply(lambda x: x if x in top_columns else "<other>")
1329
+ aggregated_df = aggregate_data(
1330
+ df,
1331
+ top_n_index,
1332
+ top_n_color,
1333
+ top_n_facet,
1334
+ null_label,
1335
+ show_other=show_other,
1336
+ sort_values_index=sort_values_index,
1337
+ sort_values_color=sort_values_color,
1338
+ sort_values_facet=sort_values_facet,
1339
+ )
1229
1340
 
1341
+ facets = sorted(
1342
+ aggregated_df["facet"].unique()
1343
+ ) # Ensure facets are sorted consistently
1230
1344
 
1231
- # columns = sorted(aggregated_df['col'].unique())
1232
- columns = aggregated_df.groupby('col', observed=True)['value'].sum().sort_values(ascending=False).index.tolist()
1345
+ columns = sorted(
1346
+ aggregated_df.groupby("col", observed=True)["value"]
1347
+ .sum()
1348
+ .sort_values(ascending=False)
1349
+ .index.tolist()
1350
+ )
1233
1351
  column_colors = assign_column_colors(columns, color_palette, null_label)
1234
1352
 
1235
1353
  fig = make_subplots(
@@ -1238,25 +1356,39 @@ def plot_facet_stacked_bars(
1238
1356
  subplot_titles=facets,
1239
1357
  )
1240
1358
 
1359
+ # * Ensure all categories appear in the legend by adding an invisible trace
1360
+ for column in columns:
1361
+ fig.add_trace(
1362
+ go.Bar(
1363
+ x=[None], # Invisible bar
1364
+ y=[None],
1365
+ name=column,
1366
+ marker=dict(color=column_colors[column]),
1367
+ showlegend=True, # Ensure it appears in the legend
1368
+ )
1369
+ )
1370
+
1241
1371
  added_to_legend = set()
1242
1372
  for i, facet in enumerate(facets):
1243
- facet_data = aggregated_df[aggregated_df['facet'] == facet]
1373
+ facet_data = aggregated_df[aggregated_df["facet"] == facet]
1244
1374
  row = (i // subplots_per_row) + 1
1245
1375
  col = (i % subplots_per_row) + 1
1246
1376
 
1247
1377
  for column in columns:
1248
- column_data = facet_data[facet_data['col'] == column]
1378
+ column_data = facet_data[facet_data["col"] == column]
1379
+
1249
1380
  show_legend = column not in added_to_legend
1250
1381
  if show_legend:
1251
1382
  added_to_legend.add(column)
1252
1383
 
1253
1384
  fig.add_trace(
1254
1385
  go.Bar(
1255
- x=column_data['index'],
1256
- y=column_data['value'],
1386
+ x=column_data["index"],
1387
+ y=column_data["value"],
1257
1388
  name=column,
1258
1389
  marker=dict(color=column_colors[column]),
1259
- showlegend=show_legend,
1390
+ legendgroup=column, # Ensures multiple traces use the same legend entry
1391
+ showlegend=False, # suppress further legend items
1260
1392
  ),
1261
1393
  row=row,
1262
1394
  col=col,
@@ -1265,8 +1397,8 @@ def plot_facet_stacked_bars(
1265
1397
  if annotations:
1266
1398
  for _, row_data in column_data.iterrows():
1267
1399
  fig.add_annotation(
1268
- x=row_data['index'],
1269
- y=row_data['value'],
1400
+ x=row_data["index"],
1401
+ y=row_data["value"],
1270
1402
  text=f"{row_data['value']:.{precision}f}",
1271
1403
  showarrow=False,
1272
1404
  row=row,
@@ -1280,8 +1412,8 @@ def plot_facet_stacked_bars(
1280
1412
  else:
1281
1413
  axis_details.append(f"[{original_column_names[0]}]")
1282
1414
 
1283
- if top_n_columns > 0:
1284
- axis_details.append(f"TOP {top_n_columns} [{original_column_names[1]}]")
1415
+ if top_n_color > 0:
1416
+ axis_details.append(f"TOP {top_n_color} [{original_column_names[1]}]")
1285
1417
  else:
1286
1418
  axis_details.append(f"[{original_column_names[1]}]")
1287
1419
 
@@ -1290,7 +1422,7 @@ def plot_facet_stacked_bars(
1290
1422
  else:
1291
1423
  axis_details.append(f"[{original_column_names[2]}]")
1292
1424
 
1293
- title = f"{caption} {', '.join(axis_details)}, n = {unique_rows:_}"
1425
+ title = f"{caption} {', '.join(axis_details)}, n = {original_rows:_}"
1294
1426
  template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
1295
1427
  fig.update_layout(
1296
1428
  title=title,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pandas-plots
3
- Version: 0.12.7
3
+ Version: 0.12.9
4
4
  Summary: A collection of helper for table handling and visualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
File without changes
File without changes