pandas-plots 0.12.8__tar.gz → 0.12.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pandas-plots
3
- Version: 0.12.8
3
+ Version: 0.12.10
4
4
  Summary: A collection of helpers for table handling and visualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -96,7 +96,7 @@ tbl.show_num_df(
96
96
  - `plot_histogram()` histogram for one or more **numerical** columns
97
97
  - `plot_joints()` a joint plot for **exactly two numerical** columns
98
98
  - `plot_quadrants()` quickly shows a 2x2 heatmap
99
- - 🆕 `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
99
+ - `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
100
100
  <br>
101
101
 
102
102
  - `ven` offers functions for _venn diagrams_
@@ -113,7 +113,7 @@ tbl.show_num_df(
113
113
  - `add_datetime_col()` adds a datetime column to a dataframe (chainable)
114
114
  - `show_package_version` prints version of a list of packages
115
115
  - `get_os` helps to identify and ensure operating system at runtime
116
- - `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
116
+ - `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
117
117
  <br>
118
118
 
119
119
  - `pii` has routines for handling of personally identifiable information
@@ -61,7 +61,7 @@ tbl.show_num_df(
61
61
  - `plot_histogram()` histogram for one or more **numerical** columns
62
62
  - `plot_joints()` a joint plot for **exactly two numerical** columns
63
63
  - `plot_quadrants()` quickly shows a 2x2 heatmap
64
- - 🆕 `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
64
+ - `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
65
65
  <br>
66
66
 
67
67
  - `ven` offers functions for _venn diagrams_
@@ -78,7 +78,7 @@ tbl.show_num_df(
78
78
  - `add_datetime_col()` adds a datetime column to a dataframe (chainable)
79
79
  - `show_package_version` prints version of a list of packages
80
80
  - `get_os` helps to identify and ensure operating system at runtime
81
- - `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
81
+ - `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
82
82
  <br>
83
83
 
84
84
  - `pii` has routines for handling of personally identifiable information
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = pandas-plots
3
- version = 0.12.8
3
+ version = 0.12.10
4
4
  author = smeisegeier
5
5
  author_email = dexterDSDo@googlemail.com
6
6
  description = A collection of helpers for table handling and visualization
@@ -1,4 +1,7 @@
1
1
  from pathlib import Path
2
+ import warnings
3
+
4
+ warnings.filterwarnings("ignore")
2
5
 
3
6
  import os
4
7
  from typing import Optional, Literal
@@ -45,7 +48,7 @@ def aggregate_data(
45
48
  sort_values (bool): Whether to sort values in descending order based on group sum. Defaults to False.
46
49
 
47
50
  Returns:
48
- pd.DataFrame: Aggregated and filtered dataset.
51
+ pd.DataFrame: Aggregated and filtered dataset (but not sorted!)
49
52
  """
50
53
 
51
54
  for col in ["index", "col", "facet"]: # Skip 'value' column (numeric)
@@ -62,6 +65,7 @@ def aggregate_data(
62
65
  .sort_values(ascending=False)[:top_n_index or None]
63
66
  .index
64
67
  )
68
+
65
69
  else:
66
70
  top_indexes = aggregated_df["index"].sort_values().unique()[:top_n_index or None]
67
71
 
@@ -100,28 +104,6 @@ def aggregate_data(
100
104
 
101
105
  aggregated_df = aggregated_df[aggregated_df["facet"].isin(top_facets)]
102
106
 
103
- # * Ensure facets are sorted alphabetically
104
- aggregated_df["facet"] = pd.Categorical(
105
- values=aggregated_df["facet"],
106
- categories=top_facets,
107
- ordered=True,
108
- )
109
-
110
- aggregated_df["index"] = pd.Categorical(
111
- values=aggregated_df["index"],
112
- categories=top_indexes,
113
- ordered=True,
114
- )
115
-
116
- aggregated_df["col"] = pd.Categorical(
117
- values=aggregated_df["col"],
118
- categories=top_colors,
119
- ordered=True,
120
- )
121
-
122
-
123
- # aggregated_df = aggregated_df.sort_values(by="facet")
124
-
125
107
  return aggregated_df
126
108
 
127
109
 
@@ -381,14 +363,6 @@ def plot_stacked_bars(
381
363
 
382
364
  df = aggregated_df.copy()
383
365
 
384
- columns = sorted(
385
- df.groupby("col", observed=True)["value"]
386
- .sum()
387
- .sort_values(ascending=False)
388
- .index.tolist()
389
- )
390
- column_colors = assign_column_colors(columns, color_palette, null_label)
391
-
392
366
  caption = _set_caption(caption)
393
367
 
394
368
  # * after grouping add cols for pct and formatting
@@ -402,34 +376,44 @@ def plot_stacked_bars(
402
376
  lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_only']})", axis=1
403
377
  )
404
378
 
405
- # # # * Sorting logic based on sort_values
406
- # if sort_values_index:
407
- # sort_order = (
408
- # df.groupby("index")["value"].sum().sort_values(ascending=False).index
409
- # )
410
- # else:
411
- # sort_order = sorted(df["index"].unique(), reverse=False) # Alphabetical order
379
+ if sort_values_color:
380
+ colors_unique = (df
381
+ .groupby("col", observed=True)["value"]
382
+ .sum()
383
+ .sort_values(ascending=False)
384
+ .index.tolist()
385
+ )
386
+ else:
387
+ colors_unique = sorted(df["col"].unique().tolist())
412
388
 
413
- # display(sort_order)
389
+ if sort_values_index:
390
+ index_unique = (df
391
+ .groupby("index", observed=True)["value"]
392
+ .sum()
393
+ .sort_values(ascending=False)
394
+ .index.tolist()
395
+ )
396
+ else:
397
+ index_unique = sorted(df["index"].unique().tolist())
414
398
 
415
- # df["index"] = pd.Categorical(
416
- # values=df["index"],
417
- # # categories=sort_order,
418
- # ordered=True,
419
- # )
420
- df = (
421
- df.sort_values(by="index", ascending=False)
422
- if orientation == "h"
423
- else df.sort_values(by="index", ascending=True)
424
- )
399
+ color_map = assign_column_colors(colors_unique, color_palette, null_label)
400
+
401
+
402
+ cat_orders = {
403
+ "index": index_unique,
404
+ "col": colors_unique,
405
+ }
406
+
407
+ # Ensure the "index" column is categorical with the correct order
408
+ df["index"] = pd.Categorical(df["index"], categories=cat_orders["index"], ordered=True)
425
409
 
426
- # display(df)
427
410
 
428
411
  # * plot
429
412
  fig = px.bar(
430
413
  df,
431
414
  x="index" if orientation == "v" else "value",
432
415
  y="value" if orientation == "v" else "index",
416
+ # color=columns,
433
417
  color="col",
434
418
  text="cnt_pct_str" if normalize else "cnt_str",
435
419
  orientation=orientation,
@@ -438,13 +422,15 @@ def plot_stacked_bars(
438
422
  template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
439
423
  width=width,
440
424
  height=height,
441
- color_discrete_map=column_colors, # Use assigned colors
442
- category_orders={
443
- col_index: list(df["index"].cat.categories)
444
- }, # <- Add this line
425
+ color_discrete_map=color_map, # Use assigned colors
426
+ category_orders= cat_orders,
445
427
  )
446
428
 
447
429
 
430
+ # print(cat_orders)
431
+ # print(color_map)
432
+ # display(df)
433
+
448
434
  # * get longest bar
449
435
  bar_max = (
450
436
  df.groupby("index")["value"].sum().sort_values(ascending=False).iloc[0]
@@ -471,6 +457,9 @@ def plot_stacked_bars(
471
457
  },
472
458
  },
473
459
  )
460
+ fig.update_layout(legend_traceorder="normal")
461
+ fig.update_layout(legend_title_text=col_color)
462
+
474
463
 
475
464
  # * set dtick
476
465
  if orientation == "h":
@@ -1274,6 +1263,7 @@ def plot_facet_stacked_bars(
1274
1263
  sort_values_index: bool = False,
1275
1264
  sort_values_color: bool = False,
1276
1265
  sort_values_facet: bool = False,
1266
+ relative: bool = False,
1277
1267
 
1278
1268
  ) -> object:
1279
1269
  """
@@ -1297,6 +1287,7 @@ def plot_facet_stacked_bars(
1297
1287
  sort_values_index (bool): If True, sorts index by group sum.
1298
1288
  sort_values_color (bool): If True, sorts columns by group sum.
1299
1289
  sort_values_facet (bool): If True, sorts facet by group sum.
1290
+ relative (bool): If True, show bars as relative proportions to 100%.
1300
1291
  sort_values (bool): DEPRECATED
1301
1292
 
1302
1293
 
@@ -1352,6 +1343,11 @@ def plot_facet_stacked_bars(
1352
1343
  subplot_titles=facets,
1353
1344
  )
1354
1345
 
1346
+ # * relative?
1347
+ if relative:
1348
+ aggregated_df["value"] = aggregated_df.groupby(["facet", "index"])["value"].transform(lambda x: x / x.sum())
1349
+ fig.update_layout(yaxis_tickformat=".0%") # Show as percentage
1350
+
1355
1351
  # * Ensure all categories appear in the legend by adding an invisible trace
1356
1352
  for column in columns:
1357
1353
  fig.add_trace(
@@ -1420,6 +1416,7 @@ def plot_facet_stacked_bars(
1420
1416
 
1421
1417
  title = f"{caption} {', '.join(axis_details)}, n = {original_rows:_}"
1422
1418
  template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
1419
+
1423
1420
  fig.update_layout(
1424
1421
  title=title,
1425
1422
  barmode="stack",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pandas-plots
3
- Version: 0.12.8
3
+ Version: 0.12.10
4
4
  Summary: A collection of helpers for table handling and visualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -96,7 +96,7 @@ tbl.show_num_df(
96
96
  - `plot_histogram()` histogram for one or more **numerical** columns
97
97
  - `plot_joints()` a joint plot for **exactly two numerical** columns
98
98
  - `plot_quadrants()` quickly shows a 2x2 heatmap
99
- - 🆕 `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
99
+ - `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
100
100
  <br>
101
101
 
102
102
  - `ven` offers functions for _venn diagrams_
@@ -113,7 +113,7 @@ tbl.show_num_df(
113
113
  - `add_datetime_col()` adds a datetime column to a dataframe (chainable)
114
114
  - `show_package_version` prints version of a list of packages
115
115
  - `get_os` helps to identify and ensure operating system at runtime
116
- - `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
116
+ - `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
117
117
  <br>
118
118
 
119
119
  - `pii` has routines for handling of personally identifiable information
File without changes