pandas-plots 0.12.8__tar.gz → 0.12.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas_plots-0.12.8/src/pandas_plots.egg-info → pandas_plots-0.12.10}/PKG-INFO +3 -3
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/README.md +2 -2
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/setup.cfg +1 -1
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots/pls.py +51 -54
- {pandas_plots-0.12.8 → pandas_plots-0.12.10/src/pandas_plots.egg-info}/PKG-INFO +3 -3
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/LICENSE +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/pyproject.toml +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots/hlp.py +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots/pii.py +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots/tbl.py +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots/ven.py +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots.egg-info/SOURCES.txt +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots.egg-info/dependency_links.txt +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots.egg-info/pii.py +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots.egg-info/requires.txt +0 -0
- {pandas_plots-0.12.8 → pandas_plots-0.12.10}/src/pandas_plots.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.12.8
|
3
|
+
Version: 0.12.10
|
4
4
|
Summary: A collection of helpers for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -96,7 +96,7 @@ tbl.show_num_df(
|
|
96
96
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
97
97
|
- `plot_joints()` a joint plot for **exactly two numerical** columns
|
98
98
|
- `plot_quadrants()` quickly shows a 2x2 heatmap
|
99
|
-
-
|
99
|
+
- `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
|
100
100
|
<br>
|
101
101
|
|
102
102
|
- `ven` offers functions for _venn diagrams_
|
@@ -113,7 +113,7 @@ tbl.show_num_df(
|
|
113
113
|
- `add_datetime_col()` adds a datetime column to a dataframe (chainable)
|
114
114
|
- `show_package_version` prints version of a list of packages
|
115
115
|
- `get_os` helps to identify and ensure operating system at runtime
|
116
|
-
-
|
116
|
+
- `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
|
117
117
|
<br>
|
118
118
|
|
119
119
|
- `pii` has routines for handling of personally identifiable information
|
@@ -61,7 +61,7 @@ tbl.show_num_df(
|
|
61
61
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
62
62
|
- `plot_joints()` a joint plot for **exactly two numerical** columns
|
63
63
|
- `plot_quadrants()` quickly shows a 2x2 heatmap
|
64
|
-
-
|
64
|
+
- `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
|
65
65
|
<br>
|
66
66
|
|
67
67
|
- `ven` offers functions for _venn diagrams_
|
@@ -78,7 +78,7 @@ tbl.show_num_df(
|
|
78
78
|
- `add_datetime_col()` adds a datetime column to a dataframe (chainable)
|
79
79
|
- `show_package_version` prints version of a list of packages
|
80
80
|
- `get_os` helps to identify and ensure operating system at runtime
|
81
|
-
-
|
81
|
+
- `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
|
82
82
|
<br>
|
83
83
|
|
84
84
|
- `pii` has routines for handling of personally identifiable information
|
@@ -1,4 +1,7 @@
|
|
1
1
|
from pathlib import Path
|
2
|
+
import warnings
|
3
|
+
|
4
|
+
warnings.filterwarnings("ignore")
|
2
5
|
|
3
6
|
import os
|
4
7
|
from typing import Optional, Literal
|
@@ -45,7 +48,7 @@ def aggregate_data(
|
|
45
48
|
sort_values (bool): Whether to sort values in descending order based on group sum. Defaults to False.
|
46
49
|
|
47
50
|
Returns:
|
48
|
-
pd.DataFrame: Aggregated and filtered dataset
|
51
|
+
pd.DataFrame: Aggregated and filtered dataset (but not sorted!)
|
49
52
|
"""
|
50
53
|
|
51
54
|
for col in ["index", "col", "facet"]: # Skip 'value' column (numeric)
|
@@ -62,6 +65,7 @@ def aggregate_data(
|
|
62
65
|
.sort_values(ascending=False)[:top_n_index or None]
|
63
66
|
.index
|
64
67
|
)
|
68
|
+
|
65
69
|
else:
|
66
70
|
top_indexes = aggregated_df["index"].sort_values().unique()[:top_n_index or None]
|
67
71
|
|
@@ -100,28 +104,6 @@ def aggregate_data(
|
|
100
104
|
|
101
105
|
aggregated_df = aggregated_df[aggregated_df["facet"].isin(top_facets)]
|
102
106
|
|
103
|
-
# * Ensure facets are sorted alphabetically
|
104
|
-
aggregated_df["facet"] = pd.Categorical(
|
105
|
-
values=aggregated_df["facet"],
|
106
|
-
categories=top_facets,
|
107
|
-
ordered=True,
|
108
|
-
)
|
109
|
-
|
110
|
-
aggregated_df["index"] = pd.Categorical(
|
111
|
-
values=aggregated_df["index"],
|
112
|
-
categories=top_indexes,
|
113
|
-
ordered=True,
|
114
|
-
)
|
115
|
-
|
116
|
-
aggregated_df["col"] = pd.Categorical(
|
117
|
-
values=aggregated_df["col"],
|
118
|
-
categories=top_colors,
|
119
|
-
ordered=True,
|
120
|
-
)
|
121
|
-
|
122
|
-
|
123
|
-
# aggregated_df = aggregated_df.sort_values(by="facet")
|
124
|
-
|
125
107
|
return aggregated_df
|
126
108
|
|
127
109
|
|
@@ -381,14 +363,6 @@ def plot_stacked_bars(
|
|
381
363
|
|
382
364
|
df = aggregated_df.copy()
|
383
365
|
|
384
|
-
columns = sorted(
|
385
|
-
df.groupby("col", observed=True)["value"]
|
386
|
-
.sum()
|
387
|
-
.sort_values(ascending=False)
|
388
|
-
.index.tolist()
|
389
|
-
)
|
390
|
-
column_colors = assign_column_colors(columns, color_palette, null_label)
|
391
|
-
|
392
366
|
caption = _set_caption(caption)
|
393
367
|
|
394
368
|
# * after grouping add cols for pct and formatting
|
@@ -402,34 +376,44 @@ def plot_stacked_bars(
|
|
402
376
|
lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_only']})", axis=1
|
403
377
|
)
|
404
378
|
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
379
|
+
if sort_values_color:
|
380
|
+
colors_unique = (df
|
381
|
+
.groupby("col", observed=True)["value"]
|
382
|
+
.sum()
|
383
|
+
.sort_values(ascending=False)
|
384
|
+
.index.tolist()
|
385
|
+
)
|
386
|
+
else:
|
387
|
+
colors_unique = sorted(df["col"].unique().tolist())
|
412
388
|
|
413
|
-
|
389
|
+
if sort_values_index:
|
390
|
+
index_unique = (df
|
391
|
+
.groupby("index", observed=True)["value"]
|
392
|
+
.sum()
|
393
|
+
.sort_values(ascending=False)
|
394
|
+
.index.tolist()
|
395
|
+
)
|
396
|
+
else:
|
397
|
+
index_unique = sorted(df["index"].unique().tolist())
|
414
398
|
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
)
|
399
|
+
color_map = assign_column_colors(colors_unique, color_palette, null_label)
|
400
|
+
|
401
|
+
|
402
|
+
cat_orders = {
|
403
|
+
"index": index_unique,
|
404
|
+
"col": colors_unique,
|
405
|
+
}
|
406
|
+
|
407
|
+
# Ensure the "index" column is categorical with the correct order
|
408
|
+
df["index"] = pd.Categorical(df["index"], categories=cat_orders["index"], ordered=True)
|
425
409
|
|
426
|
-
# display(df)
|
427
410
|
|
428
411
|
# * plot
|
429
412
|
fig = px.bar(
|
430
413
|
df,
|
431
414
|
x="index" if orientation == "v" else "value",
|
432
415
|
y="value" if orientation == "v" else "index",
|
416
|
+
# color=columns,
|
433
417
|
color="col",
|
434
418
|
text="cnt_pct_str" if normalize else "cnt_str",
|
435
419
|
orientation=orientation,
|
@@ -438,13 +422,15 @@ def plot_stacked_bars(
|
|
438
422
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
439
423
|
width=width,
|
440
424
|
height=height,
|
441
|
-
color_discrete_map=
|
442
|
-
category_orders=
|
443
|
-
col_index: list(df["index"].cat.categories)
|
444
|
-
}, # <- Add this line
|
425
|
+
color_discrete_map=color_map, # Use assigned colors
|
426
|
+
category_orders= cat_orders,
|
445
427
|
)
|
446
428
|
|
447
429
|
|
430
|
+
# print(cat_orders)
|
431
|
+
# print(color_map)
|
432
|
+
# display(df)
|
433
|
+
|
448
434
|
# * get longest bar
|
449
435
|
bar_max = (
|
450
436
|
df.groupby("index")["value"].sum().sort_values(ascending=False).iloc[0]
|
@@ -471,6 +457,9 @@ def plot_stacked_bars(
|
|
471
457
|
},
|
472
458
|
},
|
473
459
|
)
|
460
|
+
fig.update_layout(legend_traceorder="normal")
|
461
|
+
fig.update_layout(legend_title_text=col_color)
|
462
|
+
|
474
463
|
|
475
464
|
# * set dtick
|
476
465
|
if orientation == "h":
|
@@ -1274,6 +1263,7 @@ def plot_facet_stacked_bars(
|
|
1274
1263
|
sort_values_index: bool = False,
|
1275
1264
|
sort_values_color: bool = False,
|
1276
1265
|
sort_values_facet: bool = False,
|
1266
|
+
relative: bool = False,
|
1277
1267
|
|
1278
1268
|
) -> object:
|
1279
1269
|
"""
|
@@ -1297,6 +1287,7 @@ def plot_facet_stacked_bars(
|
|
1297
1287
|
sort_values_index (bool): If True, sorts index by group sum.
|
1298
1288
|
sort_values_color (bool): If True, sorts columns by group sum.
|
1299
1289
|
sort_values_facet (bool): If True, sorts facet by group sum.
|
1290
|
+
relative (bool): If True, show bars as relative proportions to 100%.
|
1300
1291
|
sort_values (bool): DEPRECATED
|
1301
1292
|
|
1302
1293
|
|
@@ -1352,6 +1343,11 @@ def plot_facet_stacked_bars(
|
|
1352
1343
|
subplot_titles=facets,
|
1353
1344
|
)
|
1354
1345
|
|
1346
|
+
# * relative?
|
1347
|
+
if relative:
|
1348
|
+
aggregated_df["value"] = aggregated_df.groupby(["facet", "index"])["value"].transform(lambda x: x / x.sum())
|
1349
|
+
fig.update_layout(yaxis_tickformat=".0%") # Show as percentage
|
1350
|
+
|
1355
1351
|
# * Ensure all categories appear in the legend by adding an invisible trace
|
1356
1352
|
for column in columns:
|
1357
1353
|
fig.add_trace(
|
@@ -1420,6 +1416,7 @@ def plot_facet_stacked_bars(
|
|
1420
1416
|
|
1421
1417
|
title = f"{caption} {', '.join(axis_details)}, n = {original_rows:_}"
|
1422
1418
|
template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
|
1419
|
+
|
1423
1420
|
fig.update_layout(
|
1424
1421
|
title=title,
|
1425
1422
|
barmode="stack",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.12.8
|
3
|
+
Version: 0.12.10
|
4
4
|
Summary: A collection of helpers for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -96,7 +96,7 @@ tbl.show_num_df(
|
|
96
96
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
97
97
|
- `plot_joints()` a joint plot for **exactly two numerical** columns
|
98
98
|
- `plot_quadrants()` quickly shows a 2x2 heatmap
|
99
|
-
-
|
99
|
+
- `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
|
100
100
|
<br>
|
101
101
|
|
102
102
|
- `ven` offers functions for _venn diagrams_
|
@@ -113,7 +113,7 @@ tbl.show_num_df(
|
|
113
113
|
- `add_datetime_col()` adds a datetime column to a dataframe (chainable)
|
114
114
|
- `show_package_version` prints version of a list of packages
|
115
115
|
- `get_os` helps to identify and ensure operating system at runtime
|
116
|
-
-
|
116
|
+
- `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
|
117
117
|
<br>
|
118
118
|
|
119
119
|
- `pii` has routines for handling of personally identifiable information
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|