pandas-plots 0.12.9__tar.gz → 0.12.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas_plots-0.12.9/src/pandas_plots.egg-info → pandas_plots-0.12.11}/PKG-INFO +4 -3
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/README.md +3 -2
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/setup.cfg +1 -1
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots/hlp.py +24 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots/pls.py +47 -54
- {pandas_plots-0.12.9 → pandas_plots-0.12.11/src/pandas_plots.egg-info}/PKG-INFO +4 -3
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/LICENSE +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/pyproject.toml +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots/pii.py +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots/tbl.py +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots/ven.py +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots.egg-info/SOURCES.txt +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots.egg-info/dependency_links.txt +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots.egg-info/pii.py +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots.egg-info/requires.txt +0 -0
- {pandas_plots-0.12.9 → pandas_plots-0.12.11}/src/pandas_plots.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.12.9
|
3
|
+
Version: 0.12.11
|
4
4
|
Summary: A collection of helpers for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -96,7 +96,7 @@ tbl.show_num_df(
|
|
96
96
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
97
97
|
- `plot_joints()` a joint plot for **exactly two numerical** columns
|
98
98
|
- `plot_quadrants()` quickly shows a 2x2 heatmap
|
99
|
-
-
|
99
|
+
- `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
|
100
100
|
<br>
|
101
101
|
|
102
102
|
- `ven` offers functions for _venn diagrams_
|
@@ -113,7 +113,8 @@ tbl.show_num_df(
|
|
113
113
|
- `add_datetime_col()` adds a datetime column to a dataframe (chainable)
|
114
114
|
- `show_package_version` prints version of a list of packages
|
115
115
|
- `get_os` helps to identify and ensure operating system at runtime
|
116
|
-
-
|
116
|
+
- `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
|
117
|
+
- `find_cols()` finds all columns in a list of columns that contain any of the given stubs
|
117
118
|
<br>
|
118
119
|
|
119
120
|
- `pii` has routines for handling of personally identifiable information
|
@@ -61,7 +61,7 @@ tbl.show_num_df(
|
|
61
61
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
62
62
|
- `plot_joints()` a joint plot for **exactly two numerical** columns
|
63
63
|
- `plot_quadrants()` quickly shows a 2x2 heatmap
|
64
|
-
-
|
64
|
+
- `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
|
65
65
|
<br>
|
66
66
|
|
67
67
|
- `ven` offers functions for _venn diagrams_
|
@@ -78,7 +78,8 @@ tbl.show_num_df(
|
|
78
78
|
- `add_datetime_col()` adds a datetime column to a dataframe (chainable)
|
79
79
|
- `show_package_version` prints version of a list of packages
|
80
80
|
- `get_os` helps to identify and ensure operating system at runtime
|
81
|
-
-
|
81
|
+
- `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
|
82
|
+
- `find_cols()` finds all columns in a list of columns that contain any of the given stubs
|
82
83
|
<br>
|
83
84
|
|
84
85
|
- `pii` has routines for handling of personally identifiable information
|
@@ -467,3 +467,27 @@ def add_bitmask_label(
|
|
467
467
|
# * extend objects to enable chaining
|
468
468
|
pd.DataFrame.add_bitmask_label = add_bitmask_label
|
469
469
|
ddb.DuckDBPyRelation.add_bitmask_label = add_bitmask_label
|
470
|
+
|
471
|
+
|
472
|
+
def find_cols(all_cols: list[str], stubs: list[str] = None):
    """
    Find all columns in a list of columns that contain any of the given stubs.

    Parameters
    ----------
    all_cols : list[str]
        Columns to search in. Any iterable of column names works; it is only
        compared against ``None`` (never truth-tested), so inputs whose truth
        value is ambiguous are still accepted.
    stubs : list[str], optional
        Substrings to search for in the column names. A single string is
        treated as one stub rather than being split into characters.

    Returns
    -------
    list[str] or str
        Columns, in their original order, that contain at least one stub.
        If ``all_cols`` is ``None`` or ``stubs`` is missing/empty, the
        message string ``"❌ empty lists"`` is returned instead of a list
        (kept for backward compatibility with existing callers).
    """
    # The previous signature used `stubs=list[str]`, which made the *type
    # object* the default value: it is truthy, so the guard below was skipped
    # and the substring test then failed on a non-string operand.
    if all_cols is None or not stubs:
        return "❌ empty lists"

    # A bare string would otherwise be iterated character by character and
    # match nearly every column.
    if isinstance(stubs, str):
        stubs = [stubs]

    return [col for col in all_cols if any(stub in col for stub in stubs)]
|
491
|
+
|
492
|
+
# * extend objects to enable chaining
|
493
|
+
pd.DataFrame.find_cols = find_cols
|
@@ -48,7 +48,7 @@ def aggregate_data(
|
|
48
48
|
sort_values (bool): Whether to sort values in descending order based on group sum. Defaults to False.
|
49
49
|
|
50
50
|
Returns:
|
51
|
-
pd.DataFrame: Aggregated and filtered dataset
|
51
|
+
pd.DataFrame: Aggregated and filtered dataset (but not sorted!)
|
52
52
|
"""
|
53
53
|
|
54
54
|
for col in ["index", "col", "facet"]: # Skip 'value' column (numeric)
|
@@ -65,6 +65,7 @@ def aggregate_data(
|
|
65
65
|
.sort_values(ascending=False)[:top_n_index or None]
|
66
66
|
.index
|
67
67
|
)
|
68
|
+
|
68
69
|
else:
|
69
70
|
top_indexes = aggregated_df["index"].sort_values().unique()[:top_n_index or None]
|
70
71
|
|
@@ -103,28 +104,6 @@ def aggregate_data(
|
|
103
104
|
|
104
105
|
aggregated_df = aggregated_df[aggregated_df["facet"].isin(top_facets)]
|
105
106
|
|
106
|
-
# * Ensure facets are sorted alphabetically
|
107
|
-
aggregated_df["facet"] = pd.Categorical(
|
108
|
-
values=aggregated_df["facet"],
|
109
|
-
categories=top_facets,
|
110
|
-
ordered=True,
|
111
|
-
)
|
112
|
-
|
113
|
-
aggregated_df["index"] = pd.Categorical(
|
114
|
-
values=aggregated_df["index"],
|
115
|
-
categories=top_indexes,
|
116
|
-
ordered=True,
|
117
|
-
)
|
118
|
-
|
119
|
-
aggregated_df["col"] = pd.Categorical(
|
120
|
-
values=aggregated_df["col"],
|
121
|
-
categories=top_colors,
|
122
|
-
ordered=True,
|
123
|
-
)
|
124
|
-
|
125
|
-
|
126
|
-
# aggregated_df = aggregated_df.sort_values(by="facet")
|
127
|
-
|
128
107
|
return aggregated_df
|
129
108
|
|
130
109
|
|
@@ -384,14 +363,6 @@ def plot_stacked_bars(
|
|
384
363
|
|
385
364
|
df = aggregated_df.copy()
|
386
365
|
|
387
|
-
columns = sorted(
|
388
|
-
df.groupby("col", observed=True)["value"]
|
389
|
-
.sum()
|
390
|
-
.sort_values(ascending=False)
|
391
|
-
.index.tolist()
|
392
|
-
)
|
393
|
-
column_colors = assign_column_colors(columns, color_palette, null_label)
|
394
|
-
|
395
366
|
caption = _set_caption(caption)
|
396
367
|
|
397
368
|
# * after grouping add cols for pct and formatting
|
@@ -405,35 +376,44 @@ def plot_stacked_bars(
|
|
405
376
|
lambda row: f"{row['cnt_str']}{divider2}({row['cnt_pct_only']})", axis=1
|
406
377
|
)
|
407
378
|
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
379
|
+
if sort_values_color:
|
380
|
+
colors_unique = (df
|
381
|
+
.groupby("col", observed=True)["value"]
|
382
|
+
.sum()
|
383
|
+
.sort_values(ascending=False)
|
384
|
+
.index.tolist()
|
385
|
+
)
|
386
|
+
else:
|
387
|
+
colors_unique = sorted(df["col"].unique().tolist())
|
415
388
|
|
416
|
-
|
389
|
+
if sort_values_index:
|
390
|
+
index_unique = (df
|
391
|
+
.groupby("index", observed=True)["value"]
|
392
|
+
.sum()
|
393
|
+
.sort_values(ascending=False)
|
394
|
+
.index.tolist()
|
395
|
+
)
|
396
|
+
else:
|
397
|
+
index_unique = sorted(df["index"].unique().tolist())
|
417
398
|
|
418
|
-
|
419
|
-
|
420
|
-
# # categories=sort_order,
|
421
|
-
# ordered=True,
|
422
|
-
# )
|
399
|
+
color_map = assign_column_colors(colors_unique, color_palette, null_label)
|
400
|
+
|
423
401
|
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
402
|
+
cat_orders = {
|
403
|
+
"index": index_unique,
|
404
|
+
"col": colors_unique,
|
405
|
+
}
|
406
|
+
|
407
|
+
# Ensure bl is categorical with the correct order
|
408
|
+
df["index"] = pd.Categorical(df["index"], categories=cat_orders["index"], ordered=True)
|
429
409
|
|
430
|
-
# display(df)
|
431
410
|
|
432
411
|
# * plot
|
433
412
|
fig = px.bar(
|
434
413
|
df,
|
435
414
|
x="index" if orientation == "v" else "value",
|
436
415
|
y="value" if orientation == "v" else "index",
|
416
|
+
# color=columns,
|
437
417
|
color="col",
|
438
418
|
text="cnt_pct_str" if normalize else "cnt_str",
|
439
419
|
orientation=orientation,
|
@@ -442,13 +422,15 @@ def plot_stacked_bars(
|
|
442
422
|
template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly",
|
443
423
|
width=width,
|
444
424
|
height=height,
|
445
|
-
color_discrete_map=
|
446
|
-
category_orders=
|
447
|
-
col_index: list(df["index"].cat.categories)
|
448
|
-
}, # <- Add this line
|
425
|
+
color_discrete_map=color_map, # Use assigned colors
|
426
|
+
category_orders= cat_orders,
|
449
427
|
)
|
450
428
|
|
451
429
|
|
430
|
+
# print(cat_orders)
|
431
|
+
# print(color_map)
|
432
|
+
# display(df)
|
433
|
+
|
452
434
|
# * get longest bar
|
453
435
|
bar_max = (
|
454
436
|
df.groupby("index")["value"].sum().sort_values(ascending=False).iloc[0]
|
@@ -475,6 +457,9 @@ def plot_stacked_bars(
|
|
475
457
|
},
|
476
458
|
},
|
477
459
|
)
|
460
|
+
fig.update_layout(legend_traceorder="normal")
|
461
|
+
fig.update_layout(legend_title_text=col_color)
|
462
|
+
|
478
463
|
|
479
464
|
# * set dtick
|
480
465
|
if orientation == "h":
|
@@ -1278,6 +1263,7 @@ def plot_facet_stacked_bars(
|
|
1278
1263
|
sort_values_index: bool = False,
|
1279
1264
|
sort_values_color: bool = False,
|
1280
1265
|
sort_values_facet: bool = False,
|
1266
|
+
relative: bool = False,
|
1281
1267
|
|
1282
1268
|
) -> object:
|
1283
1269
|
"""
|
@@ -1301,6 +1287,7 @@ def plot_facet_stacked_bars(
|
|
1301
1287
|
sort_values_index (bool): If True, sorts index by group sum.
|
1302
1288
|
sort_values_color (bool): If True, sorts columns by group sum.
|
1303
1289
|
sort_values_facet (bool): If True, sorts facet by group sum.
|
1290
|
+
relative (bool): If True, show bars as relative proportions to 100%.
|
1304
1291
|
sort_values (bool): DEPRECATED
|
1305
1292
|
|
1306
1293
|
|
@@ -1356,6 +1343,11 @@ def plot_facet_stacked_bars(
|
|
1356
1343
|
subplot_titles=facets,
|
1357
1344
|
)
|
1358
1345
|
|
1346
|
+
# * relative?
|
1347
|
+
if relative:
|
1348
|
+
aggregated_df["value"] = aggregated_df.groupby(["facet", "index"])["value"].transform(lambda x: x / x.sum())
|
1349
|
+
fig.update_layout(yaxis_tickformat=".0%") # Show as percentage
|
1350
|
+
|
1359
1351
|
# * Ensure all categories appear in the legend by adding an invisible trace
|
1360
1352
|
for column in columns:
|
1361
1353
|
fig.add_trace(
|
@@ -1424,6 +1416,7 @@ def plot_facet_stacked_bars(
|
|
1424
1416
|
|
1425
1417
|
title = f"{caption} {', '.join(axis_details)}, n = {original_rows:_}"
|
1426
1418
|
template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
|
1419
|
+
|
1427
1420
|
fig.update_layout(
|
1428
1421
|
title=title,
|
1429
1422
|
barmode="stack",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.12.9
|
3
|
+
Version: 0.12.11
|
4
4
|
Summary: A collection of helpers for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -96,7 +96,7 @@ tbl.show_num_df(
|
|
96
96
|
- `plot_histogram()` histogram for one or more **numerical** columns
|
97
97
|
- `plot_joints()` a joint plot for **exactly two numerical** columns
|
98
98
|
- `plot_quadrants()` quickly shows a 2x2 heatmap
|
99
|
-
-
|
99
|
+
- `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
|
100
100
|
<br>
|
101
101
|
|
102
102
|
- `ven` offers functions for _venn diagrams_
|
@@ -113,7 +113,8 @@ tbl.show_num_df(
|
|
113
113
|
- `add_datetime_col()` adds a datetime column to a dataframe (chainable)
|
114
114
|
- `show_package_version` prints version of a list of packages
|
115
115
|
- `get_os` helps to identify and ensure operating system at runtime
|
116
|
-
-
|
116
|
+
- `add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
|
117
|
+
- `find_cols()` finds all columns in a list of columns that contain any of the given stubs
|
117
118
|
<br>
|
118
119
|
|
119
120
|
- `pii` has routines for handling of personally identifiable information
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|