pandas-plots 0.12.23__tar.gz → 0.12.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas_plots-0.12.23/src/pandas_plots.egg-info → pandas_plots-0.12.25}/PKG-INFO +1 -4
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/README.md +0 -3
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/setup.cfg +1 -1
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots/hlp.py +1 -1
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots/pls.py +100 -134
- {pandas_plots-0.12.23 → pandas_plots-0.12.25/src/pandas_plots.egg-info}/PKG-INFO +1 -4
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots.egg-info/SOURCES.txt +0 -1
- pandas_plots-0.12.23/src/pandas_plots/pii.py +0 -76
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/LICENSE +0 -0
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/pyproject.toml +0 -0
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots/tbl.py +0 -0
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots/ven.py +0 -0
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots.egg-info/dependency_links.txt +0 -0
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots.egg-info/pii.py +0 -0
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots.egg-info/requires.txt +0 -0
- {pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.12.23
|
3
|
+
Version: 0.12.25
|
4
4
|
Summary: A collection of helper for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -119,9 +119,6 @@ tbl.show_num_df(
|
|
119
119
|
- `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
|
120
120
|
<br>
|
121
121
|
|
122
|
-
- `pii` has routines for handling of personally identifiable information
|
123
|
-
- `remove_pii()` logs and deletes pii from a series
|
124
|
-
|
125
122
|
> note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
126
123
|
|
127
124
|
## more examples
|
@@ -83,9 +83,6 @@ tbl.show_num_df(
|
|
83
83
|
- `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
|
84
84
|
<br>
|
85
85
|
|
86
|
-
- `pii` has routines for handling of personally identifiable information
|
87
|
-
- `remove_pii()` logs and deletes pii from a series
|
88
|
-
|
89
86
|
> note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
90
87
|
|
91
88
|
## more examples
|
@@ -1273,7 +1273,6 @@ def plot_boxes(
|
|
1273
1273
|
|
1274
1274
|
return fig
|
1275
1275
|
|
1276
|
-
|
1277
1276
|
def plot_facet_stacked_bars(
|
1278
1277
|
df: pd.DataFrame,
|
1279
1278
|
subplots_per_row: int = 4,
|
@@ -1294,59 +1293,41 @@ def plot_facet_stacked_bars(
|
|
1294
1293
|
sort_values_color: bool = False,
|
1295
1294
|
sort_values_facet: bool = False,
|
1296
1295
|
relative: bool = False,
|
1297
|
-
|
1298
|
-
) ->
|
1299
|
-
"""
|
1300
|
-
Create a grid of stacked bar charts.
|
1296
|
+
show_pct: bool = False,
|
1297
|
+
) -> go.Figure:
|
1301
1298
|
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
subplot_size (int): Size of each subplot.
|
1310
|
-
color_palette (str): Name of the color palette.
|
1311
|
-
caption (str): Optional caption to prepend to the title.
|
1312
|
-
renderer (Optional[Literal["png", "svg"]]): Renderer for saving the image.
|
1313
|
-
annotations (bool): Whether to show annotations in the subplots.
|
1314
|
-
precision (int): Decimal precision for annotations.
|
1315
|
-
png_path (Optional[Path]): Path to save the image.
|
1316
|
-
show_other (bool): If True, adds an "<other>" bar for columns not in top_n_color.
|
1317
|
-
sort_values_index (bool): If True, sorts index by group sum.
|
1318
|
-
sort_values_color (bool): If True, sorts columns by group sum.
|
1319
|
-
sort_values_facet (bool): If True, sorts facet by group sum.
|
1320
|
-
relative (bool): If True, show bars as relative proportions to 100%.
|
1321
|
-
sort_values (bool): DEPRECATED
|
1322
|
-
|
1323
|
-
|
1324
|
-
Returns:
|
1325
|
-
plot object
|
1299
|
+
# --- ENFORCE show_pct RULES ---
|
1300
|
+
if not relative:
|
1301
|
+
# If bars are absolute, annotations MUST be absolute
|
1302
|
+
if show_pct:
|
1303
|
+
print("Warning: 'show_pct' cannot be True when 'relative' is False. Setting 'show_pct' to False.")
|
1304
|
+
show_pct = False
|
1305
|
+
# ------------------------------
|
1326
1306
|
|
1327
|
-
|
1328
|
-
|
1329
|
-
|
1330
|
-
|
1307
|
+
try:
|
1308
|
+
precision = int(precision)
|
1309
|
+
except (ValueError, TypeError):
|
1310
|
+
print(f"Warning: 'precision' received as {precision} (type: {type(precision)}). Defaulting to 0.")
|
1311
|
+
precision = 0
|
1331
1312
|
|
1332
|
-
|
1313
|
+
df_copy = df.copy()
|
1333
1314
|
|
1334
|
-
if not (
|
1315
|
+
if not (df_copy.shape[1] == 3 or df_copy.shape[1] == 4):
|
1335
1316
|
raise ValueError("Input DataFrame must have 3 or 4 columns.")
|
1336
1317
|
|
1337
|
-
original_column_names =
|
1338
|
-
original_rows = len(df)
|
1318
|
+
original_column_names = df_copy.columns.tolist()
|
1339
1319
|
|
1340
|
-
if
|
1341
|
-
|
1342
|
-
|
1343
|
-
elif
|
1344
|
-
|
1345
|
-
|
1346
|
-
n = df["value"].sum()
|
1320
|
+
if df_copy.shape[1] == 3:
|
1321
|
+
df_copy.columns = ["index", "col", "facet"]
|
1322
|
+
df_copy["value"] = 1
|
1323
|
+
elif df_copy.shape[1] == 4:
|
1324
|
+
df_copy.columns = ["index", "col", "facet", "value"]
|
1347
1325
|
|
1348
|
-
|
1349
|
-
|
1326
|
+
n = df_copy["value"].sum()
|
1327
|
+
original_rows = len(df_copy)
|
1328
|
+
|
1329
|
+
aggregated_df = aggregate_data( # Assumes aggregate_data is accessible
|
1330
|
+
df_copy,
|
1350
1331
|
top_n_index,
|
1351
1332
|
top_n_color,
|
1352
1333
|
top_n_facet,
|
@@ -1357,107 +1338,92 @@ def plot_facet_stacked_bars(
|
|
1357
1338
|
sort_values_facet=sort_values_facet,
|
1358
1339
|
)
|
1359
1340
|
|
1360
|
-
|
1361
|
-
|
1362
|
-
|
1341
|
+
aggregated_df['index'] = aggregated_df['index'].astype(str)
|
1342
|
+
aggregated_df['col'] = aggregated_df['col'].astype(str)
|
1343
|
+
aggregated_df['facet'] = aggregated_df['facet'].astype(str)
|
1363
1344
|
|
1364
|
-
|
1365
|
-
|
1366
|
-
|
1367
|
-
.sort_values(ascending=False)
|
1368
|
-
.index.tolist()
|
1369
|
-
)
|
1370
|
-
column_colors = assign_column_colors(columns, color_palette, null_label)
|
1371
|
-
|
1372
|
-
fig = make_subplots(
|
1373
|
-
rows=-(-len(facets) // subplots_per_row),
|
1374
|
-
cols=min(subplots_per_row, len(facets)),
|
1375
|
-
subplot_titles=facets,
|
1376
|
-
)
|
1345
|
+
# --- Store original 'value' for annotations before potential scaling ---
|
1346
|
+
aggregated_df['annotation_value'] = aggregated_df['value'].copy()
|
1347
|
+
# ----------------------------------------------------------------------
|
1377
1348
|
|
1378
|
-
# * relative?
|
1379
1349
|
if relative:
|
1350
|
+
# This transforms the bar heights (value column) to percentages (0-1 range)
|
1380
1351
|
aggregated_df["value"] = aggregated_df.groupby(["facet", "index"])["value"].transform(lambda x: x / x.sum())
|
1381
|
-
fig.update_layout(yaxis_tickformat=".0%") # Show as percentage
|
1382
|
-
|
1383
|
-
# * Ensure all categories appear in the legend by adding an invisible trace
|
1384
|
-
for column in columns:
|
1385
|
-
fig.add_trace(
|
1386
|
-
go.Bar(
|
1387
|
-
x=[None], # Invisible bar
|
1388
|
-
y=[None],
|
1389
|
-
name=column,
|
1390
|
-
marker=dict(color=column_colors[column]),
|
1391
|
-
showlegend=True, # Ensure it appears in the legend
|
1392
|
-
)
|
1393
|
-
)
|
1394
1352
|
|
1395
|
-
|
1396
|
-
for i, facet in enumerate(facets):
|
1397
|
-
facet_data = aggregated_df[aggregated_df["facet"] == facet]
|
1398
|
-
row = (i // subplots_per_row) + 1
|
1399
|
-
col = (i % subplots_per_row) + 1
|
1400
|
-
|
1401
|
-
for column in columns:
|
1402
|
-
column_data = facet_data[facet_data["col"] == column]
|
1403
|
-
|
1404
|
-
show_legend = column not in added_to_legend
|
1405
|
-
if show_legend:
|
1406
|
-
added_to_legend.add(column)
|
1407
|
-
|
1408
|
-
fig.add_trace(
|
1409
|
-
go.Bar(
|
1410
|
-
x=column_data["index"],
|
1411
|
-
y=column_data["value"],
|
1412
|
-
name=column,
|
1413
|
-
marker=dict(color=column_colors[column]),
|
1414
|
-
legendgroup=column, # Ensures multiple traces use the same legend entry
|
1415
|
-
showlegend=False, # suppress further legend items
|
1416
|
-
),
|
1417
|
-
row=row,
|
1418
|
-
col=col,
|
1419
|
-
)
|
1353
|
+
category_orders = {}
|
1420
1354
|
|
1421
|
-
|
1422
|
-
|
1423
|
-
|
1424
|
-
x=row_data["index"],
|
1425
|
-
y=row_data["value"],
|
1426
|
-
text=f"{row_data['value']:.{precision}f}",
|
1427
|
-
showarrow=False,
|
1428
|
-
row=row,
|
1429
|
-
col=col,
|
1430
|
-
)
|
1431
|
-
|
1432
|
-
unique_rows = len(aggregated_df)
|
1433
|
-
axis_details = []
|
1434
|
-
if top_n_index > 0:
|
1435
|
-
axis_details.append(f"TOP {top_n_index} [{original_column_names[0]}]")
|
1436
|
-
else:
|
1437
|
-
axis_details.append(f"[{original_column_names[0]}]")
|
1355
|
+
if sort_values_index:
|
1356
|
+
sum_by_index = aggregated_df.groupby('index')['value'].sum().sort_values(ascending=False)
|
1357
|
+
category_orders["index"] = sum_by_index.index.tolist()
|
1438
1358
|
|
1439
|
-
if
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1359
|
+
if sort_values_color:
|
1360
|
+
sum_by_col = aggregated_df.groupby('col')['value'].sum().sort_values(ascending=False)
|
1361
|
+
category_orders["col"] = sum_by_col.index.tolist()
|
1362
|
+
|
1363
|
+
if sort_values_facet:
|
1364
|
+
sum_by_facet = aggregated_df.groupby('facet')['value'].sum().sort_values(ascending=False)
|
1365
|
+
category_orders["facet"] = sum_by_facet.index.tolist()
|
1443
1366
|
|
1444
|
-
|
1445
|
-
|
1367
|
+
columns_for_color = sorted(aggregated_df["col"].unique().tolist())
|
1368
|
+
column_colors_map = assign_column_colors(columns_for_color, color_palette, null_label) # Assumes assign_column_colors is accessible
|
1369
|
+
|
1370
|
+
# --- Prepare the text series for annotations with 'show_pct' control ---
|
1371
|
+
if annotations:
|
1372
|
+
if show_pct:
|
1373
|
+
# When show_pct is True, use the scaled 'value' column (0-1) and format as percentage
|
1374
|
+
formatted_text_series = aggregated_df["value"].apply(lambda x: f"{x:.{precision}%}".replace('.', ','))
|
1375
|
+
else:
|
1376
|
+
# When show_pct is False, use the 'annotation_value' (original absolute) and format as absolute
|
1377
|
+
formatted_text_series = aggregated_df["annotation_value"].apply(lambda x: f"{x:_.{precision}f}".replace('.', ','))
|
1446
1378
|
else:
|
1447
|
-
|
1379
|
+
formatted_text_series = None
|
1380
|
+
# -----------------------------------------------------------------------
|
1448
1381
|
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1382
|
+
fig = px.bar(
|
1383
|
+
aggregated_df,
|
1384
|
+
x="index",
|
1385
|
+
y="value",
|
1386
|
+
color="col",
|
1387
|
+
facet_col="facet",
|
1388
|
+
facet_col_wrap=subplots_per_row,
|
1454
1389
|
barmode="stack",
|
1455
|
-
|
1456
|
-
|
1457
|
-
|
1458
|
-
|
1390
|
+
color_discrete_map=column_colors_map,
|
1391
|
+
category_orders=category_orders,
|
1392
|
+
text=formatted_text_series,
|
1393
|
+
text_auto=False,
|
1394
|
+
height=subplot_size * (-(-len(aggregated_df["facet"].unique()) // subplots_per_row)),
|
1395
|
+
title=f"{caption} {original_column_names[0]}, {original_column_names[1]}, {original_column_names[2]}",
|
1459
1396
|
)
|
1460
1397
|
|
1398
|
+
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
|
1399
|
+
|
1400
|
+
fig.update_xaxes(matches=None)
|
1401
|
+
for axis in fig.layout:
|
1402
|
+
if axis.startswith("xaxis"):
|
1403
|
+
fig.layout[axis].showticklabels = True
|
1404
|
+
|
1405
|
+
template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
|
1406
|
+
|
1407
|
+
layout_updates = {
|
1408
|
+
"title_text": f"{caption} "
|
1409
|
+
f"{'TOP ' + str(top_n_index) + ' ' if top_n_index > 0 else ''}[{original_column_names[0]}] "
|
1410
|
+
f"{'TOP ' + str(top_n_color) + ' ' if top_n_color > 0 else ''}[{original_column_names[1]}] "
|
1411
|
+
f"{'TOP ' + str(top_n_facet) + ' ' if top_n_facet > 0 else ''}[{original_column_names[2]}] "
|
1412
|
+
f", n = {original_rows:_} ({n:_})",
|
1413
|
+
"showlegend": True,
|
1414
|
+
"template": template,
|
1415
|
+
"width": subplot_size * subplots_per_row,
|
1416
|
+
}
|
1417
|
+
|
1418
|
+
if relative:
|
1419
|
+
layout_updates['yaxis_range'] = [0, 1.1]
|
1420
|
+
layout_updates['yaxis_tickformat'] = ".0%"
|
1421
|
+
|
1422
|
+
fig.update_layout(**layout_updates)
|
1423
|
+
|
1424
|
+
if relative:
|
1425
|
+
fig.update_yaxes(tickformat=".0%")
|
1426
|
+
|
1461
1427
|
if png_path:
|
1462
1428
|
png_path = Path(png_path)
|
1463
1429
|
fig.write_image(str(png_path))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.12.23
|
3
|
+
Version: 0.12.25
|
4
4
|
Summary: A collection of helper for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -119,9 +119,6 @@ tbl.show_num_df(
|
|
119
119
|
- `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
|
120
120
|
<br>
|
121
121
|
|
122
|
-
- `pii` has routines for handling of personally identifiable information
|
123
|
-
- `remove_pii()` logs and deletes pii from a series
|
124
|
-
|
125
122
|
> note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
126
123
|
|
127
124
|
## more examples
|
@@ -1,76 +0,0 @@
|
|
1
|
-
import pandas as pd
|
2
|
-
import re
|
3
|
-
|
4
|
-
|
5
|
-
def remove_pii(
|
6
|
-
series: pd.Series,
|
7
|
-
verbose: bool = True,
|
8
|
-
logging: bool = False,
|
9
|
-
custom_regex="",
|
10
|
-
) -> pd.Index:
|
11
|
-
"""
|
12
|
-
Remove personally identifiable information (PII) from the given column.
|
13
|
-
|
14
|
-
Parameters:
|
15
|
-
- series: A pandas Series representing a column in a DataFrame.
|
16
|
-
- verbose: If True, print pii items
|
17
|
-
- logging: If True, write pii items into the file .pii.log
|
18
|
-
- custom_regex: Regex that is injected into detection
|
19
|
-
|
20
|
-
Returns:
|
21
|
-
- index object with indexes of all pii items
|
22
|
-
|
23
|
-
Remarks:
|
24
|
-
- df.drop(axis=0, index=result, inplace=True)
|
25
|
-
"""
|
26
|
-
|
27
|
-
# * reject empty columns
|
28
|
-
assert len(series) > 0
|
29
|
-
|
30
|
-
col = series.copy()
|
31
|
-
|
32
|
-
# * na must be dropped to ensure processsing
|
33
|
-
col.dropna(inplace=True)
|
34
|
-
|
35
|
-
# * find terms
|
36
|
-
_terms = frozenset(["lösch", "herr", "frau", "strasse", "klinik"])
|
37
|
-
idx_terms = col[
|
38
|
-
col.str.contains(
|
39
|
-
"|".join(_terms),
|
40
|
-
case=False,
|
41
|
-
regex=True,
|
42
|
-
)
|
43
|
-
].index
|
44
|
-
|
45
|
-
# # * optional: search for terms in whole df
|
46
|
-
# df.apply(lambda row: row.astype(str).str.contains('test', case=False, regex=True).any(), axis=1)
|
47
|
-
|
48
|
-
# # * find dates
|
49
|
-
ptr_date = r"\d{2}\.\d{2}\.\d{4}"
|
50
|
-
idx_date = col[col.str.contains(ptr_date, regex=True)].index
|
51
|
-
|
52
|
-
# * dr
|
53
|
-
ptr_dr = r"[D|d][R|r]\. | Fr\. | Hr\. | PD "
|
54
|
-
idx_dr = col[col.str.contains(ptr_dr, regex=True)].index
|
55
|
-
|
56
|
-
# * custom
|
57
|
-
idx_custom = (
|
58
|
-
col[col.str.contains(custom_regex, regex=True)].index
|
59
|
-
if custom_regex
|
60
|
-
else pd.Index([])
|
61
|
-
)
|
62
|
-
|
63
|
-
idx_all = idx_terms.union(idx_date).union(idx_dr).union(idx_custom)
|
64
|
-
|
65
|
-
if verbose:
|
66
|
-
# print(f"found: {idx_dr.__len__()} dr | {idx_date.__len__()} date | {idx_terms.__len__()} terms")
|
67
|
-
print(f"found {idx_all.__len__():_} pii items:")
|
68
|
-
print(col.loc[idx_all].tolist())
|
69
|
-
|
70
|
-
if logging: # Assuming logging is defined and has the correct value
|
71
|
-
data = col.loc[idx_all] # Assuming col and idx_all are defined
|
72
|
-
with open(".pii.log", "w") as f:
|
73
|
-
# ! when using str(), it will give only a summary!
|
74
|
-
f.write(data.to_string(index=True))
|
75
|
-
|
76
|
-
return idx_all
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{pandas_plots-0.12.23 → pandas_plots-0.12.25}/src/pandas_plots.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|