pandas-plots 0.12.23__tar.gz → 0.12.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pandas-plots
-Version: 0.12.23
+Version: 0.12.25
 Summary: A collection of helper for table handling and visualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -119,9 +119,6 @@ tbl.show_num_df(
 - `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
 <br>
 
-- `pii` has routines for handling of personally identifiable information
-- `remove_pii()` logs and deletes pii from a series
-
 > note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
 
 ## more examples
@@ -83,9 +83,6 @@ tbl.show_num_df(
 - `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
 <br>
 
-- `pii` has routines for handling of personally identifiable information
-- `remove_pii()` logs and deletes pii from a series
-
 > note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
 
 ## more examples
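An aside on the unchanged note about theme control: the plotting code reads `THEME` via `os.getenv()` at call time (visible in the large hunk further down), so it only has to be set before plotting. A minimal sketch; the `pandas_plots.pls` import path is taken from the SOURCES.txt listing in this diff, everything else is illustrative:

    import os

    # The helpers check os.getenv("THEME") == "dark" when building a figure,
    # so this only needs to happen before the plot call, not before the import.
    os.environ["THEME"] = "dark"  # or "light"

    from pandas_plots import pls  # module path as listed in SOURCES.txt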
@@ -1,6 +1,6 @@
 [metadata]
 name = pandas-plots
-version = 0.12.23
+version = 0.12.25
 author = smeisegeier
 author_email = dexterDSDo@googlemail.com
 description = A collection of helper for table handling and visualization
@@ -333,7 +333,7 @@ def show_package_version(
         "numpy",
         "duckdb",
         "pandas-plots",
-        "connection_helper",
+        "connection-helper",
     ]
     items = []
     items.append(f"🐍 {python_version()}")
@@ -1273,7 +1273,6 @@ def plot_boxes(
 
     return fig
 
-
 def plot_facet_stacked_bars(
     df: pd.DataFrame,
     subplots_per_row: int = 4,
@@ -1294,59 +1293,41 @@ def plot_facet_stacked_bars(
     sort_values_color: bool = False,
     sort_values_facet: bool = False,
     relative: bool = False,
-
-) -> object:
-    """
-    Create a grid of stacked bar charts.
+    show_pct: bool = False,
+) -> go.Figure:
 
-    Args:
-        df (pd.DataFrame): DataFrame with 3 or 4 columns.
-        subplots_per_row (int): Number of subplots per row.
-        top_n_index (int): top N index values to keep.
-        top_n_color (int): top N column values to keep.
-        top_n_facet (int): top N facet values to keep.
-        null_label (str): Label for null values.
-        subplot_size (int): Size of each subplot.
-        color_palette (str): Name of the color palette.
-        caption (str): Optional caption to prepend to the title.
-        renderer (Optional[Literal["png", "svg"]]): Renderer for saving the image.
-        annotations (bool): Whether to show annotations in the subplots.
-        precision (int): Decimal precision for annotations.
-        png_path (Optional[Path]): Path to save the image.
-        show_other (bool): If True, adds an "<other>" bar for columns not in top_n_color.
-        sort_values_index (bool): If True, sorts index by group sum.
-        sort_values_color (bool): If True, sorts columns by group sum.
-        sort_values_facet (bool): If True, sorts facet by group sum.
-        relative (bool): If True, show bars as relative proportions to 100%.
-        sort_values (bool): DEPRECATED
-
-
-    Returns:
-        plot object
+    # --- ENFORCE show_pct RULES ---
+    if not relative:
+        # If bars are absolute, annotations MUST be absolute
+        if show_pct:
+            print("Warning: 'show_pct' cannot be True when 'relative' is False. Setting 'show_pct' to False.")
+            show_pct = False
+    # ------------------------------
 
-    Remarks:
-        If you need to include facets that have no data, fill up like this beforehand:
-        df.loc[len(df)]=[None, None, 12]
-    """
+    try:
+        precision = int(precision)
+    except (ValueError, TypeError):
+        print(f"Warning: 'precision' received as {precision} (type: {type(precision)}). Defaulting to 0.")
+        precision = 0
 
-    df = df.copy()  # Copy the input DataFrame to avoid modifying the original
+    df_copy = df.copy()
 
-    if not (df.shape[1] == 3 or df.shape[1] == 4):
+    if not (df_copy.shape[1] == 3 or df_copy.shape[1] == 4):
         raise ValueError("Input DataFrame must have 3 or 4 columns.")
 
-    original_column_names = df.columns.tolist()
-    original_rows = len(df)
+    original_column_names = df_copy.columns.tolist()
 
-    if df.shape[1] == 3:
-        df.columns = ["index", "col", "facet"]
-        df["value"] = 1
-    elif df.shape[1] == 4:
-        df.columns = ["index", "col", "facet", "value"]
-
-    n = df["value"].sum()
+    if df_copy.shape[1] == 3:
+        df_copy.columns = ["index", "col", "facet"]
+        df_copy["value"] = 1
+    elif df_copy.shape[1] == 4:
+        df_copy.columns = ["index", "col", "facet", "value"]
 
-    aggregated_df = aggregate_data(
-        df,
+    n = df_copy["value"].sum()
+    original_rows = len(df_copy)
+
+    aggregated_df = aggregate_data(  # Assumes aggregate_data is accessible
+        df_copy,
         top_n_index,
         top_n_color,
         top_n_facet,
@@ -1357,107 +1338,92 @@ def plot_facet_stacked_bars(
         sort_values_facet=sort_values_facet,
     )
 
-    facets = sorted(
-        aggregated_df["facet"].unique()
-    )  # Ensure facets are sorted consistently
+    aggregated_df['index'] = aggregated_df['index'].astype(str)
+    aggregated_df['col'] = aggregated_df['col'].astype(str)
+    aggregated_df['facet'] = aggregated_df['facet'].astype(str)
 
-    columns = sorted(
-        aggregated_df.groupby("col", observed=True)["value"]
-        .sum()
-        .sort_values(ascending=False)
-        .index.tolist()
-    )
-    column_colors = assign_column_colors(columns, color_palette, null_label)
-
-    fig = make_subplots(
-        rows=-(-len(facets) // subplots_per_row),
-        cols=min(subplots_per_row, len(facets)),
-        subplot_titles=facets,
-    )
+    # --- Store original 'value' for annotations before potential scaling ---
+    aggregated_df['annotation_value'] = aggregated_df['value'].copy()
+    # ----------------------------------------------------------------------
 
-    # * relative?
     if relative:
+        # This transforms the bar heights (value column) to percentages (0-1 range)
         aggregated_df["value"] = aggregated_df.groupby(["facet", "index"])["value"].transform(lambda x: x / x.sum())
-        fig.update_layout(yaxis_tickformat=".0%")  # Show as percentage
-
-    # * Ensure all categories appear in the legend by adding an invisible trace
-    for column in columns:
-        fig.add_trace(
-            go.Bar(
-                x=[None],  # Invisible bar
-                y=[None],
-                name=column,
-                marker=dict(color=column_colors[column]),
-                showlegend=True,  # Ensure it appears in the legend
-            )
-        )
 
-    added_to_legend = set()
-    for i, facet in enumerate(facets):
-        facet_data = aggregated_df[aggregated_df["facet"] == facet]
-        row = (i // subplots_per_row) + 1
-        col = (i % subplots_per_row) + 1
-
-        for column in columns:
-            column_data = facet_data[facet_data["col"] == column]
-
-            show_legend = column not in added_to_legend
-            if show_legend:
-                added_to_legend.add(column)
-
-            fig.add_trace(
-                go.Bar(
-                    x=column_data["index"],
-                    y=column_data["value"],
-                    name=column,
-                    marker=dict(color=column_colors[column]),
-                    legendgroup=column,  # Ensures multiple traces use the same legend entry
-                    showlegend=False,  # suppress further legend items
-                ),
-                row=row,
-                col=col,
-            )
+    category_orders = {}
 
-            if annotations:
-                for _, row_data in column_data.iterrows():
-                    fig.add_annotation(
-                        x=row_data["index"],
-                        y=row_data["value"],
-                        text=f"{row_data['value']:.{precision}f}",
-                        showarrow=False,
-                        row=row,
-                        col=col,
-                    )
-
-    unique_rows = len(aggregated_df)
-    axis_details = []
-    if top_n_index > 0:
-        axis_details.append(f"TOP {top_n_index} [{original_column_names[0]}]")
-    else:
-        axis_details.append(f"[{original_column_names[0]}]")
+    if sort_values_index:
+        sum_by_index = aggregated_df.groupby('index')['value'].sum().sort_values(ascending=False)
+        category_orders["index"] = sum_by_index.index.tolist()
 
-    if top_n_color > 0:
-        axis_details.append(f"TOP {top_n_color} [{original_column_names[1]}]")
-    else:
-        axis_details.append(f"[{original_column_names[1]}]")
+    if sort_values_color:
+        sum_by_col = aggregated_df.groupby('col')['value'].sum().sort_values(ascending=False)
+        category_orders["col"] = sum_by_col.index.tolist()
+
+    if sort_values_facet:
+        sum_by_facet = aggregated_df.groupby('facet')['value'].sum().sort_values(ascending=False)
+        category_orders["facet"] = sum_by_facet.index.tolist()
 
-    if top_n_facet > 0:
-        axis_details.append(f"TOP {top_n_facet} [{original_column_names[2]}]")
+    columns_for_color = sorted(aggregated_df["col"].unique().tolist())
+    column_colors_map = assign_column_colors(columns_for_color, color_palette, null_label)  # Assumes assign_column_colors is accessible
+
+    # --- Prepare the text series for annotations with 'show_pct' control ---
+    if annotations:
+        if show_pct:
+            # When show_pct is True, use the scaled 'value' column (0-1) and format as percentage
+            formatted_text_series = aggregated_df["value"].apply(lambda x: f"{x:.{precision}%}".replace('.', ','))
+        else:
+            # When show_pct is False, use the 'annotation_value' (original absolute) and format as absolute
+            formatted_text_series = aggregated_df["annotation_value"].apply(lambda x: f"{x:_.{precision}f}".replace('.', ','))
     else:
-        axis_details.append(f"[{original_column_names[2]}]")
+        formatted_text_series = None
+    # -----------------------------------------------------------------------
 
-    title = f"{caption} {', '.join(axis_details)}, n = {original_rows:_} ({n:_})"
-    template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
-
-    fig.update_layout(
-        title=title,
+    fig = px.bar(
+        aggregated_df,
+        x="index",
+        y="value",
+        color="col",
+        facet_col="facet",
+        facet_col_wrap=subplots_per_row,
         barmode="stack",
-        height=subplot_size * (-(-len(facets) // subplots_per_row)),
-        width=subplot_size * min(subplots_per_row, len(facets)),
-        showlegend=True,
-        template=template,
+        color_discrete_map=column_colors_map,
+        category_orders=category_orders,
+        text=formatted_text_series,
+        text_auto=False,
+        height=subplot_size * (-(-len(aggregated_df["facet"].unique()) // subplots_per_row)),
+        title=f"{caption} {original_column_names[0]}, {original_column_names[1]}, {original_column_names[2]}",
     )
 
+    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
+
+    fig.update_xaxes(matches=None)
+    for axis in fig.layout:
+        if axis.startswith("xaxis"):
+            fig.layout[axis].showticklabels = True
+
+    template = "plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
+
+    layout_updates = {
+        "title_text": f"{caption} "
+        f"{'TOP ' + str(top_n_index) + ' ' if top_n_index > 0 else ''}[{original_column_names[0]}] "
+        f"{'TOP ' + str(top_n_color) + ' ' if top_n_color > 0 else ''}[{original_column_names[1]}] "
+        f"{'TOP ' + str(top_n_facet) + ' ' if top_n_facet > 0 else ''}[{original_column_names[2]}] "
+        f", n = {original_rows:_} ({n:_})",
+        "showlegend": True,
+        "template": template,
+        "width": subplot_size * subplots_per_row,
+    }
+
+    if relative:
+        layout_updates['yaxis_range'] = [0, 1.1]
+        layout_updates['yaxis_tickformat'] = ".0%"
+
+    fig.update_layout(**layout_updates)
+
+    if relative:
+        fig.update_yaxes(tickformat=".0%")
+
     if png_path:
         png_path = Path(png_path)
         fig.write_image(str(png_path))
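The reworked plot_facet_stacked_bars above gains a `show_pct` switch that is only honored together with `relative=True` (otherwise it is reset with a warning). A minimal usage sketch under that reading; the toy DataFrame is made up, and the `pls` module as home of the function is an assumption since this diff does not name the file:

    import pandas as pd
    from pandas_plots import pls  # module path as listed in SOURCES.txt; function location assumed

    # toy 3-column frame: index, color, facet (a 4th column would be used as the value)
    df = pd.DataFrame(
        {
            "fruit": ["apple", "apple", "pear", "pear", "plum"],
            "grade": ["A", "B", "A", "B", "A"],
            "region": ["north", "north", "south", "south", "south"],
        }
    )

    fig = pls.plot_facet_stacked_bars(
        df,
        subplots_per_row=2,
        relative=True,    # bars become per-index shares, y-axis switches to percent
        show_pct=True,    # annotations formatted as percentages; ignored when relative=False
        annotations=True,
        precision=0,
    )
    fig.show()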
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pandas-plots
-Version: 0.12.23
+Version: 0.12.25
 Summary: A collection of helper for table handling and visualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -119,9 +119,6 @@ tbl.show_num_df(
 - `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
 <br>
 
-- `pii` has routines for handling of personally identifiable information
-- `remove_pii()` logs and deletes pii from a series
-
 > note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
 
 ## more examples
@@ -3,7 +3,6 @@ README.md
 pyproject.toml
 setup.cfg
 src/pandas_plots/hlp.py
-src/pandas_plots/pii.py
 src/pandas_plots/pls.py
 src/pandas_plots/tbl.py
 src/pandas_plots/ven.py
@@ -1,76 +0,0 @@
-import pandas as pd
-import re
-
-
-def remove_pii(
-    series: pd.Series,
-    verbose: bool = True,
-    logging: bool = False,
-    custom_regex="",
-) -> pd.Index:
-    """
-    Remove personally identifiable information (PII) from the given column.
-
-    Parameters:
-    - series: A pandas Series representing a column in a DataFrame.
-    - verbose: If True, print pii items
-    - logging: If True, write pii items into the file .pii.log
-    - custom_regex: Regex that is injected into detection
-
-    Returns:
-    - index object with indexes of all pii items
-
-    Remarks:
-    - df.drop(axis=0, index=result, inplace=True)
-    """
-
-    # * reject empty columns
-    assert len(series) > 0
-
-    col = series.copy()
-
-    # * na must be dropped to ensure processsing
-    col.dropna(inplace=True)
-
-    # * find terms
-    _terms = frozenset(["lösch", "herr", "frau", "strasse", "klinik"])
-    idx_terms = col[
-        col.str.contains(
-            "|".join(_terms),
-            case=False,
-            regex=True,
-        )
-    ].index
-
-    # # * optional: search for terms in whole df
-    # df.apply(lambda row: row.astype(str).str.contains('test', case=False, regex=True).any(), axis=1)
-
-    # # * find dates
-    ptr_date = r"\d{2}\.\d{2}\.\d{4}"
-    idx_date = col[col.str.contains(ptr_date, regex=True)].index
-
-    # * dr
-    ptr_dr = r"[D|d][R|r]\. | Fr\. | Hr\. | PD "
-    idx_dr = col[col.str.contains(ptr_dr, regex=True)].index
-
-    # * custom
-    idx_custom = (
-        col[col.str.contains(custom_regex, regex=True)].index
-        if custom_regex
-        else pd.Index([])
-    )
-
-    idx_all = idx_terms.union(idx_date).union(idx_dr).union(idx_custom)
-
-    if verbose:
-        # print(f"found: {idx_dr.__len__()} dr | {idx_date.__len__()} date | {idx_terms.__len__()} terms")
-        print(f"found {idx_all.__len__():_} pii items:")
-        print(col.loc[idx_all].tolist())
-
-    if logging:  # Assuming logging is defined and has the correct value
-        data = col.loc[idx_all]  # Assuming col and idx_all are defined
-        with open(".pii.log", "w") as f:
-            # ! when using str(), it will give only a summary!
-            f.write(data.to_string(index=True))
-
-    return idx_all
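For reference, the deleted `remove_pii()` took a single Series and returned a pandas Index of rows that look like PII; its own docstring suggests dropping those rows afterwards. A minimal sketch of that pattern (toy data and column name are made up):

    import pandas as pd
    from pandas_plots.pii import remove_pii  # module removed in 0.12.25; present in 0.12.23

    df = pd.DataFrame({"notes": ["all fine", "Herr Meier, 01.02.1980", None]})  # toy data
    pii_idx = remove_pii(df["notes"], verbose=True, logging=False)
    df.drop(axis=0, index=pii_idx, inplace=True)  # pattern from the removed docstring's Remarks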