pandas-plots 0.11.4__py3-none-any.whl → 0.11.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pandas_plots/hlp.py CHANGED
@@ -3,6 +3,11 @@ import numpy as np
3
3
  import scipy.stats
4
4
  import importlib.metadata as md
5
5
  from platform import python_version
6
+ from typing import Literal, List
7
+
8
+ from enum import Enum, auto
9
+ import platform
10
+ import os
6
11
 
7
12
  from io import BytesIO
8
13
  from matplotlib import pyplot as plt
@@ -149,10 +154,10 @@ def wrap_text(
149
154
  if is_text:
150
155
  # ! when splitting the text later by blanks, newlines are not correctly handled
151
156
  # * to detect them, they must be followed by a blank:
152
- pattern = r'(\n)(?=\S)' # *forward lookup for newline w/ no blank
157
+ pattern = r"(\n)(?=\S)" # *forward lookup for newline w/ no blank
153
158
  # * add blank after these newlines
154
159
  new_text = re.sub(pattern, r"\1 ", text)
155
- text=new_text
160
+ text = new_text
156
161
 
157
162
  # * then strip and build word list
158
163
  text = (
@@ -265,23 +270,78 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
265
270
  return df_
266
271
 
267
272
 
268
def show_package_version(
    packages: list[str] = None,
    sep: str = " | ",
    include_demo_packages: bool = True,
) -> None:
    """
    Print the Python version followed by the installed versions of the given packages.

    Parameters:
        packages (list[str], optional): Package names to look up. Defaults to None, which is treated as an empty list.
            The list handed in is never modified.
        sep (str, optional): The separator to use when joining the package names and versions. Defaults to " | ".
        include_demo_packages (bool, optional): If True, the demo packages
            ("pandas", "numpy", "duckdb", "pandas-plots", "connection_helper") are appended to the given packages.
            Defaults to True.

    Returns:
        None. The result is printed as a single line. If packages is not a list, an error line is printed instead.
    """
    # * None stands in for "no extra packages" to avoid a shared mutable default argument
    if packages is None:
        packages = []

    if not isinstance(packages, list):
        print("❌ A list of str must be provided")
        return

    demo = [
        "pandas",
        "numpy",
        "duckdb",
        "pandas-plots",
        "connection_helper",
    ]

    # * work on a copy: extending the argument itself would alter the caller's list
    # * (and previously made the default list grow with every call)
    names = list(packages)
    if include_demo_packages:
        names.extend(demo)

    # * drop duplicates but keep first-seen order, so a package is reported only once
    names = list(dict.fromkeys(names))

    items = [f"🐍 {python_version()}"]
    for item in names:
        try:
            items.append(f"📦 {item}: {md.version(item)}")
        except md.PackageNotFoundError:
            items.append(f"❌ {item}: Not found")

    print(sep.join(items))
312
+
313
class OperatingSystem(Enum):
    """Closed set of operating systems that can be passed to get_os() as the desired system."""

    # * values are assigned automatically; only the member identity is compared
    WINDOWS = auto()
    LINUX = auto()
    MAC = auto()
317
+
318
+
319
def get_os(desired_os: OperatingSystem = None) -> bool:
    """
    Print details of the running platform and, if requested, compare it with an expected operating system.

    Parameters:
        desired_os (OperatingSystem): The operating system to check against. Defaults to None.
            Values are
            - OperatingSystem.WINDOWS
            - OperatingSystem.LINUX
            - OperatingSystem.MAC

    Returns:
        bool: True if the current system is the desired one, False otherwise.
        None is returned when desired_os is None (nothing is compared, the info line is still printed).
    """
    print(
        f"💻 os: {os.name} | 🎯 system: {platform.system()} | 💽 release: {platform.release()}"
    )

    # * no expectation given -> no verdict
    if desired_os is None:
        return None

    # * each enum member paired with the name platform.system() has to report for a match
    system_names = (
        (OperatingSystem.WINDOWS, "Windows"),
        (OperatingSystem.LINUX, "Linux"),
        (OperatingSystem.MAC, "Darwin"),
    )
    current = platform.system()
    return any(
        desired_os == member and current == name for member, name in system_names
    )
pandas_plots/pii.py ADDED
@@ -0,0 +1,71 @@
1
+ import pandas as pd
2
+ import re
3
+
4
+
5
def remove_pii(
    series: pd.Series,
    verbose: bool = True,
    logging: bool = False,
    custom_regex="",
) -> pd.Series:
    """
    Remove entries that look like personally identifiable information (PII) from the given column.

    An entry is treated as PII if it matches at least one of:
    - one of the fixed terms "lösch", "herr", "frau", "strasse", "klinik" (case-insensitive, anywhere in the text)
    - a date written as dd.mm.yyyy
    - a title marker: "Dr. " in any letter case, or " Fr. ", " Hr. ", " PD " (with the surrounding blanks)
    - custom_regex, if given

    Parameters:
    - series: A pandas Series representing a column in a DataFrame. Must not be empty.
    - verbose: If True, print pii items
    - logging: If True, write pii items into .pii.log (file is overwritten)
    - custom_regex: Regex that is injected into detection

    Returns:
    - a new series w/o missing values and w/o detected pii elements; the input series is not modified

    Raises:
    - AssertionError: if the series has no rows
    """

    # * reject empty columns
    assert len(series) > 0, "series must not be empty"

    # * na must be dropped to ensure processing; dropna() returns a new object
    col = series.dropna()

    def _hits(pattern: str, case: bool = True) -> pd.Series:
        # * na=False: non-string cells yield NaN, which would break boolean indexing
        return col.str.contains(pattern, case=case, regex=True, na=False)

    # * find terms
    terms = ("lösch", "herr", "frau", "strasse", "klinik")
    mask = _hits("|".join(terms), case=False)

    # * find dates
    ptr_date = r"\d{2}\.\d{2}\.\d{4}"
    mask = mask | _hits(ptr_date)

    # * dr (a "|" inside [] is a literal pipe, so the classes list only the letters)
    ptr_dr = r"[Dd][Rr]\. | Fr\. | Hr\. | PD "
    mask = mask | _hits(ptr_dr)

    # * custom
    if custom_regex:
        mask = mask | _hits(custom_regex)

    # * select by row mask rather than by index label: rows sharing a label are judged individually
    flagged = col[mask]

    if verbose:
        print(f"found {len(flagged):_} pii items:")
        print(flagged.tolist())

    if logging:
        # * explicit encoding: flagged text is likely to contain umlauts
        with open(".pii.log", "w", encoding="utf-8") as f:
            f.write(str(flagged))

    return col[~mask]
pandas_plots/tbl.py CHANGED
@@ -229,6 +229,10 @@ def pivot_df(
229
229
  A function to pivot a DataFrame based on specified parameters hand over to the *show_num_df* function.
230
230
  It does not provide much added value since the built-in pivot_table function does the same thing.
231
231
  However, it can be useful in certain situations (applying top_n_index and top_n_columns).
232
+
233
+ First two must be [index] and [columns]
234
+ If 3 columns are given, last one must be the weights column.
235
+ If 2 columns are given, column 3 will be added as flat count.
232
236
 
233
237
  Args:
234
238
  df (pd.DataFrame): The input DataFrame to be pivoted.
@@ -262,6 +266,10 @@ def pivot_df(
262
266
  print(f"❌ axis not supported")
263
267
  return
264
268
 
269
+ # * if only 2 are provided, add cnt col
270
+ if len(df.columns) == 2:
271
+ df = df.assign(cnt=1)
272
+
265
273
  if len(df.columns) != 3:
266
274
  print("❌ df must have exactly 3 columns")
267
275
  return
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pandas-plots
3
- Version: 0.11.4
3
+ Version: 0.11.6
4
4
  Summary: A collection of helper for table handling and vizualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -24,8 +24,8 @@ Requires-Dist: plotly >=5.18.0
24
24
  Requires-Dist: matplotlib >=3.8.2
25
25
  Requires-Dist: matplotlib-venn >=0.11.10
26
26
  Requires-Dist: seaborn >=0.13.2
27
- Requires-Dist: Jinja2 >=3.1.3
28
- Requires-Dist: requests >=2.31.0
27
+ Requires-Dist: Jinja2 >=3.1.4
28
+ Requires-Dist: requests >=2.32.0
29
29
  Requires-Dist: numpy <2.0.0
30
30
 
31
31
  # pandas-plots
@@ -77,16 +77,16 @@ tbl.show_num_df(
77
77
  - `tbl` utilities for table descriptions
78
78
  - 🌟`show_num_df()` displays a table as styled version with additional information
79
79
  - `describe_df()` an alternative version of pandas `describe()` function
80
- - `pivot_df()` gets a pivot table of a 3 column dataframe
80
+ - `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
81
81
 
82
82
  - `pls` for plotly visualizations
83
83
  - `plot_box()` auto annotated boxplot w/ violin option
84
84
  - `plot_boxes()` multiple boxplots _(annotation is experimental)_
85
85
  - `plot_stacked_bars()` shortcut to stacked bars 😄
86
86
  - `plots_bars()` a standardized bar plot for a **categorical** column
87
- - features convidence intervals via `use_ci` option
88
- - 🆕 `plot_histogram()` histogram for one or more **numerical** columns
89
- - 🆕 `plot_joints()` a joint plot for **exactly two numerical** columns
87
+ - features confidence intervals via `use_ci` option
88
+ - `plot_histogram()` histogram for one or more **numerical** columns
89
+ - `plot_joints()` a joint plot for **exactly two numerical** columns
90
90
  - `plot_quadrants()` quickly shows a 2x2 heatmap
91
91
 
92
92
  - `ven` offers functions for _venn diagrams_
@@ -100,7 +100,11 @@ tbl.show_num_df(
100
100
  - `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
101
101
  - `create_barcode_from_url()` creates a barcode from a given URL
102
102
  - `add_datetime_columns()` adds datetime columns to a dataframe
103
- - 🆕 `show_package_version` prints version of a list of packages
103
+ - `show_package_version` prints version of a list of packages
104
+ - `get_os` helps to identify and ensure operating system at runtime
105
+
106
+ - `pii` has routines for handling of personally identifiable information
107
+ - `remove_pii()` logs and deletes pii from a series
104
108
 
105
109
  > note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
106
110
 
@@ -0,0 +1,10 @@
1
+ pandas_plots/hlp.py,sha256=rlNCOHglkDZWNuf7aeNeatXvOXGLxuxd-iWQf5m0We0,11768
2
+ pandas_plots/pii.py,sha256=kCNCYZpQH_mcrgnEUR1elxgob9T2T997_3yX0YeTrtk,1931
3
+ pandas_plots/pls.py,sha256=BzZge7TnECjCs47MZ7P63_y2WU23P9sLaMl7SKB5h1Q,35043
4
+ pandas_plots/tbl.py,sha256=3mGLD11W6-KyD3XEL74F1OceyPGtqluqFvmL4Qv8PZo,23766
5
+ pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
6
+ pandas_plots-0.11.6.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
7
+ pandas_plots-0.11.6.dist-info/METADATA,sha256=YJ7R67X0q4P01uuSMQAHk7JxRCDVIaQj2JVesVsOX3g,6819
8
+ pandas_plots-0.11.6.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
9
+ pandas_plots-0.11.6.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
10
+ pandas_plots-0.11.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.2.0)
2
+ Generator: setuptools (72.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,9 +0,0 @@
1
- pandas_plots/hlp.py,sha256=Fq6582i4tgKP8H-Ljp6MQLZkEdsUH935oOqkAxwswsA,10149
2
- pandas_plots/pls.py,sha256=BzZge7TnECjCs47MZ7P63_y2WU23P9sLaMl7SKB5h1Q,35043
3
- pandas_plots/tbl.py,sha256=7Ei-iD5ckhayYh7MFy4LFnta2mlnoPUkDfmVJjMEC8M,23482
4
- pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
5
- pandas_plots-0.11.4.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
6
- pandas_plots-0.11.4.dist-info/METADATA,sha256=pJJ86NfZmICZxyLt_KdwITYSRKDsAynn5tMPWuQFB-M,6597
7
- pandas_plots-0.11.4.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
8
- pandas_plots-0.11.4.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
9
- pandas_plots-0.11.4.dist-info/RECORD,,