pandas-plots 0.11.5__py3-none-any.whl → 0.11.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pandas_plots/hlp.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
3
3
  import scipy.stats
4
4
  import importlib.metadata as md
5
5
  from platform import python_version
6
+ from typing import Literal, List
6
7
 
7
8
  from enum import Enum, auto
8
9
  import platform
@@ -153,10 +154,10 @@ def wrap_text(
153
154
  if is_text:
154
155
  # ! when splitting the text later by blanks, newlines are not correctly handled
155
156
  # * to detect them, they must be followed by a blank:
156
- pattern = r'(\n)(?=\S)' # *forward lookup for newline w/ no blank
157
+ pattern = r"(\n)(?=\S)" # *forward lookup for newline w/ no blank
157
158
  # * add blank after these newlines
158
159
  new_text = re.sub(pattern, r"\1 ", text)
159
- text=new_text
160
+ text = new_text
160
161
 
161
162
  # * then strip and build word list
162
163
  text = (
@@ -269,42 +270,56 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
269
270
  return df_
270
271
 
271
272
 
272
def show_package_version(
    packages: list[str] = None,
    sep: str = " | ",
    include_demo_packages: bool = True,
) -> None:
    """
    Display the versions of the specified packages.

    The Python version is always printed first, followed by one entry per
    package. Everything is printed on a single line, joined by `sep`.

    Parameters:
        packages (list[str], optional): A list of package names. Defaults to None,
            which is treated as an empty list. The list is never modified.
        sep (str, optional): The separator to use when joining the package names and versions. Defaults to " | ".
        include_demo_packages (bool, optional): If True, the demo packages
            ("pandas", "numpy", "duckdb", "pandas-plots", "connection_helper")
            are appended after the given packages. Defaults to True.

    Returns:
        None
    """
    # * a fresh list per call: a mutable default that gets extended would
    # * accumulate the demo packages across calls
    if packages is None:
        packages = []
    if not isinstance(packages, list):
        print("❌ A list of str must be provided")
        return

    demo = [
        "pandas",
        "numpy",
        "duckdb",
        "pandas-plots",
        "connection_helper",
    ]

    # * build a new list instead of extending the caller's one;
    # * dict.fromkeys drops duplicates while keeping first-seen order
    requested = [*packages, *demo] if include_demo_packages else list(packages)
    requested = list(dict.fromkeys(requested))

    items = [f"🐍 {python_version()}"]
    for item in requested:
        try:
            version = md.version(item)
        except md.PackageNotFoundError:
            items.append(f"❌ {item}: Not found")
        else:
            items.append(f"📦 {item}: {version}")
    print(sep.join(items))
298
312
 
299
313
  class OperatingSystem(Enum):
300
314
  WINDOWS = auto()
301
315
  LINUX = auto()
302
316
  MAC = auto()
303
317
 
318
+
304
319
  def get_os(desired_os: OperatingSystem = None) -> bool:
305
320
  """
306
321
  A function that checks the operating system and returns a boolean value based on the desired operating system.
307
-
322
+
308
323
  Parameters:
309
324
  desired_os (OperatingSystem): The desired operating system to check against. Defaults to None.
310
325
  Values are
@@ -315,16 +330,18 @@ def get_os(desired_os: OperatingSystem = None) -> bool:
315
330
  Returns:
316
331
  bool: True if the desired operating system matches the current operating system, False otherwise. Returns None if desired_os is None.
317
332
  """
318
- print(f'💻 os: {os.name} | 🎯 system: {platform.system()} | 💽 release: {platform.release()}')
319
-
333
+ print(
334
+ f"💻 os: {os.name} | 🎯 system: {platform.system()} | 💽 release: {platform.release()}"
335
+ )
336
+
320
337
  if desired_os is None:
321
338
  return None
322
-
323
- if desired_os == OperatingSystem.WINDOWS and platform.system() == 'Windows':
339
+
340
+ if desired_os == OperatingSystem.WINDOWS and platform.system() == "Windows":
324
341
  return True
325
- elif desired_os == OperatingSystem.LINUX and platform.system() == 'Linux':
342
+ elif desired_os == OperatingSystem.LINUX and platform.system() == "Linux":
326
343
  return True
327
- elif desired_os == OperatingSystem.MAC and platform.system() == 'Darwin':
344
+ elif desired_os == OperatingSystem.MAC and platform.system() == "Darwin":
328
345
  return True
329
346
  else:
330
347
  return False
pandas_plots/pii.py ADDED
@@ -0,0 +1,71 @@
1
+ import pandas as pd
2
+ import re
3
+
4
+
5
def remove_pii(
    series: pd.Series,
    verbose: bool = True,
    logging: bool = False,
    custom_regex: str = "",
) -> pd.Series:
    """
    Remove personally identifiable information (PII) from the given column.

    An element counts as PII when it matches at least one of:
    - one of the terms "lösch", "herr", "frau", "strasse", "klinik" (case-insensitive)
    - a date of the form dd.mm.yyyy
    - one of the title patterns "Dr. " (any case), " Fr. ", " Hr. ", " PD "
    - `custom_regex`, if given

    Parameters:
    - series: A pandas Series representing a column in a DataFrame. Must not be empty.
    - verbose: If True, print pii items
    - logging: If True, write pii items into .pii.log (file is overwritten on each call)
    - custom_regex: Regex that is injected into detection

    Returns:
    - the given series w/o NA values and w/o detected pii elements
    """

    # * reject empty columns
    assert len(series) > 0, "series must not be empty"

    # * na must be dropped to ensure processing (returns a new object, input is untouched)
    col = series.dropna()

    # * (pattern, case_sensitive) pairs that are checked against every element
    patterns = [
        # * terms
        ("|".join(("lösch", "herr", "frau", "strasse", "klinik")), False),
        # * dates
        (r"\d{2}\.\d{2}\.\d{4}", True),
        # * dr - inside a character class "|" is a literal, so it must not be used as separator
        (r"[Dd][Rr]\. | Fr\. | Hr\. | PD ", True),
    ]
    if custom_regex:
        patterns.append((custom_regex, True))

    # * a positional boolean mask (instead of index labels) keeps rows apart
    # * that share the same index label
    is_pii = pd.Series(False, index=col.index, dtype=bool)
    if not col.empty:
        for pattern, case in patterns:
            # * na=False: non-string elements simply count as "no match"
            hits = col.str.contains(pattern, case=case, regex=True, na=False)
            is_pii = is_pii | hits.astype(bool)

    found = col[is_pii]

    if verbose:
        print(f"found {len(found):_} pii items:")
        print(found.tolist())

    if logging:
        # * to_string() writes all items, str() would truncate long series
        with open(".pii.log", "w", encoding="utf-8") as f:
            f.write(found.to_string())

    return col[~is_pii]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pandas-plots
3
- Version: 0.11.5
3
+ Version: 0.11.6
4
4
  Summary: A collection of helpers for table handling and visualization
5
5
  Home-page: https://github.com/smeisegeier/pandas-plots
6
6
  Author: smeisegeier
@@ -84,9 +84,9 @@ tbl.show_num_df(
84
84
  - `plot_boxes()` multiple boxplots _(annotation is experimental)_
85
85
  - `plot_stacked_bars()` shortcut to stacked bars 😄
86
86
  - `plots_bars()` a standardized bar plot for a **categorical** column
87
- - features convidence intervals via `use_ci` option
88
- - 🆕 `plot_histogram()` histogram for one or more **numerical** columns
89
- - 🆕 `plot_joints()` a joint plot for **exactly two numerical** columns
87
+ - features confidence intervals via `use_ci` option
88
+ - `plot_histogram()` histogram for one or more **numerical** columns
89
+ - `plot_joints()` a joint plot for **exactly two numerical** columns
90
90
  - `plot_quadrants()` quickly shows a 2x2 heatmap
91
91
 
92
92
  - `ven` offers functions for _venn diagrams_
@@ -100,7 +100,11 @@ tbl.show_num_df(
100
100
  - `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
101
101
  - `create_barcode_from_url()` creates a barcode from a given URL
102
102
  - `add_datetime_columns()` adds datetime columns to a dataframe
103
- - 🆕 `show_package_version` prints version of a list of packages
103
+ - `show_package_version` prints version of a list of packages
104
+ - `get_os` helps to identify and ensure operating system at runtime
105
+
106
+ - `pii` has routines for handling of personally identifiable information
107
+ - `remove_pii()` logs and deletes pii from a series
104
108
 
105
109
  > note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
106
110
 
@@ -0,0 +1,10 @@
1
+ pandas_plots/hlp.py,sha256=rlNCOHglkDZWNuf7aeNeatXvOXGLxuxd-iWQf5m0We0,11768
2
+ pandas_plots/pii.py,sha256=kCNCYZpQH_mcrgnEUR1elxgob9T2T997_3yX0YeTrtk,1931
3
+ pandas_plots/pls.py,sha256=BzZge7TnECjCs47MZ7P63_y2WU23P9sLaMl7SKB5h1Q,35043
4
+ pandas_plots/tbl.py,sha256=3mGLD11W6-KyD3XEL74F1OceyPGtqluqFvmL4Qv8PZo,23766
5
+ pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
6
+ pandas_plots-0.11.6.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
7
+ pandas_plots-0.11.6.dist-info/METADATA,sha256=YJ7R67X0q4P01uuSMQAHk7JxRCDVIaQj2JVesVsOX3g,6819
8
+ pandas_plots-0.11.6.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
9
+ pandas_plots-0.11.6.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
10
+ pandas_plots-0.11.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (71.1.0)
2
+ Generator: setuptools (72.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,9 +0,0 @@
1
- pandas_plots/hlp.py,sha256=fTlKtFhhIbZiFw7LetZzidQ8L4Nlz5bt-rSvANHG8dQ,11485
2
- pandas_plots/pls.py,sha256=BzZge7TnECjCs47MZ7P63_y2WU23P9sLaMl7SKB5h1Q,35043
3
- pandas_plots/tbl.py,sha256=3mGLD11W6-KyD3XEL74F1OceyPGtqluqFvmL4Qv8PZo,23766
4
- pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
5
- pandas_plots-0.11.5.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
6
- pandas_plots-0.11.5.dist-info/METADATA,sha256=Vu30AJfT-K21s6FSuWMLHhkh_73evydHUKOaIFeUUuc,6636
7
- pandas_plots-0.11.5.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
8
- pandas_plots-0.11.5.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
9
- pandas_plots-0.11.5.dist-info/RECORD,,