pandas-plots 0.11.5__py3-none-any.whl → 0.11.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_plots/hlp.py +34 -17
- pandas_plots/pii.py +74 -0
- {pandas_plots-0.11.5.dist-info → pandas_plots-0.11.7.dist-info}/METADATA +9 -5
- pandas_plots-0.11.7.dist-info/RECORD +10 -0
- {pandas_plots-0.11.5.dist-info → pandas_plots-0.11.7.dist-info}/WHEEL +1 -1
- pandas_plots-0.11.5.dist-info/RECORD +0 -9
- {pandas_plots-0.11.5.dist-info → pandas_plots-0.11.7.dist-info}/LICENSE +0 -0
- {pandas_plots-0.11.5.dist-info → pandas_plots-0.11.7.dist-info}/top_level.txt +0 -0
pandas_plots/hlp.py
CHANGED
@@ -3,6 +3,7 @@ import numpy as np
|
|
3
3
|
import scipy.stats
|
4
4
|
import importlib.metadata as md
|
5
5
|
from platform import python_version
|
6
|
+
from typing import Literal, List
|
6
7
|
|
7
8
|
from enum import Enum, auto
|
8
9
|
import platform
|
@@ -153,10 +154,10 @@ def wrap_text(
|
|
153
154
|
if is_text:
|
154
155
|
# ! when splitting the text later by blanks, newlines are not correctly handled
|
155
156
|
# * to detect them, they must be followed by a blank:
|
156
|
-
pattern = r
|
157
|
+
pattern = r"(\n)(?=\S)" # *forward lookup for newline w/ no blank
|
157
158
|
# * add blank after these newlines
|
158
159
|
new_text = re.sub(pattern, r"\1 ", text)
|
159
|
-
text=new_text
|
160
|
+
text = new_text
|
160
161
|
|
161
162
|
# * then strip and build word list
|
162
163
|
text = (
|
@@ -269,42 +270,56 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
|
|
269
270
|
return df_
|
270
271
|
|
271
272
|
|
272
|
-
def show_package_version(
|
273
|
+
def show_package_version(
    packages: list[str] = None,
    sep: str = " | ",
    include_demo_packages: bool = True,
) -> None:
    """
    Print the Python version and the installed versions of the given packages.

    Parameters:
        packages (list[str], optional): Package (distribution) names to look up.
            Defaults to None, which is treated as an empty list. The list that
            is passed in is never modified.
        sep (str, optional): The separator to use when joining the printed items.
            Defaults to " | ".
        include_demo_packages (bool, optional): If True, the demo packages
            "pandas", "numpy", "duckdb", "pandas-plots" and "connection_helper"
            are reported after the requested ones. Defaults to True.

    Returns:
        None. Everything is printed on a single line. Packages that are not
        installed are reported as "Not found". If `packages` is not a list,
        only an error message is printed.
    """
    if packages is None:
        packages = []
    if not isinstance(packages, list):
        print("❌ A list of str must be provided")
        return

    demo = [
        "pandas",
        "numpy",
        "duckdb",
        "pandas-plots",
        "connection_helper",
    ]

    # * work on a copy: extending `packages` in place would change the caller's
    # * list (and, with a mutable default, pile up demo entries on every call)
    names = list(packages)
    if include_demo_packages:
        names.extend(demo)
    # * report every package only once, keeping the first-seen order
    names = list(dict.fromkeys(names))

    items = [f"🐍 {python_version()}"]
    for item in names:
        try:
            version = md.version(item)
        except md.PackageNotFoundError:
            items.append(f"❌ {item}: Not found")
        else:
            items.append(f"📦 {item}: {version}")
    print(sep.join(items))
|
298
312
|
|
299
313
|
class OperatingSystem(Enum):
|
300
314
|
WINDOWS = auto()
|
301
315
|
LINUX = auto()
|
302
316
|
MAC = auto()
|
303
317
|
|
318
|
+
|
304
319
|
def get_os(desired_os: OperatingSystem = None) -> bool:
|
305
320
|
"""
|
306
321
|
A function that checks the operating system and returns a boolean value based on the desired operating system.
|
307
|
-
|
322
|
+
|
308
323
|
Parameters:
|
309
324
|
desired_os (OperatingSystem): The desired operating system to check against. Defaults to None.
|
310
325
|
Values are
|
@@ -315,16 +330,18 @@ def get_os(desired_os: OperatingSystem = None) -> bool:
|
|
315
330
|
Returns:
|
316
331
|
bool: True if the desired operating system matches the current operating system, False otherwise. Returns None if desired_os is None.
|
317
332
|
"""
|
318
|
-
print(
|
319
|
-
|
333
|
+
print(
|
334
|
+
f"💻 os: {os.name} | 🎯 system: {platform.system()} | 💽 release: {platform.release()}"
|
335
|
+
)
|
336
|
+
|
320
337
|
if desired_os is None:
|
321
338
|
return None
|
322
|
-
|
323
|
-
if desired_os == OperatingSystem.WINDOWS and platform.system() ==
|
339
|
+
|
340
|
+
if desired_os == OperatingSystem.WINDOWS and platform.system() == "Windows":
|
324
341
|
return True
|
325
|
-
elif desired_os == OperatingSystem.LINUX and platform.system() ==
|
342
|
+
elif desired_os == OperatingSystem.LINUX and platform.system() == "Linux":
|
326
343
|
return True
|
327
|
-
elif desired_os == OperatingSystem.MAC and platform.system() ==
|
344
|
+
elif desired_os == OperatingSystem.MAC and platform.system() == "Darwin":
|
328
345
|
return True
|
329
346
|
else:
|
330
347
|
return False
|
pandas_plots/pii.py
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import re
|
3
|
+
|
4
|
+
|
5
|
+
def _pii_match_index(col: pd.Series, pattern: str, case: bool = True) -> pd.Index:
    """Return the index labels of `col` whose text matches the regex `pattern`."""
    # * na=False: non-string cells give NaN in str.contains, and a mask holding
    # * NaN cannot be used for boolean indexing -> treat them as "no match"
    mask = col.str.contains(pattern, case=case, regex=True, na=False)
    return col[mask].index


def remove_pii(
    series: pd.Series,
    verbose: bool = True,
    logging: bool = False,
    custom_regex: str = "",
) -> pd.Index:
    """
    Find personally identifiable information (PII) in the given column.

    The series itself is not changed; the index labels of all suspicious
    entries are returned so that the caller can drop them.

    Parameters:
    - series: A pandas Series representing a (text) column in a DataFrame.
    - verbose: If True, print the number of pii items and the items themselves
    - logging: If True, write pii items into the file .pii.log (overwritten on each call)
    - custom_regex: Optional extra regex; matching entries are flagged as well

    Returns:
    - index object with indexes of all pii items

    Raises:
    - ValueError: if series is empty

    Remarks:
    - df.drop(axis=0, index=result, inplace=True)
    """

    # * reject empty columns (explicit raise, since assert vanishes under -O)
    if len(series) == 0:
        raise ValueError("series must not be empty")

    # * na must be dropped to ensure processing; dropna() returns a new object
    col = series.dropna()

    # * find terms (case-insensitive substring match)
    terms = ("lösch", "herr", "frau", "strasse", "klinik")
    idx_terms = _pii_match_index(col, "|".join(terms), case=False)

    # * find dates like 31.12.1999
    idx_date = _pii_match_index(col, r"\d{2}\.\d{2}\.\d{4}")

    # * titles / salutations. the blanks are part of the alternatives:
    # * "Dr. " needs a trailing blank, " Fr. ", " Hr. " and " PD " need blanks on both sides
    ptr_dr = r"[Dd][Rr]\. | Fr\. | Hr\. | PD "
    idx_dr = _pii_match_index(col, ptr_dr)

    # * custom; the empty fallback is sliced from col.index so that the union
    # * below keeps the dtype of the original index
    idx_custom = (
        _pii_match_index(col, custom_regex) if custom_regex else col.index[:0]
    )

    idx_all = idx_terms.union(idx_date).union(idx_dr).union(idx_custom)

    if verbose:
        print(f"found {len(idx_all):_} pii items:")
        print(col.loc[idx_all].tolist())

    if logging:
        # * explicit encoding: the data may contain umlauts and other non-ascii text
        with open(".pii.log", "w", encoding="utf-8") as f:
            f.write(str(col.loc[idx_all]))

    return idx_all
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.11.
|
3
|
+
Version: 0.11.7
|
4
4
|
Summary: A collection of helpers for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -84,9 +84,9 @@ tbl.show_num_df(
|
|
84
84
|
- `plot_boxes()` multiple boxplots _(annotation is experimental)_
|
85
85
|
- `plot_stacked_bars()` shortcut to stacked bars 😄
|
86
86
|
- `plots_bars()` a standardized bar plot for a **categorical** column
|
87
|
-
- features
|
88
|
-
-
|
89
|
-
-
|
87
|
+
- features confidence intervals via `use_ci` option
|
88
|
+
- `plot_histogram()` histogram for one or more **numerical** columns
|
89
|
+
- `plot_joints()` a joint plot for **exactly two numerical** columns
|
90
90
|
- `plot_quadrants()` quickly shows a 2x2 heatmap
|
91
91
|
|
92
92
|
- `ven` offers functions for _venn diagrams_
|
@@ -100,7 +100,11 @@ tbl.show_num_df(
|
|
100
100
|
- `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
|
101
101
|
- `create_barcode_from_url()` creates a barcode from a given URL
|
102
102
|
- `add_datetime_col()` adds datetime columns to a dataframe
|
103
|
-
-
|
103
|
+
- `show_package_version` prints version of a list of packages
|
104
|
+
- `get_os` helps to identify and ensure operating system at runtime
|
105
|
+
|
106
|
+
- `pii` has routines for handling of personally identifiable information
|
107
|
+
- `remove_pii()` detects pii in a series, optionally prints and logs it, and returns the index of the affected rows so they can be dropped
|
104
108
|
|
105
109
|
> note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
106
110
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
pandas_plots/hlp.py,sha256=rlNCOHglkDZWNuf7aeNeatXvOXGLxuxd-iWQf5m0We0,11768
|
2
|
+
pandas_plots/pii.py,sha256=9he5NsbyyYqsuQWmZYIvwYiyxCuKTZBPNNxBcQNxF_E,1998
|
3
|
+
pandas_plots/pls.py,sha256=BzZge7TnECjCs47MZ7P63_y2WU23P9sLaMl7SKB5h1Q,35043
|
4
|
+
pandas_plots/tbl.py,sha256=3mGLD11W6-KyD3XEL74F1OceyPGtqluqFvmL4Qv8PZo,23766
|
5
|
+
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
+
pandas_plots-0.11.7.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
7
|
+
pandas_plots-0.11.7.dist-info/METADATA,sha256=jI2HEo-3ClWKn053IIW9a-vXr1PSW8ExO5krLJAIKBo,6819
|
8
|
+
pandas_plots-0.11.7.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
9
|
+
pandas_plots-0.11.7.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
10
|
+
pandas_plots-0.11.7.dist-info/RECORD,,
|
@@ -1,9 +0,0 @@
|
|
1
|
-
pandas_plots/hlp.py,sha256=fTlKtFhhIbZiFw7LetZzidQ8L4Nlz5bt-rSvANHG8dQ,11485
|
2
|
-
pandas_plots/pls.py,sha256=BzZge7TnECjCs47MZ7P63_y2WU23P9sLaMl7SKB5h1Q,35043
|
3
|
-
pandas_plots/tbl.py,sha256=3mGLD11W6-KyD3XEL74F1OceyPGtqluqFvmL4Qv8PZo,23766
|
4
|
-
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
5
|
-
pandas_plots-0.11.5.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
|
6
|
-
pandas_plots-0.11.5.dist-info/METADATA,sha256=Vu30AJfT-K21s6FSuWMLHhkh_73evydHUKOaIFeUUuc,6636
|
7
|
-
pandas_plots-0.11.5.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
8
|
-
pandas_plots-0.11.5.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
9
|
-
pandas_plots-0.11.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|