pandas-plots 0.12.24__py3-none-any.whl → 0.12.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pandas_plots-0.12.24.dist-info → pandas_plots-0.12.25.dist-info}/METADATA +1 -4
- pandas_plots-0.12.25.dist-info/RECORD +10 -0
- {pandas_plots-0.12.24.dist-info → pandas_plots-0.12.25.dist-info}/WHEEL +1 -1
- pandas_plots/pii.py +0 -76
- pandas_plots-0.12.24.dist-info/RECORD +0 -11
- {pandas_plots-0.12.24.dist-info → pandas_plots-0.12.25.dist-info}/licenses/LICENSE +0 -0
- {pandas_plots-0.12.24.dist-info → pandas_plots-0.12.25.dist-info}/pii.py +0 -0
- {pandas_plots-0.12.24.dist-info → pandas_plots-0.12.25.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pandas-plots
|
3
|
-
Version: 0.12.24
|
3
|
+
Version: 0.12.25
|
4
4
|
Summary: A collection of helper for table handling and visualization
|
5
5
|
Home-page: https://github.com/smeisegeier/pandas-plots
|
6
6
|
Author: smeisegeier
|
@@ -119,9 +119,6 @@ tbl.show_num_df(
|
|
119
119
|
- `add_measures_to_pyg_config()` adds measures to a pygwalker config file to avoid frequent manual update
|
120
120
|
<br>
|
121
121
|
|
122
|
-
- `pii` has routines for handling of personally identifiable information
|
123
|
-
- `remove_pii()` logs and deletes pii from a series
|
124
|
-
|
125
122
|
> note: theme setting can be controlled through all functions by setting the environment variable `THEME` to either light or dark
|
126
123
|
|
127
124
|
## more examples
|
@@ -0,0 +1,10 @@
|
|
1
|
+
pandas_plots/hlp.py,sha256=kSqoGMEaOtC94wtTS7CMFXMgptv-2tSOMf5Zm7euhpI,20838
|
2
|
+
pandas_plots/pls.py,sha256=jFsHvjG8fvLBdHpaYOX_5TgpDrcA5bMWjAUtXb6bVXo,48629
|
3
|
+
pandas_plots/tbl.py,sha256=RJWBHeKGTAhGpVCY57TsS_dYR-FpInP-TOsKW_tU4V4,32556
|
4
|
+
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
5
|
+
pandas_plots-0.12.25.dist-info/licenses/LICENSE,sha256=ltLbQWUCs-GBQlTPXbt5nHNBE9U5LzjjoS1Y8hHETM4,1051
|
6
|
+
pandas_plots-0.12.25.dist-info/METADATA,sha256=x-ge7rBcirp52MCfuDe0xiCjMpobadgi7YHYpISsj4s,7436
|
7
|
+
pandas_plots-0.12.25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
8
|
+
pandas_plots-0.12.25.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
9
|
+
pandas_plots-0.12.25.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
10
|
+
pandas_plots-0.12.25.dist-info/RECORD,,
|
pandas_plots/pii.py
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
import pandas as pd
|
2
|
-
import re
|
3
|
-
|
4
|
-
|
5
|
-
def remove_pii(
|
6
|
-
series: pd.Series,
|
7
|
-
verbose: bool = True,
|
8
|
-
logging: bool = False,
|
9
|
-
custom_regex="",
|
10
|
-
) -> pd.Index:
|
11
|
-
"""
|
12
|
-
Remove personally identifiable information (PII) from the given column.
|
13
|
-
|
14
|
-
Parameters:
|
15
|
-
- series: A pandas Series representing a column in a DataFrame.
|
16
|
-
- verbose: If True, print pii items
|
17
|
-
- logging: If True, write pii items into the file .pii.log
|
18
|
-
- custom_regex: Regex that is injected into detection
|
19
|
-
|
20
|
-
Returns:
|
21
|
-
- index object with indexes of all pii items
|
22
|
-
|
23
|
-
Remarks:
|
24
|
-
- df.drop(axis=0, index=result, inplace=True)
|
25
|
-
"""
|
26
|
-
|
27
|
-
# * reject empty columns
|
28
|
-
assert len(series) > 0
|
29
|
-
|
30
|
-
col = series.copy()
|
31
|
-
|
32
|
-
# * na must be dropped to ensure processsing
|
33
|
-
col.dropna(inplace=True)
|
34
|
-
|
35
|
-
# * find terms
|
36
|
-
_terms = frozenset(["lösch", "herr", "frau", "strasse", "klinik"])
|
37
|
-
idx_terms = col[
|
38
|
-
col.str.contains(
|
39
|
-
"|".join(_terms),
|
40
|
-
case=False,
|
41
|
-
regex=True,
|
42
|
-
)
|
43
|
-
].index
|
44
|
-
|
45
|
-
# # * optional: search for terms in whole df
|
46
|
-
# df.apply(lambda row: row.astype(str).str.contains('test', case=False, regex=True).any(), axis=1)
|
47
|
-
|
48
|
-
# # * find dates
|
49
|
-
ptr_date = r"\d{2}\.\d{2}\.\d{4}"
|
50
|
-
idx_date = col[col.str.contains(ptr_date, regex=True)].index
|
51
|
-
|
52
|
-
# * dr
|
53
|
-
ptr_dr = r"[D|d][R|r]\. | Fr\. | Hr\. | PD "
|
54
|
-
idx_dr = col[col.str.contains(ptr_dr, regex=True)].index
|
55
|
-
|
56
|
-
# * custom
|
57
|
-
idx_custom = (
|
58
|
-
col[col.str.contains(custom_regex, regex=True)].index
|
59
|
-
if custom_regex
|
60
|
-
else pd.Index([])
|
61
|
-
)
|
62
|
-
|
63
|
-
idx_all = idx_terms.union(idx_date).union(idx_dr).union(idx_custom)
|
64
|
-
|
65
|
-
if verbose:
|
66
|
-
# print(f"found: {idx_dr.__len__()} dr | {idx_date.__len__()} date | {idx_terms.__len__()} terms")
|
67
|
-
print(f"found {idx_all.__len__():_} pii items:")
|
68
|
-
print(col.loc[idx_all].tolist())
|
69
|
-
|
70
|
-
if logging: # Assuming logging is defined and has the correct value
|
71
|
-
data = col.loc[idx_all] # Assuming col and idx_all are defined
|
72
|
-
with open(".pii.log", "w") as f:
|
73
|
-
# ! when using str(), it will give only a summary!
|
74
|
-
f.write(data.to_string(index=True))
|
75
|
-
|
76
|
-
return idx_all
|
@@ -1,11 +0,0 @@
|
|
1
|
-
pandas_plots/hlp.py,sha256=kSqoGMEaOtC94wtTS7CMFXMgptv-2tSOMf5Zm7euhpI,20838
|
2
|
-
pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
3
|
-
pandas_plots/pls.py,sha256=jFsHvjG8fvLBdHpaYOX_5TgpDrcA5bMWjAUtXb6bVXo,48629
|
4
|
-
pandas_plots/tbl.py,sha256=RJWBHeKGTAhGpVCY57TsS_dYR-FpInP-TOsKW_tU4V4,32556
|
5
|
-
pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
|
6
|
-
pandas_plots-0.12.24.dist-info/licenses/LICENSE,sha256=ltLbQWUCs-GBQlTPXbt5nHNBE9U5LzjjoS1Y8hHETM4,1051
|
7
|
-
pandas_plots-0.12.24.dist-info/METADATA,sha256=5519ufLPkBZEaylDrN6lC-D5Rtc7xr4tGQVNDtW_5Ms,7564
|
8
|
-
pandas_plots-0.12.24.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
9
|
-
pandas_plots-0.12.24.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
|
10
|
-
pandas_plots-0.12.24.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
|
11
|
-
pandas_plots-0.12.24.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|