PyPI - pandas-plots - Versions diffs - 0.12.5__tar.gz → 0.12.6__tar.gz - Mend

pandas-plots 0.12.5 (tar.gz) → 0.12.6 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

{pandas_plots-0.12.5/src/pandas_plots.egg-info → pandas_plots-0.12.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: pandas-plots
-Version: 0.12.5
+Version: 0.12.6
 Summary: A collection of helper for table handling and visualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier

{pandas_plots-0.12.5 → pandas_plots-0.12.6}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = pandas-plots
-version = 0.12.5
+version = 0.12.6
 author = smeisegeier
 author_email = dexterDSDo@googlemail.com
 description = A collection of helper for table handling and visualization

pandas_plots-0.12.6/src/pandas_plots/pii.py ADDED Viewed

@@ -0,0 +1,76 @@
+import pandas as pd
+import re
+def remove_pii(
+    series: pd.Series,
+    verbose: bool = True,
+    logging: bool = False,
+    custom_regex="",
+) -> pd.Index:
+    """
+    Find entries of a column that look like personally identifiable information (PII).
+
+    Despite its name, this function does not modify or drop anything: it works on a
+    copy of the series and only returns the index labels of the suspicious entries.
+    An entry is flagged when it matches at least one of these checks:
+    a built-in keyword list (case-insensitive), a dd.mm.yyyy date pattern,
+    a title/salutation pattern, or the optional custom regex.
+
+    Parameters:
+    - series: A pandas Series representing a column in a DataFrame.
+      NaN entries are dropped before matching and are therefore never flagged.
+    - verbose: If True, print the number of flagged entries and their values.
+    - logging: If True, write the flagged entries (index and value) into the
+      file .pii.log; the file is opened in "w" mode, so it is overwritten.
+    - custom_regex: Additional regex that is searched for; skipped when empty.
+
+    Returns:
+    - Index object holding the union of the index labels of all flagged entries.
+
+    Remarks:
+    - To remove the flagged rows from the originating DataFrame use:
+      df.drop(axis=0, index=result, inplace=True)
+    """
+    # * reject empty columns
+    # NOTE(review): assert is stripped when Python runs with -O, so this check
+    # would then be skipped silently.
+    assert len(series) > 0
+    # * work on a copy so the caller's series is left untouched
+    col = series.copy()
+    # * na must be dropped to ensure processing
+    col.dropna(inplace=True)
+    # * find terms: case-insensitive substring search for any of the keywords
+    # NOTE(review): the .str accessor presumably requires text data - confirm
+    # behaviour for non-string columns.
+    _terms = frozenset(["lösch", "herr", "frau", "strasse", "klinik"])
+    idx_terms = col[
+        col.str.contains(
+            "|".join(_terms),
+            case=False,
+            regex=True,
+        )
+    ].index
+    # # * optional: search for terms in whole df
+    # df.apply(lambda row: row.astype(str).str.contains('test', case=False, regex=True).any(), axis=1)
+    # * find dates written as dd.mm.yyyy (two digits, two digits, four digits)
+    ptr_date = r"\d{2}\.\d{2}\.\d{4}"
+    idx_date = col[col.str.contains(ptr_date, regex=True)].index
+    # * dr: titles / salutations such as "Dr. ", " Fr. ", " Hr. ", " PD "
+    # NOTE(review): the blanks around each "|" are part of the alternatives, so
+    # e.g. " Fr. " only matches when surrounded by spaces. Inside the brackets
+    # "|" is a literal character, i.e. [D|d] also matches "|" - "[Dd][Rr]" was
+    # probably intended; confirm before changing.
+    ptr_dr = r"[D|d][R|r]\. | Fr\. | Hr\. | PD "
+    idx_dr = col[col.str.contains(ptr_dr, regex=True)].index
+    # * custom: only evaluated when a non-empty regex was passed
+    idx_custom = (
+        col[col.str.contains(custom_regex, regex=True)].index
+        if custom_regex
+        else pd.Index([])
+    )
+    # * combine the hits of all checks into one index
+    idx_all = idx_terms.union(idx_date).union(idx_dr).union(idx_custom)
+    if verbose:
+        # print(f"found: {idx_dr.__len__()} dr | {idx_date.__len__()} date | {idx_terms.__len__()} terms")
+        # * the ":_" format spec uses "_" as thousands separator
+        print(f"found {idx_all.__len__():_} pii items:")
+        print(col.loc[idx_all].tolist())
+    if logging:  # here "logging" is the bool parameter, not the stdlib module
+        data = col.loc[idx_all]  # flagged entries only
+        with open(".pii.log", "w") as f:
+            # ! to_string() is used on purpose: str() would give only a summary
+            f.write(data.to_string(index=True))
+    return idx_all

{pandas_plots-0.12.5 → pandas_plots-0.12.6}/src/pandas_plots/pls.py RENAMED Viewed

@@ -256,16 +256,6 @@ def plot_stacked_bars(
     col_index = df.columns[0] if not swap else df.columns[1]
     col_color = df.columns[1] if not swap else df.columns[0]
-    # * assign colors to columns
-    unique_colors = sorted(df[col_color].unique())
-    column_colors = assign_column_colors(unique_colors, color_palette, null_label)
-    # * add total as aggregation of df
-    if show_total:
-        df_total = df.copy()
-        df_total[col_index] = " TOTAL"  # add space to make this item first
-        df = pd.concat([df, df_total])
     # * ensure df is grouped to prevent false aggregations
     df = (
         df.groupby([df.columns[0], df.columns[1]])
@@ -281,10 +271,31 @@ def plot_stacked_bars(
         )
     else:
         sort_order = sorted(df[col_index].unique())  # Alphabetical order
+    df[col_index] = pd.Categorical(df[col_index], categories=sort_order, ordered=True)
+    # * add total as aggregation of df
+    if show_total:
+        df_total = df.copy()
+        df_total[col_index] = " TOTAL"  # add space to make this item first
+        df = pd.concat([df, df_total])
     # * Convert to categorical with explicit ordering
     df[col_index] = pd.Categorical(df[col_index], categories=sort_order, ordered=True)
+    if top_n_index > 0 and len(sort_order) > top_n_index:
+        top_categories = sort_order[:top_n_index]
+        df[col_index] = df[col_index].apply(lambda x: x if x in top_categories else "<other>")
+    unique_colors = sorted(df[col_color].unique())
+    if top_n_color > 0 and len(unique_colors) > top_n_color:
+        top_colors = unique_colors[:top_n_color]
+        df[col_color] = df[col_color].apply(lambda x: x if x in top_colors else "<other>")
+    column_colors = assign_column_colors(sorted(df[col_color].unique()), color_palette, null_label)
+    # # * assign colors to columns
+    # unique_colors = sorted(df[col_color].unique())
+    # column_colors = assign_column_colors(unique_colors, color_palette, null_label)
     # * calculate n
     divider = 2 if show_total else 1
@@ -312,6 +323,8 @@ def plot_stacked_bars(
         height=height,
         color_discrete_map=column_colors,  # Use assigned colors
         category_orders={col_index: list(df[col_index].cat.categories)},  # <- Add this line
+        # category_orders={col_index: df[col_index].categories.tolist() if isinstance(df[col_index].dtype, pd.CategoricalDtype) else sorted(df[col_index].unique())}
     )
         # * get longest bar
     bar_max = (
@@ -344,13 +357,14 @@ def plot_stacked_bars(
     if orientation == "h":
         if relative:
             fig.update_xaxes(dtick=5)
-        elif normalize:
-            fig.update_xaxes(dtick=0.05)
+        # bug dticks are ultra dense
+        # elif normalize:
+        #     fig.update_xaxes(dtick=0.05)
     else:
         if relative:
             fig.update_yaxes(dtick=5)
-        elif normalize:
-            fig.update_yaxes(dtick=0.05)
+        # elif normalize:
+        #     fig.update_yaxes(dtick=0.05)
     # * show grids, set to smaller distance on pct scale
     fig.update_xaxes(showgrid=True, gridwidth=1)
@@ -961,7 +975,8 @@ def plot_box(
     fig.show("png")
     if summary:
-        print_summary(ser)
+        # * if only series is provided, col name is None
+        print_summary(ser.to_frame())
     # * save to png if path is provided
     if png_path is not None:
@@ -1175,8 +1190,23 @@ def plot_facet_stacked_bars(
     aggregated_df = aggregate_data(df, top_n_index, top_n_columns, top_n_facet, null_label)
-    facets = aggregated_df['facet'].unique()
-    columns = sorted(aggregated_df['col'].unique())
+    # facets = aggregated_df['facet'].unique()
+    facets = sorted(aggregated_df['facet'].unique())  # Ensure facets are sorted consistently
+    if top_n_columns > 0:
+        top_columns = aggregated_df.groupby('col', observed=True)['value'].sum().nlargest(top_n_columns).index.tolist()
+        # aggregated_df['col'] = aggregated_df['col'].apply(lambda x: x if x in top_columns else "<other>")
+        # aggregated_df['col'] = pd.Categorical(aggregated_df['col'], categories=top_columns + ["<other>"], ordered=True)
+        # aggregated_df['col'] = pd.Categorical(
+        #     aggregated_df['col'].map(lambda x: x if x in top_columns else "<other>"),
+        #     categories=top_columns + ["<other>"],
+        #     ordered=True
+        # )
+        aggregated_df['col'] = aggregated_df['col'].apply(lambda x: x if x in top_columns else "<other>")
+    # columns = sorted(aggregated_df['col'].unique())
+    columns = aggregated_df.groupby('col', observed=True)['value'].sum().sort_values(ascending=False).index.tolist()
     column_colors = assign_column_colors(columns, color_palette, null_label)
     fig = make_subplots(

{pandas_plots-0.12.5 → pandas_plots-0.12.6/src/pandas_plots.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: pandas-plots
-Version: 0.12.5
+Version: 0.12.6
 Summary: A collection of helper for table handling and visualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier

{pandas_plots-0.12.5 → pandas_plots-0.12.6}/src/pandas_plots.egg-info/SOURCES.txt RENAMED Viewed

@@ -10,5 +10,6 @@ src/pandas_plots/ven.py
 src/pandas_plots.egg-info/PKG-INFO
 src/pandas_plots.egg-info/SOURCES.txt
 src/pandas_plots.egg-info/dependency_links.txt
+src/pandas_plots.egg-info/pii.py
 src/pandas_plots.egg-info/requires.txt
 src/pandas_plots.egg-info/top_level.txt