PyPI - pandas-plots - Versions diffs - 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl - Mend

pandas-plots 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

pandas_plots/pls.py CHANGED Viewed

@@ -78,7 +78,7 @@ def assign_column_colors(columns, color_palette, null_label):
         raise ValueError(f"Invalid color palette: {color_palette}")
     colors = {col: palette[i % len(palette)] for i, col in enumerate(sorted(columns))}
-    colors[null_label] = "gray"
+    colors[null_label] = "lightgray"
     return colors
 ### main functions
@@ -192,12 +192,32 @@ def plot_stacked_bars(
 ) -> object:
     """
     Generates a stacked bar plot using the provided DataFrame.
-    Updated to assign colors using `assign_column_colors` with nulls colored grey.
     Parameters:
-    All parameters are similar to the original function, with the addition of:
-    - color_palette: str - Name of the color palette.
-    - null_label: str - Label for null values.
+    - df (pd.DataFrame): The input DataFrame with at least two categorical columns and one numerical column.
+    - top_n_index (int): Limit the number of categories displayed on the index axis.
+    - top_n_color (int): Limit the number of categories displayed in the color legend.
+    - dropna (bool): If True, removes rows with missing values; otherwise, replaces them with `null_label`.
+    - swap (bool): If True, swaps the first two columns.
+    - normalize (bool): If True, normalizes numerical values between 0 and 1.
+    - relative (bool): If True, normalizes the bars to a percentage scale.
+    - orientation (Literal["h", "v"]): Defines the orientation of the bars ("v" for vertical, "h" for horizontal).
+    - height (int): Height of the plot.
+    - width (int): Width of the plot.
+    - title (str): Custom title for the plot.
+    - renderer (Literal["png", "svg", None]): Defines the output format.
+    - caption (str): Optional caption for additional context.
+    - sort_values (bool):
+        - If True, sorts bars by the sum of their values (descending).
+        - If False, sorts bars alphabetically.
+    - show_total (bool): If True, adds a row with the total sum of all categories.
+    - precision (int): Number of decimal places for numerical values.
+    - png_path (Path | str): If specified, saves the plot as a PNG file.
+    - color_palette (str): Name of the color palette to use.
+    - null_label (str): Label for null values.
+    Returns:
+    - A Plotly figure object representing the stacked bar chart.
     """
     BAR_LENGTH_MULTIPLIER = 1.05
@@ -211,6 +231,8 @@ def plot_stacked_bars(
         print("❌ first 2 columns must be str")
         return
+    df = df.copy()  # Copy the input DataFrame to avoid modifying the original
     # * add count column[2] as a service if none is present
     if len(df.columns) == 2:
         df["cnt"] = 1
@@ -234,16 +256,6 @@ def plot_stacked_bars(
     col_index = df.columns[0] if not swap else df.columns[1]
     col_color = df.columns[1] if not swap else df.columns[0]
-    # * assign colors to columns
-    unique_colors = sorted(df[col_color].unique())
-    column_colors = assign_column_colors(unique_colors, color_palette, null_label)
-    # * add total as aggregation of df
-    if show_total:
-        df_total = df.copy()
-        df_total[col_index] = " TOTAL"  # add space to make this item first
-        df = pd.concat([df, df_total])
     # * ensure df is grouped to prevent false aggregations
     df = (
         df.groupby([df.columns[0], df.columns[1]])
@@ -251,6 +263,39 @@ def plot_stacked_bars(
         .sum()
         .reset_index()
     )
+    # * Sorting logic based on sort_values
+    if sort_values:
+        sort_order = (
+            df.groupby(col_index)[df.columns[2]].sum().sort_values(ascending=False).index
+        )
+    else:
+        sort_order = sorted(df[col_index].unique())  # Alphabetical order
+    df[col_index] = pd.Categorical(df[col_index], categories=sort_order, ordered=True)
+    # * add total as aggregation of df
+    if show_total:
+        df_total = df.copy()
+        df_total[col_index] = " TOTAL"  # add space to make this item first
+        df = pd.concat([df, df_total])
+    # * Convert to categorical with explicit ordering
+    df[col_index] = pd.Categorical(df[col_index], categories=sort_order, ordered=True)
+    if top_n_index > 0 and len(sort_order) > top_n_index:
+        top_categories = sort_order[:top_n_index]
+        df[col_index] = df[col_index].apply(lambda x: x if x in top_categories else "<other>")
+    unique_colors = sorted(df[col_color].unique())
+    if top_n_color > 0 and len(unique_colors) > top_n_color:
+        top_colors = unique_colors[:top_n_color]
+        df[col_color] = df[col_color].apply(lambda x: x if x in top_colors else "<other>")
+    column_colors = assign_column_colors(sorted(df[col_color].unique()), color_palette, null_label)
+    # # * assign colors to columns
+    # unique_colors = sorted(df[col_color].unique())
+    # column_colors = assign_column_colors(unique_colors, color_palette, null_label)
     # * calculate n
     divider = 2 if show_total else 1
@@ -264,7 +309,7 @@ def plot_stacked_bars(
     caption = _set_caption(caption)
     # * plot
-    _fig = px.bar(
+    fig = px.bar(
         df,
         x=col_index if orientation == "v" else df.columns[2],
         y=df.columns[2] if orientation == "v" else col_index,
@@ -277,6 +322,9 @@ def plot_stacked_bars(
         width=width,
         height=height,
         color_discrete_map=column_colors,  # Use assigned colors
+        category_orders={col_index: list(df[col_index].cat.categories)},  # <- Add this line
+        # category_orders={col_index: df[col_index].categories.tolist() if isinstance(df[col_index].dtype, pd.CategoricalDtype) else sorted(df[col_index].unique())}
     )
         # * get longest bar
     bar_max = (
@@ -286,14 +334,14 @@ def plot_stacked_bars(
     # * ignore if bar mode is on
     if not relative:
         if orientation == "v":
-            _fig.update_yaxes(range=[0, bar_max])
+            fig.update_yaxes(range=[0, bar_max])
         else:
-            _fig.update_xaxes(range=[0, bar_max])
+            fig.update_xaxes(range=[0, bar_max])
     else:
-        _fig.update_layout(barnorm="percent")
+        fig.update_layout(barnorm="percent")
     # * set title properties
-    _fig.update_layout(
+    fig.update_layout(
         title={
             # 'x': 0.1,
             "y": 0.95,
@@ -308,27 +356,27 @@ def plot_stacked_bars(
     # * set dtick
     if orientation == "h":
         if relative:
-            _fig.update_xaxes(dtick=5)
-        elif normalize:
-            _fig.update_xaxes(dtick=0.05)
+            fig.update_xaxes(dtick=5)
+        # bug dticks are ultra dense
+        # elif normalize:
+        #     fig.update_xaxes(dtick=0.05)
     else:
         if relative:
-            _fig.update_yaxes(dtick=5)
-        elif normalize:
-            _fig.update_yaxes(dtick=0.05)
+            fig.update_yaxes(dtick=5)
+        # elif normalize:
+        #     fig.update_yaxes(dtick=0.05)
     # * show grids, set to smaller distance on pct scale
-    _fig.update_xaxes(showgrid=True, gridwidth=1)
-    _fig.update_yaxes(showgrid=True, gridwidth=1)
+    fig.update_xaxes(showgrid=True, gridwidth=1)
+    fig.update_yaxes(showgrid=True, gridwidth=1)
     # * save to png if path is provided
     if png_path is not None:
-        _fig.write_image(Path(png_path).as_posix())
-    _fig.show(renderer)
+        fig.write_image(Path(png_path).as_posix())
-    return _fig
+    fig.show(renderer=renderer)
+    return fig
 def plot_bars(
@@ -927,7 +975,8 @@ def plot_box(
     fig.show("png")
     if summary:
-        print_summary(ser)
+        # * if only series is provided, col name is None
+        print_summary(ser.to_frame())
     # * save to png if path is provided
     if png_path is not None:
@@ -1141,8 +1190,23 @@ def plot_facet_stacked_bars(
     aggregated_df = aggregate_data(df, top_n_index, top_n_columns, top_n_facet, null_label)
-    facets = aggregated_df['facet'].unique()
-    columns = sorted(aggregated_df['col'].unique())
+    # facets = aggregated_df['facet'].unique()
+    facets = sorted(aggregated_df['facet'].unique())  # Ensure facets are sorted consistently
+    if top_n_columns > 0:
+        top_columns = aggregated_df.groupby('col', observed=True)['value'].sum().nlargest(top_n_columns).index.tolist()
+        # aggregated_df['col'] = aggregated_df['col'].apply(lambda x: x if x in top_columns else "<other>")
+        # aggregated_df['col'] = pd.Categorical(aggregated_df['col'], categories=top_columns + ["<other>"], ordered=True)
+        # aggregated_df['col'] = pd.Categorical(
+        #     aggregated_df['col'].map(lambda x: x if x in top_columns else "<other>"),
+        #     categories=top_columns + ["<other>"],
+        #     ordered=True
+        # )
+        aggregated_df['col'] = aggregated_df['col'].apply(lambda x: x if x in top_columns else "<other>")
+    # columns = sorted(aggregated_df['col'].unique())
+    columns = aggregated_df.groupby('col', observed=True)['value'].sum().sort_values(ascending=False).index.tolist()
     column_colors = assign_column_colors(columns, color_palette, null_label)
     fig = make_subplots(
@@ -1189,17 +1253,17 @@ def plot_facet_stacked_bars(
     unique_rows = len(aggregated_df)
     axis_details = []
     if top_n_index > 0:
-        axis_details.append(f"top {top_n_index} [{original_column_names[0]}]")
+        axis_details.append(f"TOP {top_n_index} [{original_column_names[0]}]")
     else:
         axis_details.append(f"[{original_column_names[0]}]")
     if top_n_columns > 0:
-        axis_details.append(f"top {top_n_columns} [{original_column_names[1]}]")
+        axis_details.append(f"TOP {top_n_columns} [{original_column_names[1]}]")
     else:
         axis_details.append(f"[{original_column_names[1]}]")
     if top_n_facet > 0:
-        axis_details.append(f"top {top_n_facet} [{original_column_names[2]}]")
+        axis_details.append(f"TOP {top_n_facet} [{original_column_names[2]}]")
     else:
         axis_details.append(f"[{original_column_names[2]}]")
@@ -1218,6 +1282,6 @@ def plot_facet_stacked_bars(
         png_path = Path(png_path)
         fig.write_image(str(png_path))
-    fig.show(renderer)
+    fig.show(renderer=renderer)
     return fig

{pandas_plots-0.12.4.dist-info → pandas_plots-0.12.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: pandas-plots
-Version: 0.12.4
+Version: 0.12.6
 Summary: A collection of helper for table handling and visualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -20,7 +20,7 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pandas>=2.0.0
-Requires-Dist: plotly>=5.18.0
+Requires-Dist: plotly<6
 Requires-Dist: matplotlib>=3.8.2
 Requires-Dist: matplotlib-venn==0.11.10
 Requires-Dist: seaborn>=0.13.2
@@ -96,7 +96,7 @@ tbl.show_num_df(
   - `plot_histogram()` histogram for one or more **numerical** columns
   - `plot_joints()` a joint plot for **exactly two numerical** columns
   - `plot_quadrants()` quickly shows a 2x2 heatmap
-  - 🆕 `plot_stacked_bars()` shows stacked bars for a facet value as subplots
+  - 🆕 `plot_facet_stacked_bars()` shows stacked bars for a facet value as subplots
 <br>
 - `ven` offers functions for _venn diagrams_

pandas_plots-0.12.6.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+pandas_plots/hlp.py,sha256=N6NrbFagVMMX-ZnV0rIBEz82SeSoOkksfMcCap55W7E,16588
+pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
+pandas_plots/pls.py,sha256=DsFnWbGNmMnZ8a2qnZFwXH6VekwPFaIwZEQ9TVp6xCg,43997
+pandas_plots/tbl.py,sha256=4VvjLisPT1gSvgsLClcrhC7LIJ-_FPNla8nomGflGag,30509
+pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
+pandas_plots-0.12.6.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
+pandas_plots-0.12.6.dist-info/METADATA,sha256=-mCMgoWTwG6HSL8JtuYvwM1LCkzglJm3aIocaUMijO4,7358
+pandas_plots-0.12.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+pandas_plots-0.12.6.dist-info/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
+pandas_plots-0.12.6.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
+pandas_plots-0.12.6.dist-info/RECORD,,

pandas_plots-0.12.6.dist-info/pii.py ADDED Viewed

@@ -0,0 +1,76 @@
+import pandas as pd
+import re
+def remove_pii(
+    series: pd.Series,
+    verbose: bool = True,
+    logging: bool = False,
+    custom_regex="",
+) -> pd.Index:
+    """
+    Remove personally identifiable information (PII) from the given column.
+    Parameters:
+    - series: A pandas Series representing a column in a DataFrame.
+    - verbose: If True, print pii items
+    - logging: If True, write pii items into the file .pii.log
+    - custom_regex: Regex that is injected into detection
+    Returns:
+    - index object with indexes of all pii items
+    Remarks:
+    - df.drop(axis=0, index=result, inplace=True)
+    """
+    # * reject empty columns
+    assert len(series) > 0
+    col = series.copy()
+    # * na must be dropped to ensure processsing
+    col.dropna(inplace=True)
+    # * find terms
+    _terms = frozenset(["lösch", "herr", "frau", "strasse", "klinik"])
+    idx_terms = col[
+        col.str.contains(
+            "|".join(_terms),
+            case=False,
+            regex=True,
+        )
+    ].index
+    # # * optional: search for terms in whole df
+    # df.apply(lambda row: row.astype(str).str.contains('test', case=False, regex=True).any(), axis=1)
+    # # * find dates
+    ptr_date = r"\d{2}\.\d{2}\.\d{4}"
+    idx_date = col[col.str.contains(ptr_date, regex=True)].index
+    # * dr
+    ptr_dr = r"[D|d][R|r]\. | Fr\. | Hr\. | PD "
+    idx_dr = col[col.str.contains(ptr_dr, regex=True)].index
+    # * custom
+    idx_custom = (
+        col[col.str.contains(custom_regex, regex=True)].index
+        if custom_regex
+        else pd.Index([])
+    )
+    idx_all = idx_terms.union(idx_date).union(idx_dr).union(idx_custom)
+    if verbose:
+        # print(f"found: {idx_dr.__len__()} dr | {idx_date.__len__()} date | {idx_terms.__len__()} terms")
+        print(f"found {idx_all.__len__():_} pii items:")
+        print(col.loc[idx_all].tolist())
+    if logging:  # Assuming logging is defined and has the correct value
+        data = col.loc[idx_all]  # Assuming col and idx_all are defined
+        with open(".pii.log", "w") as f:
+            # ! when using str(), it will give only a summary!
+            f.write(data.to_string(index=True))
+    return idx_all

pandas_plots-0.12.4.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-pandas_plots/hlp.py,sha256=N6NrbFagVMMX-ZnV0rIBEz82SeSoOkksfMcCap55W7E,16588
-pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
-pandas_plots/pls.py,sha256=isveg6_frLZC3Gt3VEsdOLiLw7aTf3riUahmJLHiEq8,40265
-pandas_plots/tbl.py,sha256=4VvjLisPT1gSvgsLClcrhC7LIJ-_FPNla8nomGflGag,30509
-pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
-pandas_plots-0.12.4.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
-pandas_plots-0.12.4.dist-info/METADATA,sha256=WZUfWOid_eYMtuS2V_P_C_ChaD1dTqDfuectlxzAJe8,7358
-pandas_plots-0.12.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-pandas_plots-0.12.4.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
-pandas_plots-0.12.4.dist-info/RECORD,,

{pandas_plots-0.12.4.dist-info → pandas_plots-0.12.6.dist-info}/LICENSE RENAMED Viewed

File without changes

{pandas_plots-0.12.4.dist-info → pandas_plots-0.12.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{pandas_plots-0.12.4.dist-info → pandas_plots-0.12.6.dist-info}/top_level.txt RENAMED Viewed

File without changes

pandas-plots 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl

pandas-plots 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl