PyPI - pandas-plots - Versions diffs - 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl - Mend

pandas-plots 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

pandas_plots/tbl.py CHANGED Viewed

@@ -1,24 +1,25 @@
 import warnings
-warnings.filterwarnings('ignore')
-from scipy import stats
-from typing import Literal
+warnings.filterwarnings("ignore")
+import math
+import os
+from typing import Literal, get_args
 import numpy as np
 import pandas as pd
 import plotly.express as px
-import pandas as pd
-import math
-import os
 from plotly.subplots import make_subplots
+from scipy import stats
 # pd.options.mode.chained_assignment = None
 from . import txt
-# ! check pandas version
-assert pd.__version__ > '2.0.0', 'pandas version must be >= 2.0.0'
+AGG_FUNC=Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]
 def describe_df(
     df: pd.DataFrame,
-    caption: str,
+    caption: str,
     use_plot: bool = True,
     use_columns: bool = True,
     renderer: Literal["png", "svg", None] = "png",
@@ -44,7 +45,7 @@ def describe_df(
     sort_mode (Literal["value", "index"]): sort by value or index
     top_n_uniques (int): number of uniques to display
     top_n_chars_in_index (int): number of characters to display on plot axis
     usage:
     describe_df(
         df=df,
@@ -56,12 +57,12 @@ def describe_df(
         fig_offset=None,
         sort_mode="value",
     )
     hint: skewness may not properly work if the columns is float and/or has only 1 value
     """
     # * copy df, df col types are modified
     df = df.copy()
     # * check if df is empty
     if len(df) == 0:
         print(f"DataFrame is empty!")
@@ -73,10 +74,11 @@ def describe_df(
     print(f"🟣 missings: {dict(df.isna().sum())}")
     print("--- column uniques (all)")
     print(f"🟠 index {txt.wrap(df.index.tolist()[:top_n_uniques])}")
     def get_uniques_header(col: str):
         # * sorting has issues when col is of mixed type (object)
-        if df[col].dtype=='object':
-            df[col]=df[col].astype(str)
+        if df[col].dtype == "object":
+            df[col] = df[col].astype(str)
         # * get unique values
         # unis = df[col].sort_values().unique()
         unis = list(df[col].value_counts().sort_index().index)
@@ -89,16 +91,18 @@ def describe_df(
         _u, _h = get_uniques_header(col)
         if use_columns:
             # * check col type
-            is_str=df.loc[:,col].dtype.kind == 'O'
+            is_str = df.loc[:, col].dtype.kind == "O"
             # * wrap output
-            print(f"{_h} {txt.wrap(_u[:top_n_uniques], max_items_in_line=70, apo=is_str)}")
+            print(
+                f"{_h} {txt.wrap(_u[:top_n_uniques], max_items_in_line=70, apo=is_str)}"
+            )
             # print(f"{_h} {_u[:top_n_uniques]}")
         else:
             print(f"{_h}")
     print("--- column stats (numeric)")
     # * only show numerics
-    for col in df.select_dtypes('number').columns:
+    for col in df.select_dtypes("number").columns:
         _u, _h = get_uniques_header(col)
         # * extra care for scipy metrics, these are very vulnarable to nan
@@ -117,7 +121,7 @@ def describe_df(
     cols = df.iloc[:, :fig_offset].columns
     cols_num = df.select_dtypes(np.number).columns.tolist()
     # cols_str = list(set(df.columns) - set(cols_num))
     # * set constant column count, calc rows
     fig_rows = math.ceil(len(cols) / fig_cols)
@@ -145,17 +149,21 @@ def describe_df(
             figsub = px.box(df, x=col, points="outliers")
         else:
             # * only respect 100 items (fixed value)
-            x=span.iloc[:100].index
-            y=span.iloc[:100].values
+            x = span.iloc[:100].index
+            y = span.iloc[:100].values
             # * cut long strings
-            if x.dtype=='object' and top_n_chars_in_index > 0:
-                x=x.astype(str).tolist()
-                _cut = lambda s: s[:top_n_chars_in_index] + '..' if len(s) > top_n_chars_in_index else s[:top_n_chars_in_index]
-                x=[_cut(item) for item in x]
+            if x.dtype == "object" and top_n_chars_in_index > 0:
+                x = x.astype(str).tolist()
+                _cut = lambda s: (
+                    s[:top_n_chars_in_index] + ".."
+                    if len(s) > top_n_chars_in_index
+                    else s[:top_n_chars_in_index]
+                )
+                x = [_cut(item) for item in x]
             figsub = px.bar(
                 x=x,
                 y=y,
-                )
+            )
         # * grid position
         _row = math.floor((i) / fig_cols) + 1
         _col = i % fig_cols + 1
@@ -164,7 +172,9 @@ def describe_df(
         fig.add_trace(figsub["data"][0], row=_row, col=_col)
     # * set template
-    fig.update_layout(template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly")
+    fig.update_layout(
+        template="plotly_dark" if os.getenv("THEME") == "dark" else "plotly"
+    )
     fig.show(renderer)
@@ -177,12 +187,13 @@ def pivot_df(
     data_bar_axis: Literal["x", "y", "xy", None] = "xy",
     pct_axis: Literal["x", "xy", None] = "xy",
     precision: int = 0,
-    show_totals: bool = True,
-    heatmap_axis: Literal["x","y","xy", None] = None,
+    heatmap_axis: Literal["x", "y", "xy", None] = None,
+    total_mode: AGG_FUNC = "sum",
+    total_axis: Literal["x", "y", "xy", None] = "xy",
 ) -> pd.DataFrame:
     """
     A function to pivot a DataFrame based on specified parameters and return the result as a new DataFrame.
     Args:
         df (pd.DataFrame): The input DataFrame to be pivoted.
         dropna (bool, optional): Whether to drop NaN values. Defaults to False.
@@ -192,14 +203,17 @@ def pivot_df(
         data_bar_axis (Literal["x", "y", "xy", None], optional): The axis for displaying data bars. Defaults to "xy".
         pct_axis (Literal["x", "xy", None], optional): The axis for displaying percentages. Defaults to None.
         precision (int, optional): The precision for displaying values. Defaults to 0.
-        show_totals (bool, optional): Whether to show totals in the result. Defaults to False.
         heatmap_axis (Literal["x","y","xy", None], optional): The axis for displaying heatmaps. Defaults to None.
+        total_mode (Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"], optional): The aggregation mode for displaying totals. Defaults to "sum".
+        total_axis (Literal["x", "y", "xy", None], optional): The axis for displaying totals. Defaults to "xy".
     Returns:
         pd.DataFrame: The pivoted DataFrame.
     """
     # * ensure arguments match parameter definition
-    if (pct_axis and pct_axis not in ["x", "xy"]) or (data_bar_axis and  data_bar_axis not in ["x","y","xy"]):
+    if (pct_axis and pct_axis not in ["x", "xy"]) or (
+        data_bar_axis and data_bar_axis not in ["x", "y", "xy"]
+    ):
         print(f"❌ axis not supported")
         return
@@ -210,6 +224,10 @@ def pivot_df(
     if not pd.api.types.is_numeric_dtype(df.iloc[:, 2]):
         print("❌ 3rd column must be numeric")
         return
+    if total_mode and total_mode not in get_args(AGG_FUNC):
+        print(f"❌ total_mode '{total_mode}' not supported")
+        return
     df = df.copy()
@@ -257,25 +275,36 @@ def pivot_df(
     )
     df = df.fillna(0)  # .astype(_type)
-    return show_num_df(df, show_totals=show_totals, data_bar_axis=data_bar_axis, pct_axis=pct_axis, swap=swap, precision=precision, heatmap_axis=heatmap_axis)
+    return show_num_df(
+        df,
+        total_mode=total_mode,
+        total_axis=total_axis,
+        data_bar_axis=data_bar_axis,
+        pct_axis=pct_axis,
+        swap=swap,
+        precision=precision,
+        heatmap_axis=heatmap_axis,
+    )
 def show_num_df(
     df,
-    show_total: bool = False,
-    total_mode: Literal["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"] = "sum",
-    heatmap_axis: Literal["x","y","xy", None] = None,
-    data_bar_axis: Literal["x","y","xy", None] = None,
+    total_mode: AGG_FUNC = "sum",
+    total_axis: Literal["x", "y", "xy", None] = "xy",
+    heatmap_axis: Literal["x", "y", "xy", None] = None,
+    data_bar_axis: Literal["x", "y", "xy", None] = None,
     pct_axis: Literal["x", "xy", None] = None,
     swap: bool = False,
-    precision: int=0,
+    precision: int = 0,
 ):
     """
-    A function to display a DataFrame with various options for styling and formatting, including the ability to show totals, apply data bar coloring, and control the display precision.
+    A function to display a DataFrame with various options for styling and formatting, including the ability to show totals, apply data bar coloring, and control the display precision.
     Parameters:
     - df: the DataFrame to display
-    - show_total: a boolean indicating whether to show totals
     - total_mode: a Literal indicating the mode for aggregating totals ["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]
+    - total_axis (Literal["x", "y", "xy", None], optional): The axis for displaying totals. Defaults to "xy".
     - heatmap_axis (Literal["x","y","xy", None], optional): The axis for displaying heatmaps. Defaults to None.
     - data_bar_axis: a Literal indicating the axis for applying data bar coloring ["x","y","xy", None]
     - pct_axis: a Literal indicating the directions for displaying percentages ["x","xy", None]. "x" means sum up pct per column
@@ -285,43 +314,48 @@ def show_num_df(
     The function returns a styled representation of the DataFrame.
     """
     # * ensure arguments match parameter definition
-    if any([df[col].dtype.kind not in ['i','u','f'] for col in df.columns]) == True:
+    if any([df[col].dtype.kind not in ["i", "u", "f"] for col in df.columns]) == True:
         print(f"❌ table must contain numeric data only")
         return
-    if (pct_axis and pct_axis not in ["x", "xy"]) or (data_bar_axis and  data_bar_axis not in ["x","y","xy"]) or (heatmap_axis and heatmap_axis not in ["x","y","xy"]):
+    if (
+        (pct_axis and pct_axis not in ["x", "xy"])
+        or (data_bar_axis and data_bar_axis not in ["x", "y", "xy"])
+        or (heatmap_axis and heatmap_axis not in ["x", "y", "xy"])
+    ):
         print(f"❌ axis not supported")
         return
-    if (total_mode and total_mode not in ["sum", "mean", "median", "min", "max", "std", "var", "skew", "kurt"]) :
-        print(f"❌ total mode '{total_mode}' not supported")
+    if total_mode and total_mode not in get_args(AGG_FUNC):
+        print(f"❌ total_mode '{total_mode}' not supported")
         return
     theme = os.getenv("THEME") or "light"
     # * copy df, do not reference original
     df_ = df.copy() if not swap else df.T.copy()
-    # * alter _df, add totals
-    if show_total:
-        df_.loc["Total"] = df_.agg(total_mode,axis=0)
+    # * alter df_, add totals
+    if total_mode and total_axis in ['x','xy']:
+        df_.loc["Total"] = df_.agg(total_mode, axis=0)
+    if total_mode and total_axis in ['y','xy']:
         df_.loc[:, "Total"] = df_.agg(total_mode, axis=1)
     # * derive style
     out = df_.style
-    color_highlight = "lightblue" if theme == "light" else "darkgrey"
+    color_highlight = "lightblue" if theme == "light" else "#666666"
     color_zeros = "grey" if theme == "light" else "grey"
     color_pct = "grey" if theme == "light" else "yellow"
     color_values = "black" if theme == "light" else "white"
     color_minus = "red" if theme == "light" else "red"
-    cmap_heat="Blues" if theme == "light" else "copper"
+    cmap_heat = "Blues" if theme == "light" else "copper"
     # * apply data bar coloring
     if data_bar_axis:
         out.bar(
             color=f"{color_highlight}",
-            axis= 0 if data_bar_axis == "x" else 1 if data_bar_axis == "y" else None,
+            axis=0 if data_bar_axis == "x" else 1 if data_bar_axis == "y" else None,
         )
     # * all cell formatting in one place
@@ -334,17 +368,18 @@ def show_num_df(
         # * here cell > 0
         if show_pct:
             return f'{cell:_.{precision}f} <span style="color: {color_pct}">({(cell /sum):.1%})</span>'
-        return f'{cell:_.{precision}f}'
+        return f"{cell:_.{precision}f}"
     # * build pct formatting
-    if pct_axis =='x':
+    if pct_axis == "x":
         # * totals on either axis influence the sum
-        divider = 2 if show_total else 1
+        divider = 2 if total_axis in ['x','xy'] else 1
         # * cell formatting to each column instead of altering values w/ df.apply
         # * uses dictionary comprehension, and a lambda function with two input variables
         col_sums = df_.sum() / divider
         formatter = {
-            col: lambda x, col=col: format_cell(x, col_sums[col], pct_axis) for col in df_.columns
+            col: lambda x, col=col: format_cell(x, col_sums[col], pct_axis)
+            for col in df_.columns
         }
     # ? y is not implemented, needs row wise formatting
@@ -354,14 +389,14 @@ def show_num_df(
     #         row: lambda x, row=row: format_cell(x, row_sums[row]) for row in _df.index
     #     }
-    elif pct_axis=='xy':
-        divider = 4 if show_total else 1
+    elif pct_axis == "xy":
+        divider = 4 if total_axis == 'xy' else 2 if total_axis in ['x','y'] else 1
         n = df_.sum().sum() / divider
         formatter = {
             col: lambda x, col=col: format_cell(x, n, pct_axis) for col in df_.columns
         }
     else:
-        # *
+        # *
         formatter = {
             col: lambda x, col=col: format_cell(x, x, False) for col in df_.columns
         }
@@ -369,15 +404,15 @@ def show_num_df(
     out.format(formatter=formatter)
     # * apply fonts for cells
-    out.set_properties(**{'font-family': 'Courier'})
+    out.set_properties(**{"font-family": "Courier"})
     # * apply fonts for th (inkl. index)
-    _props=[
-                # ("font-size", "10pt"),
-                # ("font-weight", "bold"),
-                # ("font-family", "Courier"),
-                ("text-align", "right")
-                ]
+    _props = [
+        # ("font-size", "10pt"),
+        # ("font-weight", "bold"),
+        # ("font-family", "Courier"),
+        ("text-align", "right")
+    ]
     out.set_table_styles(
         [
             dict(selector="th", props=_props),
@@ -386,6 +421,9 @@ def show_num_df(
     )
     if heatmap_axis:
-        out.background_gradient(cmap=cmap_heat, axis=None if heatmap_axis=="xy" else 0 if heatmap_axis=="y" else 1)
+        out.background_gradient(
+            cmap=cmap_heat,
+            axis=None if heatmap_axis == "xy" else 0 if heatmap_axis == "y" else 1,
+        )
-    return out
+    return out

{pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pandas-plots
-Version: 0.8.6
+Version: 0.8.8
 Summary: A collection of helper for table handling and vizualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier

{pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
 pandas_plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pandas_plots/pls.py,sha256=CUh2lskJ6eLO_ccAg_UXXpRoWvZ7-Q3xKcUSEnKhK9U,23349
 pandas_plots/sql.py,sha256=SHrmwhmzq0QYygvaoKwv7neiwf_Rv87VmdUkADYPdR8,2485
-pandas_plots/tbl.py,sha256=cW5U58SIsfHnqwuNUWFtA2-KEDuAOUJw5SJ9wNrXav4,15302
+pandas_plots/tbl.py,sha256=-Clf01gUetNw3KieqjpFRL0-2MJpIB3mfKU36Tzeij0,16027
 pandas_plots/txt.py,sha256=LnW9OF3mSX2fp9JajefF3Mz3LuCA8MaqlFZYjT_jaQw,1537
 pandas_plots/ven.py,sha256=nDKS7cTIHOJhIXKnAxAkEoqPgVZCUPJld5CvSiB2JC4,11721
-pandas_plots-0.8.6.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
-pandas_plots-0.8.6.dist-info/METADATA,sha256=pufU6ACWcupMNBJqCAiHzTTRQfp1GQ1XBZaSvAH23D0,5478
-pandas_plots-0.8.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-pandas_plots-0.8.6.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
-pandas_plots-0.8.6.dist-info/RECORD,,
+pandas_plots-0.8.8.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
+pandas_plots-0.8.8.dist-info/METADATA,sha256=1r7P6xc2coVUrwDz8b0e5cEwJVw7T38jFnxEnrmZwJE,5478
+pandas_plots-0.8.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+pandas_plots-0.8.8.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
+pandas_plots-0.8.8.dist-info/RECORD,,

{pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/LICENSE RENAMED Viewed

File without changes

{pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{pandas_plots-0.8.6.dist-info → pandas_plots-0.8.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

pandas-plots 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl

pandas-plots 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl