PyPI - pandas-plots - Versions diffs - 0.11.14__tar.gz → 0.11.16__tar.gz - Mend

pandas-plots 0.11.14.tar.gz → 0.11.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{pandas_plots-0.11.14/src/pandas_plots.egg-info → pandas_plots-0.11.16}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pandas-plots
-Version: 0.11.14
+Version: 0.11.16
 Summary: A collection of helper for table handling and vizualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -22,7 +22,7 @@ License-File: LICENSE
 Requires-Dist: pandas>=2.0.0
 Requires-Dist: plotly>=5.18.0
 Requires-Dist: matplotlib>=3.8.2
-Requires-Dist: matplotlib-venn>=0.11.10
+Requires-Dist: matplotlib-venn==0.11.10
 Requires-Dist: seaborn>=0.13.2
 Requires-Dist: Jinja2>=3.1.4
 Requires-Dist: requests>=2.32.0
@@ -83,6 +83,7 @@ tbl.show_num_df(
   - `describe_df()` an alternative version of pandas `describe()` function
   - `descr_db()` a very short descr for a `duckdb` relation
   - `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
+  - `print_summary()` shows statistics for a pandas DataFrame or Series
 - `pls` for plotly visualizations
   - `plot_box()` auto annotated boxplot w/ violin option

{pandas_plots-0.11.14 → pandas_plots-0.11.16}/README.md RENAMED Viewed

@@ -49,6 +49,7 @@ tbl.show_num_df(
   - `describe_df()` an alternative version of pandas `describe()` function
   - `descr_db()` a very short descr for a `duckdb` relation
   - `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
+  - `print_summary()` shows statistics for a pandas DataFrame or Series
 - `pls` for plotly visualizations
   - `plot_box()` auto annotated boxplot w/ violin option

{pandas_plots-0.11.14 → pandas_plots-0.11.16}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = pandas-plots
-version = 0.11.14
+version = 0.11.16
 author = smeisegeier
 author_email = dexterDSDo@googlemail.com
 description = A collection of helper for table handling and vizualization
@@ -28,7 +28,7 @@ install_requires =
 	pandas >= 2.0.0
 	plotly >= 5.18.0
 	matplotlib >= 3.8.2
-	matplotlib-venn >= 0.11.10
+	matplotlib-venn == 0.11.10
 	seaborn >= 0.13.2
 	Jinja2 >= 3.1.4
 	requests >= 2.32.0

{pandas_plots-0.11.14 → pandas_plots-0.11.16}/src/pandas_plots/pls.py RENAMED Viewed

@@ -11,6 +11,7 @@ from matplotlib import pyplot as plt
 from plotly import express as px
 from .hlp import *
+from .tbl import print_summary
 def _set_caption(caption: str) -> str:
@@ -757,6 +758,7 @@ def plot_box(
     height: int = 200,
     width: int = 1200,
     annotations: bool = True,
+    summary: bool = True,
     caption: str = None,
     title: str = None,
     violin: bool = False,
@@ -776,6 +778,7 @@ def plot_box(
         violin: Use violin plot or not
         x_min: The minimum value for the x-axis scale (max and min must be set)
         x_max: The maximum value for the x-axis scale (max and min must be set)
+        summary: Whether to add a summary table to the plot
     Returns:
         None
@@ -885,6 +888,8 @@ def plot_box(
         )
     fig.show("png")
+    if summary:
+        print_summary(ser)
     return
@@ -896,6 +901,7 @@ def plot_boxes(
     height: int = 600,
     width: int = 800,
     annotations: bool = True,
+    summary: bool = True,
     title: str = None,
 ) -> None:
     """
@@ -909,6 +915,7 @@ def plot_boxes(
         height (int): The height of the plot.
         width (int): The width of the plot.
         annotations (bool): Whether to add annotations to the plot.
+        summary (bool): Whether to add a summary to the plot.
     Returns:
         None
@@ -1022,6 +1029,8 @@ def plot_boxes(
     fig.update_yaxes(title_text=df.columns[1])
     fig.show("png")
+    if summary:
+        print_summary(df)
     return

{pandas_plots-0.11.14 → pandas_plots-0.11.16}/src/pandas_plots/tbl.py RENAMED Viewed

@@ -112,18 +112,19 @@ def describe_df(
         header = f"🟠 {col}({len(unis):_}|{df[col].dtype})"
         return unis, header
-    # * show all columns
+    # hack this block somehow interferes with the plotly renderer. so its run even when use_columns=False
     if use_columns:
         print("--- column uniques (all)")
         print(f"🟠 index {wrap_text(df.index.tolist()[:top_n_uniques])}")
-        for col in df.columns[:]:
-            _u, _h = get_uniques_header(col)
-            # * check col type
-            is_str = df.loc[:, col].dtype.kind == "O"
-            # * wrap output
-            print(
-                f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)}"
-            )
+    for col in df.columns[:]:
+        _u, _h = get_uniques_header(col)
+        # * check col type
+        is_str = df.loc[:, col].dtype.kind == "O"
+        # * wrap output
+        if use_columns:
+                print(
+                    f"{_h} {wrap_text(_u[:top_n_uniques], max_items_in_line=70, use_apo=is_str)}"
+                )
     print("--- column stats (numeric)")
     # * only show numerics
@@ -131,9 +132,10 @@ def describe_df(
         _u, _h = get_uniques_header(col)
         # * extra care for scipy metrics, these are very vulnarable to nan
-        print(
-            f"{_h} min: {round(df[col].min(),3):_} | max: {round(df[col].max(),3):_} | median: {round(df[col].median(),3):_} | mean: {round(df[col].mean(),3):_} | std: {round(df[col].std(),3):_} | cv: {round(df[col].std() / df[col].mean(),3):_} | sum: {round(df[col].sum(),3):_} | skew: {round(stats.skew(df[col].dropna().tolist()),3)} | kurto: {round(stats.kurtosis(df[col].dropna().tolist()),3)}"
-        )
+        # print(
+        #     f"{_h} min: {round(df[col].min(),3):_} | max: {round(df[col].max(),3):_} | median: {round(df[col].median(),3):_} | mean: {round(df[col].mean(),3):_} | std: {round(df[col].std(),3):_} | cv: {round(df[col].std() / df[col].mean(),3):_} | sum: {round(df[col].sum(),3):_} | skew: {round(stats.skew(df[col].dropna().tolist()),3)} | kurto: {round(stats.kurtosis(df[col].dropna().tolist()),3)}"
+        # )
+        print_summary(df[col], _h)
     #  * show first 3 rows
     display(df[:3])
@@ -619,3 +621,57 @@ def show_num_df(
         )
     return out
+def print_summary(df: pd.DataFrame | pd.Series, name: str="🟠 "):
+    """
+    Print statistical summary for a pandas DataFrame or Series.
+    The function computes and prints various statistics for each numeric column in a DataFrame
+    or for a Series. Statistics include minimum, lower bound, 25th percentile (Q1), median, mean,
+    75th percentile (Q3), upper bound, maximum, standard deviation, coefficient of variation,
+    sum, skewness, and kurtosis. The interquartile range (IQR) is used to compute the lower
+    and upper bounds, which are adjusted not to exceed the min and max of the data.
+    Args:
+        df (Union[pd.DataFrame, pd.Series]): Input DataFrame or Series. Only numeric columns
+        in DataFrame are considered.
+    """
+    if df.empty:
+        return
+    def print_summary_ser(ser: pd.Series, name: str=""):
+        # Calculate IQR and pass `rng=(25, 75)` to get the interquartile range
+        iqr_value = stats.iqr(ser)
+        # Using the iqr function, we still calculate the bounds manually
+        q1 = stats.scoreatpercentile(ser, 25)
+        q3 = stats.scoreatpercentile(ser, 75)
+        # Calculate upper bound directly
+        min = round(ser.min(),3)
+        med = round(ser.median(),3)
+        upper = round(q3 + 1.5 * iqr_value,3)
+        lower = round(q1 - 1.5 * iqr_value,3)
+        mean = round(ser.mean(),3)
+        std = round(ser.std(),3)
+        cv = round(ser.std() / ser.mean(),3)
+        max = round(ser.max(),3)
+        sum = round(ser.sum(),3)
+        skew = round(stats.skew(ser.dropna().tolist()),3)
+        kurto = round(stats.kurtosis(ser.dropna().tolist()),3)
+        lower = min if lower < min else lower
+        upper = max if upper > max else upper
+        # * extra care for scipy metrics, these are very vulnarable to nan
+        print(
+            f"""{name} min: {min:_} | lower: {lower:_} | q25: {q1:_} | median: {med:_} | mean: {mean:_} | q75: {q3:_} | upper: {upper:_} | max: {max:_} | std: {std:_} | cv: {cv:_} | sum: {sum:_} | skew: {skew} | kurto: {kurto}""")
+    if isinstance(df, pd.Series):
+        print_summary_ser(df, name)
+        return
+    if isinstance(df, pd.DataFrame):
+        # * only show numerics
+        for col in df.select_dtypes("number").columns:
+            print_summary_ser(ser=df[col], name=col)
+    return

{pandas_plots-0.11.14 → pandas_plots-0.11.16/src/pandas_plots.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pandas-plots
-Version: 0.11.14
+Version: 0.11.16
 Summary: A collection of helper for table handling and vizualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -22,7 +22,7 @@ License-File: LICENSE
 Requires-Dist: pandas>=2.0.0
 Requires-Dist: plotly>=5.18.0
 Requires-Dist: matplotlib>=3.8.2
-Requires-Dist: matplotlib-venn>=0.11.10
+Requires-Dist: matplotlib-venn==0.11.10
 Requires-Dist: seaborn>=0.13.2
 Requires-Dist: Jinja2>=3.1.4
 Requires-Dist: requests>=2.32.0
@@ -83,6 +83,7 @@ tbl.show_num_df(
   - `describe_df()` an alternative version of pandas `describe()` function
   - `descr_db()` a very short descr for a `duckdb` relation
   - `pivot_df()` gets a pivot table of a 3 column dataframe (or 2 columns if no weights are given)
+  - `print_summary()` shows statistics for a pandas DataFrame or Series
 - `pls` for plotly visualizations
   - `plot_box()` auto annotated boxplot w/ violin option

{pandas_plots-0.11.14 → pandas_plots-0.11.16}/src/pandas_plots.egg-info/requires.txt RENAMED Viewed

@@ -1,7 +1,7 @@
 pandas>=2.0.0
 plotly>=5.18.0
 matplotlib>=3.8.2
-matplotlib-venn>=0.11.10
+matplotlib-venn==0.11.10
 seaborn>=0.13.2
 Jinja2>=3.1.4
 requests>=2.32.0