PyPI - pandas-plots - Versions diffs - 0.11.22__py3-none-any.whl → 0.11.24__py3-none-any.whl - Mend

pandas-plots 0.11.22__py3-none-any.whl → 0.11.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

pandas_plots/hlp.py CHANGED Viewed

@@ -1,19 +1,19 @@
-import pandas as pd
-import numpy as np
-import scipy.stats
 import importlib.metadata as md
-from platform import python_version
-from typing import Literal, List
-from enum import Enum, auto
-import platform
 import os
+import platform
+import re
+from enum import Enum, auto
 from io import BytesIO
+from platform import python_version
+from typing import List, Literal
+import duckdb as ddb
+import numpy as np
+import pandas as pd
+import requests
+import scipy.stats
 from matplotlib import pyplot as plt
 from PIL import Image
-import requests
-import re
 # from devtools import debug
@@ -32,7 +32,7 @@ def mean_confidence_interval(df, confidence=0.95):
     Returns:
     tuple: A tuple containing the mean, interval, lower bound, and upper bound.
     """
-    df = df_to_series(df)
+    df = to_series(df)
     if df is None:
         return None
     a = 1.0 * np.array(df)
@@ -53,7 +53,7 @@ def mean_confidence_interval(df, confidence=0.95):
     #     return dist.mean - h, dist.mean + h
-def df_to_series(df) -> pd.Series | None:
+def to_series(df) -> pd.Series | None:
     """
     Converts a pandas DataFrame to a pandas Series.
@@ -103,6 +103,10 @@ def df_to_series(df) -> pd.Series | None:
         s.name = _data_col.name
         return s
+# * extend objects to enable chaining
+pd.DataFrame.to_series = to_series
+pd.Series.to_series = to_series
 def replace_delimiter_outside_quotes(
     input: str, delimiter_old: str = ",", delimiter_new: str = ";", quotechar: str = '"'
@@ -234,6 +238,26 @@ def create_barcode_from_url(
 def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFrame:
+    """
+    Add datetime columns to a given DataFrame.
+    Adds the following columns to the given DataFrame:
+        - YYYY: Year of date_column
+        - MM: Month of date_column
+        - Q: Quarter of date_column
+        - YYYY-MM: Year-month of date_column
+        - YYYYQ: Year-quarter of date_column
+        - YYYY-WW: Year-week of date_column
+        - DDD: Day of the week of date_column
+    Args:
+        df (pd.DataFrame): The DataFrame to add datetime columns to.
+        date_column (str, optional): The column to base the added datetime columns off of. Defaults to None.
+    Returns:
+        pd.DataFrame: The DataFrame with the added datetime columns.
+        This command can be chained.
+    """
     df_ = df.copy()
     if not date_column:
         date_column = [
@@ -269,6 +293,9 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
     return df_
+# * extend objects to enable chaining
+pd.DataFrame.add_datetime_columns = add_datetime_columns
 def show_package_version(
     packages: list[str] = None,
@@ -289,7 +316,7 @@ def show_package_version(
     # ! avoid empty list in signature, it will NOT be empty in runtime
     if packages is None:
         packages = []
     if not isinstance(packages, List):
         print(f"❌ A list of str must be provided")
         return
@@ -315,6 +342,7 @@ def show_package_version(
     print(out)
     return
 class OperatingSystem(Enum):
     WINDOWS = auto()
     LINUX = auto()
@@ -333,7 +361,7 @@ def get_os(is_os: OperatingSystem = None, verbose: bool = False) -> bool | str:
             - OperatingSystem.MAC
     Returns:
-        bool: True if the desired operating system matches the current operating system, False otherwise.
+        bool: True if the desired operating system matches the current operating system, False otherwise.
         str: Returns the current operating system (platform.system()) if is_os is None.
     """
     if verbose:
@@ -352,3 +380,90 @@ def get_os(is_os: OperatingSystem = None, verbose: bool = False) -> bool | str:
         return True
     else:
         return False
+def add_bitmask_label(
+    data: pd.DataFrame | pd.Series | ddb.DuckDBPyRelation,
+    bitmask_col: str,
+    labels: list[str],
+    separator: str = "|",
+    zero_code: str = "-",
+    keep_col: bool = True,
+    con: ddb.DuckDBPyConnection = None,
+) -> pd.DataFrame | ddb.DuckDBPyRelation:
+    """
+    adds a column to the data (DataFrame, Series, or DuckDB Relation) that resolves a bitmask column into human-readable labels.
+    - bitmask_col must have been generated before. its value must be constructed as a bitmask, e.g:
+    - a red, green, blue combination is rendered into binary 110, which means it has green and blue
+    - its value is 6, which will resolved into "g|b" if the list ["r","g","b"] is given
+    if the bitmask value is 0, it will be replaced with the zero_code.
+    the method can be chained in pandas as well as in duckdb: df.add_bitmask_label(...)
+    Parameters:
+    - data (pd.DataFrame | pd.Series | duckdb.DuckDBPyRelation): Input data.
+    - bitmask_col (str): The name of the column containing bitmask values (ignored if input is Series).
+    - labels (list[str]): Labels corresponding to the bits, in the correct order.
+    - separator (str): Separator for combining labels. Default is "|".
+    - zero_code (str): Value to return for bitmask value 0. Default is "-".
+    - keep_col (bool): If True, retains the bitmask column. If False, removes it. Default is True.
+    - con (duckdb.Connection): DuckDB connection object. Required if data is a DuckDB Relation.
+    Returns:
+    - pd.DataFrame | duckdb.DuckDBPyRelation: The modified data with the new column added.
+    """
+    # * check possible input formats
+    if isinstance(data, ddb.DuckDBPyRelation):
+        if con is None:
+            raise ValueError(
+                "A DuckDB connection must be provided when the input is a DuckDB Relation."
+            )
+        data = data.df()  # * Convert DuckDB Relation to DataFrame
+    if isinstance(data, pd.Series):
+        bitmask_col = data.name if data.name else "bitmask"
+        data = data.to_frame(name=bitmask_col)
+    if not isinstance(data, pd.DataFrame):
+        raise ValueError(
+            "Input must be a pandas DataFrame, Series, or DuckDB Relation."
+        )
+    # * get max allowed value by bitshift, eg for 4 labels its 2^4 -1 = 15
+    max_allowable_value = (1 << len(labels)) - 1
+    # * compare against max in col
+    max_value_in_column = data[bitmask_col].max()
+    if max_value_in_column > max_allowable_value:
+        raise ValueError(
+            f"The maximum value in column '{bitmask_col}' ({max_value_in_column}) exceeds "
+            f"the maximum allowable value for {len(labels)} labels ({max_allowable_value}). "
+            f"Ensure the number of labels matches the possible bitmask range."
+        )
+    # ? Core logic
+    # * exit if 0
+    def decode_bitmask(value):
+        if value == 0:
+            return zero_code
+        # * iterate over each value as bitfield, on binary 1 fetch assigned label from [labels]
+        return separator.join(
+            [label for i, label in enumerate(labels) if value & (1 << i)]
+        )
+    label_col = f"{bitmask_col}_label"
+    data[label_col] = data[bitmask_col].apply(decode_bitmask)
+    # * drop value col if not to be kept
+    if not keep_col:
+        data = data.drop(columns=[bitmask_col])
+    # * Convert back to DuckDB Relation if original input was a Relation
+    if isinstance(data, pd.DataFrame) and con is not None:
+        return con.from_df(data)
+    return data
+# * extend objects to enable chaining
+pd.DataFrame.add_bitmask_label = add_bitmask_label
+ddb.DuckDBPyRelation.add_bitmask_label = add_bitmask_label

pandas_plots/pls.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from pathlib import Path
 import warnings
 warnings.filterwarnings("ignore")
@@ -22,7 +23,8 @@ def plot_quadrants(
     df: pd.DataFrame,
     title: str = None,
     caption: str = None,
-) -> None:
+    png_path: Path | str = None,
+) -> object:
     """
     Plot a heatmap for the given dataframe, with options for title and caption.
@@ -35,6 +37,7 @@ def plot_quadrants(
             df columns must contain 2 values
         title (str, optional): The title for the heatmap to override the default.
         caption (str, optional): The caption for the heatmap. Defaults to None.
+        png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
         q1, q2, q3, q4, n: The values for each quadrant and the total count.
@@ -93,6 +96,10 @@ def plot_quadrants(
     q3 = heat_wide_out.iloc[0, 0]
     q4 = heat_wide_out.iloc[0, 1]
+    # * save to png if path is provided
+    if png_path is not None:
+        plt.savefig(Path(png_path).as_posix(), format='png')
     return q1, q2, q3, q4, n
     # * plotly express is not used for the heatmap, although it does not need the derived wide format.
     # * but theres no option to alter inner values in the heatmap
@@ -115,7 +122,8 @@ def plot_stacked_bars(
     sort_values: bool = False,
     show_total: bool = False,
     precision: int = 0,
-) -> None:
+    png_path: Path | str = None,
+) -> object:
     """
     Generates a stacked bar plot using the provided DataFrame.
     df *must* comprise the columns (order matters):
@@ -140,9 +148,10 @@ def plot_stacked_bars(
     - sort_values: bool = False - Sort axis by index (default) or values
     - show_total: bool = False - Whether to show the total value
     - precision: int = 0 - The number of decimal places to round to
+    - png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
-    None
+    plot object
     """
     BAR_LENGTH_MULTIPLIER = 1.05
@@ -322,7 +331,12 @@ def plot_stacked_bars(
         _fig.update_layout(yaxis={"categoryorder": "category descending"})
     _fig.show(renderer)
-    return
+    # * save to png if path is provided
+    if png_path is not None:
+        _fig.write_image(Path(png_path).as_posix())
+    return _fig
 def plot_bars(
@@ -340,7 +354,8 @@ def plot_bars(
     use_ci: bool = False,
     precision: int = 0,
     renderer: Literal["png", "svg", None] = "png",
-) -> None:
+    png_path: Path | str = None,
+) -> object:
     """
     A function to plot a bar chart based on a *categorical* column (must be string or bool) and a numerical value.
     Accepts:
@@ -366,9 +381,10 @@ def plot_bars(
         - enforces dropna=True
     - precision: An integer indicating the number of decimal places to round the values to. Default is 0.
     - renderer: A string indicating the renderer to use for displaying the chart. It can be "png", "svg", or None. Default is "png".
+    - png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
-    - None
+    - plot object
     """
     # * if series, apply value_counts, deselect use_ci
     if isinstance(df_in, pd.Series):
@@ -568,7 +584,12 @@ def plot_bars(
         textposition="outside" if not use_ci else "auto", error_y=dict(thickness=5)
     )
     _fig.show(renderer)
-    return
+    # * save to png if path is provided
+    if png_path is not None:
+        _fig.write_image(Path(png_path).as_posix())
+    return _fig
 def plot_histogram(
@@ -586,7 +607,8 @@ def plot_histogram(
     renderer: Literal["png", "svg", None] = "png",
     caption: str = None,
     title: str = None,
-) -> None:
+    png_path: Path | str = None,
+) -> object:
     """
     A function to plot a histogram based on *numeric* columns in a DataFrame.
     Accepts:
@@ -606,16 +628,18 @@ def plot_histogram(
         renderer (Literal["png", "svg", None]): The renderer for displaying the plot. Default is "png".
         caption (str): The caption for the plot. Default is None.
         title (str): The title of the plot. Default is None.
+        png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
-        None
+        plot object
     """
     # * convert to df if series
     if isinstance(df_ser, pd.Series):
         df = df_ser.to_frame()
     else:
-        df=df_ser
+        df = df_ser
     col_not_num = df.select_dtypes(exclude="number").columns
     if any(col_not_num):
@@ -628,7 +652,7 @@ def plot_histogram(
     df = df.applymap(lambda x: round(x, precision))
     # ! plot
-    _caption=_set_caption(caption)
+    _caption = _set_caption(caption)
     fig = px.histogram(
         data_frame=df,
         histnorm=histnorm,
@@ -653,11 +677,16 @@ def plot_histogram(
                 "size": 24,
             },
         },
-        showlegend=False if df.shape[1]==1 else True,
+        showlegend=False if df.shape[1] == 1 else True,
     )
     fig.show(renderer)
-    return
+    # * save to png if path is provided
+    if png_path is not None:
+        fig.write_image(Path(png_path).as_posix())
+    return fig
 def plot_joint(
@@ -668,7 +697,8 @@ def plot_joint(
     dropna: bool = False,
     caption: str = "",
     title: str = "",
-) -> None:
+    png_path: Path | str = None,
+) -> object:
     """
     Generate a seaborn joint plot for *two numeric* columns of a given DataFrame.
@@ -680,9 +710,10 @@ def plot_joint(
         - dropna: Whether to drop NA values before plotting (default is False).
         - caption: A caption for the plot.
         - title: The title of the plot.
+        - png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
-        None
+        plot object
     """
     if df.shape[1] != 2:
@@ -702,7 +733,7 @@ def plot_joint(
     # * set theme and palette
     sb.set_theme(style="darkgrid", palette="tab10")
     if os.getenv("THEME") == "dark":
-        _style = "dark_background"
+        _style = "dark_background"
         _cmap = "rocket"
     else:
         _style = "bmh"
@@ -720,19 +751,21 @@ def plot_joint(
         "dropna": dropna,
         # "title": f"{caption}[{ser.name}], n = {len(ser):_}" if not title else title,
     }
-    dict_hex={"cmap": _cmap}
-    dict_kde={"fill": True, "cmap": _cmap}
-    if kind=="hex":
+    dict_hex = {"cmap": _cmap}
+    dict_kde = {"fill": True, "cmap": _cmap}
+    if kind == "hex":
         fig = sb.jointplot(**dict_base, **dict_hex)
-    elif kind=="kde":
+    elif kind == "kde":
         fig = sb.jointplot(**dict_base, **dict_kde)
     else:
         fig = sb.jointplot(**dict_base)
     # * emojis dont work in good ol seaborn
-    _caption="" if not caption else f"#{caption}, "
-    fig.figure.suptitle(title or f"{_caption}[{df.columns[0]}] vs [{df.columns[1]}], n = {len(df):_}")
+    _caption = "" if not caption else f"#{caption}, "
+    fig.figure.suptitle(
+        title or f"{_caption}[{df.columns[0]}] vs [{df.columns[1]}], n = {len(df):_}"
+    )
     # * leave some room for the title
     fig.figure.tight_layout()
     fig.figure.subplots_adjust(top=0.90)
@@ -748,7 +781,11 @@ def plot_joint(
     #     dropna=dropna,
     #     cmap=_cmap,
     # )
-    return
+    # * save to png if path is provided
+    if png_path is not None:
+        fig.savefig(Path(png_path).as_posix())
+    return fig
 def plot_box(
@@ -764,7 +801,8 @@ def plot_box(
     violin: bool = False,
     x_min: float = None,
     x_max: float = None,
-) -> None:
+    png_path: Path | str = None,
+) -> object:
     """
     Plots a horizontal box plot for the given pandas Series.
@@ -779,14 +817,15 @@ def plot_box(
         x_min: The minimum value for the x-axis scale (max and min must be set)
         x_max: The maximum value for the x-axis scale (max and min must be set)
         summary: Whether to add a summary table to the plot
+        png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
-        None
+        plot object
     """
-    ser = df_to_series(ser)
+    ser = to_series(ser)
     if ser is None:
         return
     # * drop na to keep scipy sane
     n_ = len(ser)
     ser.dropna(inplace=True)
@@ -894,9 +933,15 @@ def plot_box(
         )
     fig.show("png")
     if summary:
         print_summary(ser)
-    return
+    # * save to png if path is provided
+    if png_path is not None:
+        fig.write_image(Path(png_path).as_posix())
+    return fig
 def plot_boxes(
@@ -909,7 +954,8 @@ def plot_boxes(
     annotations: bool = True,
     summary: bool = True,
     title: str = None,
-) -> None:
+    png_path: Path | str = None,
+) -> object:
     """
     [Experimental] Plot vertical boxes for each unique item in the DataFrame and add annotations for statistics.
@@ -922,9 +968,10 @@ def plot_boxes(
         width (int): The width of the plot.
         annotations (bool): Whether to add annotations to the plot.
         summary (bool): Whether to add a summary to the plot.
+        png_path (Path | str, optional): The path to save the image as a png file. Defaults to None.
     Returns:
-        None
+        plot object
     """
     if (
@@ -1037,7 +1084,12 @@ def plot_boxes(
     fig.show("png")
     if summary:
         print_summary(df)
-    return
+    # * save to png if path is provided
+    if png_path is not None:
+        fig.write_image(Path(png_path).as_posix())
+    return fig
 # def plot_ci_bars_DEPR(df: pd.DataFrame, dropna: bool = True, precision: int = 2) -> None:

{pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pandas-plots
-Version: 0.11.22
+Version: 0.11.24
 Summary: A collection of helper for table handling and vizualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -100,14 +100,15 @@ tbl.show_num_df(
   - `show_venn3()` displays a venn diagram for 3 sets
 - `hlp` contains some (variety) helper functions
-  - `df_to_series()` converts a dataframe to a series
+  - `to_series()` converts a dataframe to a series (`🚨 breaking change`)
   - `mean_confidence_interval()` calculates mean and confidence interval for a series
   - `wrap_text()` formats strings or lists to a given width to fit nicely on the screen
   - `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
   - `create_barcode_from_url()` creates a barcode from a given URL
-  - `add_datetime_col()` adds a datetime columns to a dataframe
+  - `add_datetime_col()` adds a datetime columns to a dataframe (chainable)
   - `show_package_version` prints version of a list of packages
   - `get_os` helps to identify and ensure operating system at runtime
+  - `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
 - `pii` has routines for handling of personally identifiable information
   - `remove_pii()` logs and deletes pii from a series

pandas_plots-0.11.24.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+pandas_plots/hlp.py,sha256=N6NrbFagVMMX-ZnV0rIBEz82SeSoOkksfMcCap55W7E,16588
+pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
+pandas_plots/pls.py,sha256=U-tjh0DnYQYg-n02hh_HyvObMerkGCBP8tirKFIEEn4,37376
+pandas_plots/tbl.py,sha256=A1SqvssDA4ofI_WJ-sdWIb9Bo5X-sELD8pley22Y4X4,28380
+pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
+pandas_plots-0.11.24.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
+pandas_plots-0.11.24.dist-info/METADATA,sha256=rapR9ocNOI-6U2PyTtDOTJu2EpZaGxXlywAtOBVHdoA,7220
+pandas_plots-0.11.24.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+pandas_plots-0.11.24.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
+pandas_plots-0.11.24.dist-info/RECORD,,

{pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
+Generator: setuptools (75.7.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

pandas_plots-0.11.22.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-pandas_plots/hlp.py,sha256=wrvy36rnSdg1I4uQjIzzwGmjcN0gvSfKylRf_7GKpXs,12001
-pandas_plots/pii.py,sha256=2WKE-W9s285jPdsTqCgt1uxuW4lj1PYCVOYB2fYDNwQ,2195
-pandas_plots/pls.py,sha256=C-EUvt9u7aXd6va7BGamf6HSODOnvbERwxu2Gb8PgbQ,35449
-pandas_plots/tbl.py,sha256=A1SqvssDA4ofI_WJ-sdWIb9Bo5X-sELD8pley22Y4X4,28380
-pandas_plots/ven.py,sha256=2x3ACo2vSfO3q6fv-UdDQ0h1SJyt8WChBGgE5SDCdCk,11673
-pandas_plots-0.11.22.dist-info/LICENSE,sha256=6KQ5KVAAhRaB-JJKpX4cefKvRZRgI7GUPc92_2d31XY,1051
-pandas_plots-0.11.22.dist-info/METADATA,sha256=YgJjD4QfPZkLutuYg4_5orNjoVwNH2jx9nTsSwYqIlk,7071
-pandas_plots-0.11.22.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-pandas_plots-0.11.22.dist-info/top_level.txt,sha256=XnaNuIHBqMmCeh_U7nKOYTwFue_SIA0wxuDgdPmnnSk,13
-pandas_plots-0.11.22.dist-info/RECORD,,

{pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/LICENSE RENAMED Viewed

File without changes

{pandas_plots-0.11.22.dist-info → pandas_plots-0.11.24.dist-info}/top_level.txt RENAMED Viewed

File without changes

pandas-plots 0.11.22__py3-none-any.whl → 0.11.24__py3-none-any.whl

pandas-plots 0.11.22__py3-none-any.whl → 0.11.24__py3-none-any.whl