PyPI - pandas-plots - Versions diffs - 0.11.22__tar.gz → 0.11.23__tar.gz - Mend

pandas-plots 0.11.22tar.gz → 0.11.23tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{pandas_plots-0.11.22/src/pandas_plots.egg-info → pandas_plots-0.11.23}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pandas-plots
-Version: 0.11.22
+Version: 0.11.23
 Summary: A collection of helper for table handling and vizualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -100,14 +100,15 @@ tbl.show_num_df(
   - `show_venn3()` displays a venn diagram for 3 sets
 - `hlp` contains some (variety) helper functions
-  - `df_to_series()` converts a dataframe to a series
+  - `to_series()` converts a dataframe to a series (`🚨 breaking change`)
   - `mean_confidence_interval()` calculates mean and confidence interval for a series
   - `wrap_text()` formats strings or lists to a given width to fit nicely on the screen
   - `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
   - `create_barcode_from_url()` creates a barcode from a given URL
-  - `add_datetime_col()` adds a datetime columns to a dataframe
+  - `add_datetime_col()` adds a datetime columns to a dataframe (chainable)
   - `show_package_version` prints version of a list of packages
   - `get_os` helps to identify and ensure operating system at runtime
+  - `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
 - `pii` has routines for handling of personally identifiable information
   - `remove_pii()` logs and deletes pii from a series

{pandas_plots-0.11.22 → pandas_plots-0.11.23}/README.md RENAMED Viewed

@@ -66,14 +66,15 @@ tbl.show_num_df(
   - `show_venn3()` displays a venn diagram for 3 sets
 - `hlp` contains some (variety) helper functions
-  - `df_to_series()` converts a dataframe to a series
+  - `to_series()` converts a dataframe to a series (`🚨 breaking change`)
   - `mean_confidence_interval()` calculates mean and confidence interval for a series
   - `wrap_text()` formats strings or lists to a given width to fit nicely on the screen
   - `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
   - `create_barcode_from_url()` creates a barcode from a given URL
-  - `add_datetime_col()` adds a datetime columns to a dataframe
+  - `add_datetime_col()` adds a datetime columns to a dataframe (chainable)
   - `show_package_version` prints version of a list of packages
   - `get_os` helps to identify and ensure operating system at runtime
+  - `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
 - `pii` has routines for handling of personally identifiable information
   - `remove_pii()` logs and deletes pii from a series

{pandas_plots-0.11.22 → pandas_plots-0.11.23}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = pandas-plots
-version = 0.11.22
+version = 0.11.23
 author = smeisegeier
 author_email = dexterDSDo@googlemail.com
 description = A collection of helper for table handling and vizualization

{pandas_plots-0.11.22 → pandas_plots-0.11.23}/src/pandas_plots/hlp.py RENAMED Viewed

@@ -1,19 +1,19 @@
-import pandas as pd
-import numpy as np
-import scipy.stats
 import importlib.metadata as md
-from platform import python_version
-from typing import Literal, List
-from enum import Enum, auto
-import platform
 import os
+import platform
+import re
+from enum import Enum, auto
 from io import BytesIO
+from platform import python_version
+from typing import List, Literal
+import duckdb as ddb
+import numpy as np
+import pandas as pd
+import requests
+import scipy.stats
 from matplotlib import pyplot as plt
 from PIL import Image
-import requests
-import re
 # from devtools import debug
@@ -32,7 +32,7 @@ def mean_confidence_interval(df, confidence=0.95):
     Returns:
     tuple: A tuple containing the mean, interval, lower bound, and upper bound.
     """
-    df = df_to_series(df)
+    df = to_series(df)
     if df is None:
         return None
     a = 1.0 * np.array(df)
@@ -53,7 +53,7 @@ def mean_confidence_interval(df, confidence=0.95):
     #     return dist.mean - h, dist.mean + h
-def df_to_series(df) -> pd.Series | None:
+def to_series(df) -> pd.Series | None:
     """
     Converts a pandas DataFrame to a pandas Series.
@@ -103,6 +103,10 @@ def df_to_series(df) -> pd.Series | None:
         s.name = _data_col.name
         return s
+# * extend objects to enable chaining
+pd.DataFrame.to_series = to_series
+pd.Series.to_series = to_series
 def replace_delimiter_outside_quotes(
     input: str, delimiter_old: str = ",", delimiter_new: str = ";", quotechar: str = '"'
@@ -234,6 +238,26 @@ def create_barcode_from_url(
 def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFrame:
+    """
+    Add datetime columns to a given DataFrame.
+    Adds the following columns to the given DataFrame:
+        - YYYY: Year of date_column
+        - MM: Month of date_column
+        - Q: Quarter of date_column
+        - YYYY-MM: Year-month of date_column
+        - YYYYQ: Year-quarter of date_column
+        - YYYY-WW: Year-week of date_column
+        - DDD: Day of the week of date_column
+    Args:
+        df (pd.DataFrame): The DataFrame to add datetime columns to.
+        date_column (str, optional): The column to base the added datetime columns off of. Defaults to None.
+    Returns:
+        pd.DataFrame: The DataFrame with the added datetime columns.
+        This command can be chained.
+    """
     df_ = df.copy()
     if not date_column:
         date_column = [
@@ -269,6 +293,9 @@ def add_datetime_columns(df: pd.DataFrame, date_column: str = None) -> pd.DataFr
     return df_
+# * extend objects to enable chaining
+pd.DataFrame.add_datetime_columns = add_datetime_columns
 def show_package_version(
     packages: list[str] = None,
@@ -289,7 +316,7 @@ def show_package_version(
     # ! avoid empty list in signature, it will NOT be empty in runtime
     if packages is None:
         packages = []
     if not isinstance(packages, List):
         print(f"❌ A list of str must be provided")
         return
@@ -315,6 +342,7 @@ def show_package_version(
     print(out)
     return
 class OperatingSystem(Enum):
     WINDOWS = auto()
     LINUX = auto()
@@ -333,7 +361,7 @@ def get_os(is_os: OperatingSystem = None, verbose: bool = False) -> bool | str:
             - OperatingSystem.MAC
     Returns:
-        bool: True if the desired operating system matches the current operating system, False otherwise.
+        bool: True if the desired operating system matches the current operating system, False otherwise.
         str: Returns the current operating system (platform.system()) if is_os is None.
     """
     if verbose:
@@ -352,3 +380,90 @@ def get_os(is_os: OperatingSystem = None, verbose: bool = False) -> bool | str:
         return True
     else:
         return False
+def add_bitmask_label(
+    data: pd.DataFrame | pd.Series | ddb.DuckDBPyRelation,
+    bitmask_col: str,
+    labels: list[str],
+    separator: str = "|",
+    zero_code: str = "-",
+    keep_col: bool = True,
+    con: ddb.DuckDBPyConnection = None,
+) -> pd.DataFrame | ddb.DuckDBPyRelation:
+    """
+    adds a column to the data (DataFrame, Series, or DuckDB Relation) that resolves a bitmask column into human-readable labels.
+    - bitmask_col must have been generated before. its value must be constructed as a bitmask, e.g:
+    - a red, green, blue combination is rendered into binary 110, which means it has green and blue
+    - its value is 6, which will resolved into "g|b" if the list ["r","g","b"] is given
+    if the bitmask value is 0, it will be replaced with the zero_code.
+    the method can be chained in pandas as well as in duckdb: df.add_bitmask_label(...)
+    Parameters:
+    - data (pd.DataFrame | pd.Series | duckdb.DuckDBPyRelation): Input data.
+    - bitmask_col (str): The name of the column containing bitmask values (ignored if input is Series).
+    - labels (list[str]): Labels corresponding to the bits, in the correct order.
+    - separator (str): Separator for combining labels. Default is "|".
+    - zero_code (str): Value to return for bitmask value 0. Default is "-".
+    - keep_col (bool): If True, retains the bitmask column. If False, removes it. Default is True.
+    - con (duckdb.Connection): DuckDB connection object. Required if data is a DuckDB Relation.
+    Returns:
+    - pd.DataFrame | duckdb.DuckDBPyRelation: The modified data with the new column added.
+    """
+    # * check possible input formats
+    if isinstance(data, ddb.DuckDBPyRelation):
+        if con is None:
+            raise ValueError(
+                "A DuckDB connection must be provided when the input is a DuckDB Relation."
+            )
+        data = data.df()  # * Convert DuckDB Relation to DataFrame
+    if isinstance(data, pd.Series):
+        bitmask_col = data.name if data.name else "bitmask"
+        data = data.to_frame(name=bitmask_col)
+    if not isinstance(data, pd.DataFrame):
+        raise ValueError(
+            "Input must be a pandas DataFrame, Series, or DuckDB Relation."
+        )
+    # * get max allowed value by bitshift, eg for 4 labels its 2^4 -1 = 15
+    max_allowable_value = (1 << len(labels)) - 1
+    # * compare against max in col
+    max_value_in_column = data[bitmask_col].max()
+    if max_value_in_column > max_allowable_value:
+        raise ValueError(
+            f"The maximum value in column '{bitmask_col}' ({max_value_in_column}) exceeds "
+            f"the maximum allowable value for {len(labels)} labels ({max_allowable_value}). "
+            f"Ensure the number of labels matches the possible bitmask range."
+        )
+    # ? Core logic
+    # * exit if 0
+    def decode_bitmask(value):
+        if value == 0:
+            return zero_code
+        # * iterate over each value as bitfield, on binary 1 fetch assigned label from [labels]
+        return separator.join(
+            [label for i, label in enumerate(labels) if value & (1 << i)]
+        )
+    label_col = f"{bitmask_col}_label"
+    data[label_col] = data[bitmask_col].apply(decode_bitmask)
+    # * drop value col if not to be kept
+    if not keep_col:
+        data = data.drop(columns=[bitmask_col])
+    # * Convert back to DuckDB Relation if original input was a Relation
+    if isinstance(data, pd.DataFrame) and con is not None:
+        return con.from_df(data)
+    return data
+# * extend objects to enable chaining
+pd.DataFrame.add_bitmask_label = add_bitmask_label
+ddb.DuckDBPyRelation.add_bitmask_label = add_bitmask_label

{pandas_plots-0.11.22 → pandas_plots-0.11.23}/src/pandas_plots/pls.py RENAMED Viewed

@@ -610,12 +610,12 @@ def plot_histogram(
     Returns:
         None
     """
     # * convert to df if series
     if isinstance(df_ser, pd.Series):
         df = df_ser.to_frame()
     else:
-        df=df_ser
+        df = df_ser
     col_not_num = df.select_dtypes(exclude="number").columns
     if any(col_not_num):
@@ -628,7 +628,7 @@ def plot_histogram(
     df = df.applymap(lambda x: round(x, precision))
     # ! plot
-    _caption=_set_caption(caption)
+    _caption = _set_caption(caption)
     fig = px.histogram(
         data_frame=df,
         histnorm=histnorm,
@@ -653,7 +653,7 @@ def plot_histogram(
                 "size": 24,
             },
         },
-        showlegend=False if df.shape[1]==1 else True,
+        showlegend=False if df.shape[1] == 1 else True,
     )
     fig.show(renderer)
@@ -702,7 +702,7 @@ def plot_joint(
     # * set theme and palette
     sb.set_theme(style="darkgrid", palette="tab10")
     if os.getenv("THEME") == "dark":
-        _style = "dark_background"
+        _style = "dark_background"
         _cmap = "rocket"
     else:
         _style = "bmh"
@@ -720,19 +720,21 @@ def plot_joint(
         "dropna": dropna,
         # "title": f"{caption}[{ser.name}], n = {len(ser):_}" if not title else title,
     }
-    dict_hex={"cmap": _cmap}
-    dict_kde={"fill": True, "cmap": _cmap}
-    if kind=="hex":
+    dict_hex = {"cmap": _cmap}
+    dict_kde = {"fill": True, "cmap": _cmap}
+    if kind == "hex":
         fig = sb.jointplot(**dict_base, **dict_hex)
-    elif kind=="kde":
+    elif kind == "kde":
         fig = sb.jointplot(**dict_base, **dict_kde)
     else:
         fig = sb.jointplot(**dict_base)
     # * emojis dont work in good ol seaborn
-    _caption="" if not caption else f"#{caption}, "
-    fig.figure.suptitle(title or f"{_caption}[{df.columns[0]}] vs [{df.columns[1]}], n = {len(df):_}")
+    _caption = "" if not caption else f"#{caption}, "
+    fig.figure.suptitle(
+        title or f"{_caption}[{df.columns[0]}] vs [{df.columns[1]}], n = {len(df):_}"
+    )
     # * leave some room for the title
     fig.figure.tight_layout()
     fig.figure.subplots_adjust(top=0.90)
@@ -783,10 +785,10 @@ def plot_box(
     Returns:
         None
     """
-    ser = df_to_series(ser)
+    ser = to_series(ser)
     if ser is None:
         return
     # * drop na to keep scipy sane
     n_ = len(ser)
     ser.dropna(inplace=True)

{pandas_plots-0.11.22 → pandas_plots-0.11.23/src/pandas_plots.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pandas-plots
-Version: 0.11.22
+Version: 0.11.23
 Summary: A collection of helper for table handling and vizualization
 Home-page: https://github.com/smeisegeier/pandas-plots
 Author: smeisegeier
@@ -100,14 +100,15 @@ tbl.show_num_df(
   - `show_venn3()` displays a venn diagram for 3 sets
 - `hlp` contains some (variety) helper functions
-  - `df_to_series()` converts a dataframe to a series
+  - `to_series()` converts a dataframe to a series (`🚨 breaking change`)
   - `mean_confidence_interval()` calculates mean and confidence interval for a series
   - `wrap_text()` formats strings or lists to a given width to fit nicely on the screen
   - `replace_delimiter_outside_quotes()` when manual import of csv files is needed: replaces delimiters only outside of quotes
   - `create_barcode_from_url()` creates a barcode from a given URL
-  - `add_datetime_col()` adds a datetime columns to a dataframe
+  - `add_datetime_col()` adds a datetime columns to a dataframe (chainable)
   - `show_package_version` prints version of a list of packages
   - `get_os` helps to identify and ensure operating system at runtime
+  - `🆕 add_bitmask_label()` adds a column to the data that resolves a bitmask column into human-readable labels
 - `pii` has routines for handling of personally identifiable information
   - `remove_pii()` logs and deletes pii from a series