PyPI - pyretailscience - Versions diffs - 0.3.0__tar.gz → 0.3.2__tar.gz - Mend

pyretailscience 0.3.0.tar.gz → 0.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pyretailscience
-Version: 0.3.0
+Version: 0.3.2
 Summary: Retail Data Science Tools
 License: Elastic-2.0
 Author: Murray Vanwyk

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pyretailscience"
-version = "0.3.0"
+version = "0.3.2"
 description = "Retail Data Science Tools"
 authors = ["Murray Vanwyk <2493311+mvanwyk@users.noreply.github.com>"]
 readme = "README.md"

pyretailscience-0.3.2/pyretailscience/assets/fonts/Poppins-Bold.ttf ADDED Viewed

Binary file

pyretailscience-0.3.2/pyretailscience/assets/fonts/Poppins-LightItalic.ttf ADDED Viewed

Binary file

pyretailscience-0.3.2/pyretailscience/assets/fonts/Poppins-Medium.ttf ADDED Viewed

Binary file

pyretailscience-0.3.2/pyretailscience/assets/fonts/Poppins-Regular.ttf ADDED Viewed

Binary file

pyretailscience-0.3.2/pyretailscience/assets/fonts/Poppins-SemiBold.ttf ADDED Viewed

Binary file

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/customer.py RENAMED Viewed

@@ -6,6 +6,7 @@ from matplotlib.axes import Axes, SubplotBase
 from pyretailscience.data.contracts import TransactionItemLevelContract
 from pyretailscience.style.graph_utils import GraphStyles as gs
+import pyretailscience.style.graph_utils as gu
 from pyretailscience.style.graph_utils import human_format, standard_graph_styles
 from pyretailscience.style.tailwind import COLORS
@@ -36,8 +37,8 @@ class PurchasesPerCustomer:
         percentile_line: float = 0.5,
         source_text: str | None = None,
         title: str | None = None,
-        xlabel: str | None = None,
-        ylabel: str | None = None,
+        x_label: str | None = None,
+        y_label: str | None = None,
         **kwargs: dict[str, any],
     ) -> SubplotBase:
         """Plot the distribution of the number of purchases per customer.
@@ -58,8 +59,8 @@ class PurchasesPerCustomer:
         if cumlative:
             density = True
-        if xlabel is None:
-            xlabel = "Number of purchases"
+        if x_label is None:
+            x_label = "Number of purchases"
         ax = self.cust_purchases_s.hist(
             bins=bins,
@@ -70,27 +71,38 @@ class PurchasesPerCustomer:
             **kwargs,
         )
-        ax.set_xlabel(xlabel, fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE, labelpad=10)
+        ax.set_xlabel(
+            x_label,
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
         ax.xaxis.set_major_formatter(lambda x, pos: human_format(x, pos, decimals=0))
         ax = standard_graph_styles(ax)
         if cumlative:
-            if title is None:
-                title = "Number of Purchases Cumulative Distribution"
-            if ylabel is None:
-                ylabel = "Percentage of customers"
+            default_title = "Number of Purchases Cumulative Distribution"
+            default_y_label = "Percentage of customers"
             ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
         else:
-            if title is None:
-                title = "Number of Purchases Distribution"
-            if ylabel is None:
-                ylabel = "Number of customers"
+            default_title = "Number of Purchases Distribution"
+            default_y_label = "Number of customers"
             ax.yaxis.set_major_formatter(lambda x, pos: human_format(x, pos, decimals=0))
-        ax.set_title(title, fontsize=gs.DEFAULT_TITLE_FONT_SIZE, pad=15)
-        ax.set_ylabel(ylabel, fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE, labelpad=10)
+        ax.set_title(
+            gu.not_none(title, default_title),
+            fontproperties=gs.POPPINS_SEMI_BOLD,
+            fontsize=gs.DEFAULT_TITLE_FONT_SIZE,
+            pad=gs.DEFAULT_TITLE_PAD,
+        )
+        ax.set_ylabel(
+            gu.not_none(y_label, default_y_label),
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
         if draw_percentile_line:
             if percentile_line > 1 or percentile_line < 0:
@@ -112,6 +124,8 @@ class PurchasesPerCustomer:
                 ha="left",
                 va="center",
                 fontsize=gs.DEFAULT_SOURCE_FONT_SIZE,
+                fontproperties=gs.POPPINS_LIGHT_ITALIC,
+                color="dimgray",
             )
         return ax
@@ -192,8 +206,8 @@ class DaysBetweenPurchases:
         draw_percentile_line: bool = False,
         percentile_line: float = 0.5,
         title: str | None = None,
-        xlabel: str | None = None,
-        ylabel: str | None = None,
+        x_label: str | None = None,
+        y_label: str | None = None,
         source_text: str = None,
         **kwargs: dict[str, any],
     ) -> SubplotBase:
@@ -223,29 +237,38 @@ class DaysBetweenPurchases:
             **kwargs,
         )
-        if xlabel is None:
-            xlabel = "Average Number of Days Between Purchases"
-        ax.set_xlabel(xlabel, fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE, labelpad=10)
+        ax.set_xlabel(
+            gu.not_none(x_label, "Average Number of Days Between Purchases"),
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
         ax.xaxis.set_major_formatter(lambda x, pos: human_format(x, pos, decimals=0))
         ax = standard_graph_styles(ax)
         if cumlative:
-            if title is None:
-                title = "Average Days Between Purchases Cumulative Distribution"
-            if ylabel is None:
-                ylabel = "Percentage of Customers"
+            default_title = "Average Days Between Purchases Cumulative Distribution"
+            default_y_label = "Percentage of Customers"
             ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
         else:
-            if title is None:
-                title = "Average Days Between Purchases Distribution"
-            if ylabel is None:
-                ylabel = "Number of Customers"
+            default_title = "Average Days Between Purchases Distribution"
+            default_y_label = "Number of Customers"
             ax.yaxis.set_major_formatter(lambda x, pos: human_format(x, pos, decimals=0))
-        ax.set_title(title, fontsize=gs.DEFAULT_TITLE_FONT_SIZE, pad=15)
-        ax.set_ylabel(ylabel, fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE, labelpad=10)
+        ax.set_title(
+            gu.not_none(title, default_title),
+            fontproperties=gs.POPPINS_SEMI_BOLD,
+            fontsize=gs.DEFAULT_TITLE_FONT_SIZE,
+            pad=gs.DEFAULT_TITLE_PAD,
+        )
+        ax.set_ylabel(
+            gu.not_none(y_label, default_y_label),
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
         if draw_percentile_line:
             if percentile_line > 1 or percentile_line < 0:
@@ -268,6 +291,8 @@ class DaysBetweenPurchases:
                 ha="left",
                 va="center",
                 fontsize=gs.DEFAULT_SOURCE_FONT_SIZE,
+                fontproperties=gs.POPPINS_LIGHT_ITALIC,
+                color="dimgray",
             )
         return ax
@@ -335,8 +360,8 @@ class TransactionChurn:
         cumlative: bool = False,
         ax: Axes | None = None,
         title: str | None = None,
-        xlabel: str | None = None,
-        ylabel: str | None = None,
+        x_label: str | None = None,
+        y_label: str | None = None,
         source_text: str = None,
         **kwargs: dict[str, any],
     ) -> SubplotBase:
@@ -366,17 +391,25 @@ class TransactionChurn:
         standard_graph_styles(ax)
-        if title is None:
-            title = "Churn Rate by Number of Purchases"
-        if xlabel is None:
-            xlabel = "Number of Purchases"
-        if ylabel is None:
-            ylabel = "% Churned"
         ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1.0))
-        ax.set_xlabel(xlabel, fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE, labelpad=10)
-        ax.set_ylabel(ylabel, fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE, labelpad=10)
-        ax.set_title(title, fontsize=gs.DEFAULT_TITLE_FONT_SIZE, pad=15)
+        ax.set_xlabel(
+            gu.not_none(x_label, "Number of Purchases"),
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
+        ax.set_ylabel(
+            gu.not_none(y_label, "% Churned"),
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
+        ax.set_title(
+            gu.not_none(title, "Churn Rate by Number of Purchases"),
+            fontproperties=gs.POPPINS_SEMI_BOLD,
+            fontsize=gs.DEFAULT_TITLE_FONT_SIZE,
+            pad=gs.DEFAULT_TITLE_PAD,
+        )
         if source_text:
             ax.annotate(
@@ -386,6 +419,8 @@ class TransactionChurn:
                 ha="left",
                 va="center",
                 fontsize=gs.DEFAULT_SOURCE_FONT_SIZE,
+                fontproperties=gs.POPPINS_LIGHT_ITALIC,
+                color="dimgray",
             )
         return ax

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/data/contracts.py RENAMED Viewed

@@ -83,6 +83,51 @@ class PyRetailSciencePandasDataset(PandasDataset):
         }
+def build_expected_columns(columns: list[str]) -> list[ExpectationConfiguration]:
+    """A helper function that builds a list of expectations for the columns to exist.
+    Args:
+        columns (list[str]): A list of columns to build the expectations for.
+    Returns:
+        list[ExpectationConfiguration]: A list of expectations for the columns to exist.
+    """
+    return [
+        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": column})
+        for column in columns
+    ]
+def build_expected_unique_columns(columns: list[str]) -> list[ExpectationConfiguration]:
+    """A helper function that builds a list of expectations for the columns to have unique values.
+    Args:
+        columns (list[str]): A list of columns to build the expectations for.
+    Returns:
+        list[ExpectationConfiguration]: A list of expectations for the columns to have unique values.
+    """
+    return [
+        ExpectationConfiguration(expectation_type="expect_column_values_to_be_unique", kwargs={"column": column})
+        for column in columns
+    ]
+def build_non_null_columns(columns: list[list[str]]) -> list[ExpectationConfiguration]:
+    """A helper function that builds a list of expectations for the columns to have no null values.
+    Args:
+        columns (list[list[str]]): A list of columns to build the expectations for.
+    Returns:
+        list[ExpectationConfiguration]: A list of expectations for the columns to have no null values.
+    """
+    return [
+        ExpectationConfiguration(expectation_type="expect_column_values_to_not_be_null", kwargs={"column": column})
+        for column in columns
+    ]
 class ContractBase(abc.ABC):
     """Base class for data contracts. It contains the basic and extended expectations for the data, as well as the
     validation state and the result of the last validation. It also contains a method to validate the data.
@@ -180,65 +225,29 @@ class TransactionLevelContract(ContractBase):
         validation_result (dict): The result of the last validation.
     """
-    basic_expectations = [
-        ExpectationConfiguration(
-            expectation_type="expect_column_to_exist",
-            kwargs={"column": "transaction_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_to_exist",
-            kwargs={"column": "transaction_datetime"},
-        ),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "customer_id"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "total_price"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "store_id"}),
-    ]
+    basic_expectations = build_expected_columns(
+        ["transaction_id", "transaction_datetime", "customer_id", "total_price", "store_id"]
+    )
-    extended_expectations = [
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_be_unique",
-            kwargs={"column": "transaction_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_compound_columns_to_be_unique",
-            kwargs={
-                "column_list": [
-                    "transaction_id",
-                    "transaction_datetime",
-                    "customer_id",
-                    "store_id",
-                ]
-            },
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_be_between",
-            kwargs={
-                "column": "transaction_datetime",
-                "min_value": "1970-01-01",
-                "max_value": "2029-12-31",
-            },
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "transaction_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "transaction_datetime"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "customer_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "total_price"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "store_id"},
-        ),
-    ]
+    extended_expectations = (
+        build_expected_unique_columns(
+            [
+                "transaction_id",
+                ["transaction_datetime", "customer_id", "total_price", "store_id"],
+            ]
+        )
+        + [
+            ExpectationConfiguration(
+                expectation_type="expect_column_values_to_be_between",
+                kwargs={
+                    "column": "transaction_datetime",
+                    "min_value": "1970-01-01",
+                    "max_value": "2029-12-31",
+                },
+            )
+        ]
+        + build_non_null_columns(["transaction_id", "transaction_datetime", "customer_id", "total_price", "store_id"])
+    )
 class TransactionItemLevelContract(ContractBase):
@@ -263,23 +272,19 @@ class TransactionItemLevelContract(ContractBase):
             that these columns are not null.
     """
-    basic_expectations = [
-        ExpectationConfiguration(
-            expectation_type="expect_column_to_exist",
-            kwargs={"column": "transaction_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_to_exist",
-            kwargs={"column": "transaction_datetime"},
-        ),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "customer_id"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "total_price"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "store_id"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "product_id"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "product_name"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "unit_price"}),
-        ExpectationConfiguration(expectation_type="expect_column_to_exist", kwargs={"column": "quantity"}),
-    ]
+    basic_expectations = build_expected_columns(
+        [
+            "transaction_id",
+            "transaction_datetime",
+            "customer_id",
+            "total_price",
+            "store_id",
+            "product_id",
+            "product_name",
+            "unit_price",
+            "quantity",
+        ]
+    )
     extended_expectations = [
         ExpectationConfiguration(
@@ -297,44 +302,18 @@ class TransactionItemLevelContract(ContractBase):
             expectation_type="expect_transaction_product_quantity_sign_to_be_unique",
             kwargs={},
         ),
-        # Null expectations
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "transaction_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "transaction_datetime"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "customer_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "total_price"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "store_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "product_id"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "product_name"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "unit_price"},
-        ),
-        ExpectationConfiguration(
-            expectation_type="expect_column_values_to_not_be_null",
-            kwargs={"column": "quantity"},
-        ),
-    ]
+    ] + build_non_null_columns(
+        [
+            "transaction_id",
+            "transaction_datetime",
+            "customer_id",
+            "store_id",
+            "product_id",
+            "product_name",
+            "unit_price",
+            "quantity",
+        ]
+    )
     def __init__(self, df: pd.DataFrame) -> None:
         # If category or brand columns are present, add expectations for them
@@ -393,3 +372,37 @@ class CustomerLevelContract(ContractBase):
             kwargs={"column": "customer_id"},
         ),
     ]
+class CustomContract(ContractBase):
+    """A helper class to construct contracts for specific use cases.
+    Args:
+        df (pd.DataFrame): The input DataFrame.
+        basic_expectations (list[ExpectationConfiguration] | None, optional): A list of basic expectation
+            configurations. Defaults to None. At least one basic or extended expectation must be supplied.
+        extended_expectations (list[ExpectationConfiguration] | None, optional): A list of extended expectation
+            configurations. Defaults to None. At least one basic or extended expectation must be supplied.
+    Raises:
+        ValueError: If both basic_expectations and extended_expectations are None.
+    Attributes:
+        basic_expectations (list[ExpectationConfiguration]): A list of basic expectation configurations.
+        extended_expectations (list[ExpectationConfiguration]): A list of extended expectation configurations.
+    """
+    def __init__(
+        self,
+        df: pd.DataFrame,
+        basic_expectations: list[ExpectationConfiguration] | None = None,
+        extended_expectations: list[ExpectationConfiguration] | None = None,
+    ) -> None:
+        if basic_expectations is None and extended_expectations is None:
+            raise ValueError("At least one of basic_expectations or extended_expectations must be provided.")
+        self.basic_expectations = basic_expectations or []
+        self.extended_expectations = extended_expectations or []
+        super().__init__(df)

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/range_planning.py RENAMED Viewed

@@ -7,7 +7,7 @@ from matplotlib.axes import Axes, SubplotBase
 from scipy.cluster.hierarchy import dendrogram, linkage
 import pyretailscience.style.graph_utils as gu
-from pyretailscience.data.contracts import TransactionItemLevelContract
+from pyretailscience.data.contracts import CustomContract, build_expected_columns, build_non_null_columns
 from pyretailscience.style.graph_utils import GraphStyles as gs
@@ -40,8 +40,17 @@ class CustomerDecisionHierarchy:
             ValueError: If the dataframe does not comply with the TransactionItemLevelContract.
         """
-        if TransactionItemLevelContract(df).validate() is False:
-            raise ValueError("The dataframe does not comply with the TransactionItemLevelContract")
+        cdh_contract = CustomContract(
+            df,
+            basic_expectations=build_expected_columns(columns=["customer_id", "transaction_id", "product_name"]),
+            extended_expectations=build_non_null_columns(columns=["customer_id", "transaction_id", "product_name"]),
+        )
+        if cdh_contract.validate() is False:
+            raise ValueError(
+                "The dataframe requires the columns 'customer_id', 'transaction_id', and 'product_name' and they must "
+                "be non-null"
+            )
         self.random_state = random_state
         self.pairs_df = self._get_pairs(df, exclude_same_transaction_products)
@@ -254,18 +263,21 @@ class CustomerDecisionHierarchy:
         ax.set_title(
             title,
+            fontproperties=gs.POPPINS_SEMI_BOLD,
             fontsize=gs.DEFAULT_TITLE_FONT_SIZE,
-            pad=15,
+            pad=gs.DEFAULT_TITLE_PAD + 5,
         )
         ax.set_xlabel(
-            gu.not_none(y_label, default_x_label),
+            gu.not_none(x_label, default_x_label),
+            fontproperties=gs.POPPINS_REG,
             fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
-            labelpad=10,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
         )
         ax.set_ylabel(
-            gu.not_none(x_label, default_y_label),
+            gu.not_none(y_label, default_y_label),
+            fontproperties=gs.POPPINS_REG,
             fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
-            labelpad=10,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
         )
         # Set the y label to be on the right side of the plot
@@ -300,11 +312,21 @@ class CustomerDecisionHierarchy:
                 ha="left",
                 va="center",
                 fontsize=gs.DEFAULT_SOURCE_FONT_SIZE,
+                fontproperties=gs.POPPINS_LIGHT_ITALIC,
+                color="dimgray",
             )
+        ax.xaxis.set_tick_params(labelsize=gs.DEFAULT_TICK_LABEL_FONT_SIZE)
+        ax.yaxis.set_tick_params(labelsize=gs.DEFAULT_TICK_LABEL_FONT_SIZE)
+        # Rotate the x-axis labels if they are too long
         if orientation in ["top", "bottom"]:
-            ax.xaxis.set_tick_params(labelsize=gs.DEFAULT_TICK_LABEL_FONT_SIZE)
-        else:
-            ax.yaxis.set_tick_params(labelsize=gs.DEFAULT_TICK_LABEL_FONT_SIZE)
+            plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
+        # Set the font properties for the tick labels
+        for tick in ax.get_xticklabels():
+            tick.set_fontproperties(gs.POPPINS_REG)
+        for tick in ax.get_yticklabels():
+            tick.set_fontproperties(gs.POPPINS_REG)
         return ax

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/segmentation.py RENAMED Viewed

@@ -4,37 +4,108 @@ import pandas as pd
 from matplotlib.axes import Axes, SubplotBase
 import pyretailscience.style.graph_utils as gu
-from pyretailscience.data.contracts import TransactionItemLevelContract, TransactionLevelContract
+from pyretailscience.data.contracts import (
+    TransactionItemLevelContract,
+    TransactionLevelContract,
+    CustomContract,
+    build_expected_columns,
+    build_non_null_columns,
+    build_expected_unique_columns,
+)
+from pyretailscience.style.graph_utils import GraphStyles as gs
 from pyretailscience.style.tailwind import COLORS
-class HMLSegmentation:
-    def __init__(self, df: pd.DataFrame, value_col: str = "total_price") -> None:
+class BaseSegmentation:
+    def add_segment(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Adds the segment to the dataframe based on the customer_id column.
+        Args:
+            df (pd.DataFrame): The dataframe to add the segment to. The dataframe must have a customer_id column.
+        Returns:
+            pd.DataFrame: The dataframe with the segment added.
+        Raises:
+            ValueError: If the number of rows before and after the merge do not match.
+        """
+        rows_before = len(df)
+        df = df.merge(self.df[["segment_name", "segment_id"]], how="left", left_on="customer_id", right_index=True)
+        rows_after = len(df)
+        if rows_before != rows_after:
+            raise ValueError("The number of rows before and after the merge do not match. This should not happen.")
+        return df
+class ExistingSegmentation(BaseSegmentation):
+    def __init__(self, df: pd.DataFrame) -> None:
+        """
+        Segments customers based on an existing segment in the dataframe.
+        Args:
+            df (pd.DataFrame): A dataframe with the customer_id, segment_name and segment_id columns.
+        Raises:
+            ValueError: If the dataframe does not have the columns customer_id, segment_name and segment_id.
+        """
+        required_cols = "customer_id", "segment_name", "segment_id"
+        contract = CustomContract(
+            df,
+            basic_expectations=build_expected_columns(columns=required_cols),
+            extended_expectations=build_non_null_columns(columns=required_cols)
+            + build_expected_unique_columns(columns=[required_cols]),
+        )
+        if contract.validate() is False:
+            raise ValueError(
+                f"The dataframe requires the columns {required_cols} and they must be non-null and unique."
+            )
+        self.df = df[["customer_id", "segment_name", "segment_id"]].set_index("customer_id")
+class HMLSegmentation(BaseSegmentation):
+    def __init__(
+        self,
+        df: pd.DataFrame,
+        value_col: str = "total_price",
+        zero_value_customers: Literal["separate_segment", "exclude", "include_with_light"] = "separate_segment",
+    ) -> None:
         """
         Segments customers into Heavy, Medium, Light and Zero spenders based on the total spend.
         Args:
-            df (pd.DataFrame): A dataframe with the transaction data. The dataframe must comply with the
-                TransactionItemLevelContract or the TransactionLevelContract.
+            df (pd.DataFrame): A dataframe with the transaction data. The dataframe must contain a customer_id column.
             value_col (str, optional): The column to use for the segmentation. Defaults to "total_price".
         Raises:
-            ValueError: If the dataframe does not comply with the TransactionItemLevelContract or
-                TransactionLevelContract.
+            ValueError: If the dataframe is missing the columns "customer_id" or `value_col`, or these columns contain
+                null values.
         """
+        required_cols = ["customer_id", value_col]
+        contract = CustomContract(
+            df,
+            basic_expectations=build_expected_columns(columns=required_cols),
+            extended_expectations=build_non_null_columns(columns=required_cols),
+        )
-        if TransactionItemLevelContract(df).validate() is False and TransactionLevelContract(df).validate() is False:
-            raise ValueError("The dataframe does not comply with the TransactionItemLevelContract")
+        if contract.validate() is False:
+            raise ValueError(f"The dataframe requires the columns {required_cols} and they must be non-null")
         # Group by customer_id and calculate total_spend
         grouped_df = df.groupby("customer_id")[value_col].sum().to_frame(value_col)
         # Separate customers with zero spend
-        zero_idx = grouped_df[value_col] == 0
-        zero_cust_df = grouped_df[zero_idx]
-        zero_cust_df["segment_name"] = "Zero"
+        hml_df = grouped_df
+        if zero_value_customers in ["separate_segment", "exclude"]:
+            zero_idx = grouped_df[value_col] == 0
+            zero_cust_df = grouped_df[zero_idx]
+            zero_cust_df["segment_name"] = "Zero"
-        hml_df = grouped_df[~zero_idx]
+            hml_df = grouped_df[~zero_idx]
         # Create a new column 'segment' based on the total_spend
         hml_df["segment_name"] = pd.qcut(
@@ -43,38 +114,14 @@ class HMLSegmentation:
             labels=["Light", "Medium", "Heavy"],
         )
-        hml_df = pd.concat([hml_df, zero_cust_df])
+        if zero_value_customers == "separate_segment":
+            hml_df = pd.concat([hml_df, zero_cust_df])
         segment_code_map = {"Light": "L", "Medium": "M", "Heavy": "H", "Zero": "Z"}
         hml_df["segment_id"] = hml_df["segment_name"].map(segment_code_map)
-        self.df = grouped_df
-    def add_segment(self, df: pd.DataFrame) -> pd.DataFrame:
-        """
-        Adds the segment to the dataframe based on the customer_id column.
-        Args:
-            df (pd.DataFrame): The dataframe to add the segment to. The dataframe must have a customer_id column.
-        Returns:
-            pd.DataFrame: The dataframe with the segment added.
-        Raises:
-            ValueError: If the number of rows before and after the merge do not match.
-        """
-        # TODO: Add a contract that ensures there's a customer ID column or matches one or more of a set of contracts
-        # efficently - Eg checks all the quick validations and then tries the extended validations
-        rows_before = len(df)
-        df = df.merge(self.df[["segment_name", "segment_id"]], how="left", left_on="customer_id", right_index=True)
-        rows_after = len(df)
-        if rows_before != rows_after:
-            raise ValueError("The number of rows before and after the merge do not match. This should not happen.")
-        return df
+        self.df = hml_df
 class SegTransactionStats:
@@ -195,18 +242,41 @@ class SegTransactionStats:
             decimals = gu.get_decimals(ax.get_xlim(), ax.get_xticks())
             ax.xaxis.set_major_formatter(lambda x, pos: gu.human_format(x, pos, decimals=decimals))
-        ax.set_title(gu.not_none(title, default_title))
-        ax.set_ylabel(plot_y_label)
-        ax.set_xlabel(plot_x_label)
+        ax.set_title(
+            gu.not_none(title, default_title),
+            fontproperties=gs.POPPINS_SEMI_BOLD,
+            fontsize=gs.DEFAULT_TITLE_FONT_SIZE,
+            pad=gs.DEFAULT_TITLE_PAD,
+        )
+        ax.set_ylabel(
+            plot_y_label,
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
+        ax.set_xlabel(
+            plot_x_label,
+            fontproperties=gs.POPPINS_REG,
+            fontsize=gs.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=gs.DEFAULT_AXIS_LABEL_PAD,
+        )
         if source_text is not None:
             ax.annotate(
                 source_text,
-                xy=(-0.1, -0.2),
+                xy=(-0.1, -0.15),
                 xycoords="axes fraction",
                 ha="left",
                 va="center",
-                fontsize=10,
+                fontsize=gs.DEFAULT_SOURCE_FONT_SIZE,
+                fontproperties=gs.POPPINS_LIGHT_ITALIC,
+                color="dimgray",
             )
+        # Set the font properties for the tick labels
+        for tick in ax.get_xticklabels():
+            tick.set_fontproperties(gs.POPPINS_REG)
+        for tick in ax.get_yticklabels():
+            tick.set_fontproperties(gs.POPPINS_REG)
         return ax

pyretailscience-0.3.2/pyretailscience/standard_graphs.py ADDED Viewed

@@ -0,0 +1,313 @@
+from typing import Literal
+import numpy as np
+import pandas as pd
+from matplotlib.axes import Axes, SubplotBase
+from pandas.tseries.offsets import BaseOffset
+import pyretailscience.style.graph_utils as gu
+from pyretailscience.style.graph_utils import GraphStyles as gs
+from pyretailscience.style.tailwind import COLORS, get_linear_cmap
+# TODO: Consider simplifying this by reducing the color range in the get_linear_cmap function.
+COLORMAP_MIN = 0.25
+COLORMAP_MAX = 0.75
+def time_plot(
+    df: pd.DataFrame,
+    value_col: str,
+    period: str | BaseOffset = "D",
+    agg_func: str = "sum",
+    group_col: str | None = None,
+    title: str | None = None,
+    x_label: str | None = None,
+    y_label: str | None = None,
+    legend_title: str | None = None,
+    ax: Axes | None = None,
+    source_text: str = None,
+    **kwargs: dict[str, any],
+) -> SubplotBase:
+    """
+    Plots the value_col over time.
+    Args:
+        df (pd.DataFrame): The dataframe to plot.
+        value_col (str): The column to plot.
+        period (str | BaseOffset): The period to group the data by.
+        agg_func (str, optional): The aggregation function to apply to the value_col. Defaults to "sum".
+        group_col (str, optional): The column to group the data by. Defaults to None.
+        title (str, optional): The title of the plot. Defaults to None. When None the title is set to
+            "Total Sales" if `group_col` is None, otherwise `f"{value_col.title()} by {group_col.title()}"`
+        x_label (str, optional): The x-axis label. Defaults to None. When None the x-axis label is set to blank
+        y_label (str, optional): The y-axis label. Defaults to None. When None the y-axis label is set to the title
+            case of `value_col`
+        legend_title (str, optional): The title of the legend. Defaults to None. When None the legend title is set to
+            the title case of `group_col`
+        ax (Axes, optional): The matplotlib axes object to plot on. Defaults to None.
+        source_text (str, optional): The source text to add to the plot. Defaults to None.
+        **kwargs: Additional keyword arguments to pass to the Pandas plot function.
+    Returns:
+        SubplotBase: The matplotlib axes object.
+    """
+    df["transaction_period"] = df["transaction_datetime"].dt.to_period(period)
+    if group_col is None:
+        colors = COLORS["green"][500]
+        df = df.groupby("transaction_period")[value_col].agg(agg_func)
+        default_title = "Total Sales"
+        show_legend = False
+    else:
+        colors = get_linear_cmap("green")(np.linspace(COLORMAP_MIN, COLORMAP_MAX, df[group_col].nunique()))
+        df = (
+            df.groupby([group_col, "transaction_period"])[value_col]
+            .agg(agg_func)
+            .reset_index()
+            .pivot(index="transaction_period", columns=group_col, values=value_col)
+        )
+        default_title = f"{value_col.title()} by {group_col.title()}"
+        show_legend = True
+    ax = df.plot(
+        linewidth=3,
+        color=colors,
+        legend=show_legend,
+        ax=ax,
+        **kwargs,
+    )
+    ax = gu.standard_graph_styles(
+        ax,
+        title=gu.not_none(title, default_title),
+        x_label=gu.not_none(x_label, ""),
+        y_label=gu.not_none(y_label, value_col.title()),
+    )
+    decimals = gu.get_decimals(ax.get_ylim(), ax.get_yticks())
+    ax.yaxis.set_major_formatter(lambda x, pos: gu.human_format(x, pos, decimals=decimals))
+    if show_legend:
+        legend = ax.legend(
+            title=gu.not_none(legend_title, group_col.title()),
+            frameon=True,
+        )
+        legend.get_frame().set_facecolor("white")
+        legend.get_frame().set_edgecolor("white")
+    if source_text is not None:
+        ax.annotate(
+            source_text,
+            xy=(-0.1, -0.2),
+            xycoords="axes fraction",
+            ha="left",
+            va="center",
+            fontsize=gs.DEFAULT_SOURCE_FONT_SIZE,
+            fontproperties=gs.POPPINS_LIGHT_ITALIC,
+            color="dimgray",
+        )
+    # Set the font properties for the tick labels
+    for tick in ax.get_xticklabels():
+        tick.set_fontproperties(gs.POPPINS_REG)
+    for tick in ax.get_yticklabels():
+        tick.set_fontproperties(gs.POPPINS_REG)
+    return ax
+def get_indexes(
+    df: pd.DataFrame,
+    df_index_filter: list[bool],
+    index_col: str,
+    value_col: str,
+    index_subgroup_col: str | None = None,
+    agg_func: str = "sum",
+    offset: int = 0,
+) -> pd.DataFrame:
+    """
+    Calculates the index of the value_col for the subset of a dataframe defined by df_index_filter.
+    Args:
+        df (pd.DataFrame): The dataframe to calculate the index on.
+        df_index_filter (list[bool]): The boolean index to filter the data by.
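+        index_col (str): The column to calculate the index for. The data is grouped by this column.
+        index_subgroup_col (str, optional): A column to additionally group by; when given, shares are computed
+            within each of its values. Defaults to None.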
+        value_col (str): The column to calculate the index on.
+        agg_func (str): The aggregation function to apply to the value_col.
+        offset (int, optional): The offset to subtract from the index. Defaults to 0.
+    Returns:
+        pd.DataFrame: The grouping columns plus an "index" column holding the index of the value_col for the
+            subset of data defined by df_index_filter.
+    """
+    if all(df_index_filter) or not any(df_index_filter):
+        raise ValueError("The df_index_filter cannot be all True or all False.")
+    grp_cols = [index_col] if index_subgroup_col is None else [index_subgroup_col, index_col]
+    overall_df = df.groupby(grp_cols)[value_col].agg(agg_func).to_frame(value_col)
+    if index_subgroup_col is None:
+        overall_total = overall_df[value_col].sum()
+    else:
+        overall_total = overall_df.groupby(index_subgroup_col)[value_col].sum()
+    overall_s = overall_df[value_col] / overall_total
+    subset_df = df[df_index_filter].groupby(grp_cols)[value_col].agg(agg_func).to_frame(value_col)
+    if index_subgroup_col is None:
+        subset_total = subset_df[value_col].sum()
+    else:
+        subset_total = subset_df.groupby(index_subgroup_col)[value_col].sum()
+    subset_s = subset_df[value_col] / subset_total
+    index_df = ((subset_s / overall_s * 100) - offset).to_frame("index").reset_index()
+    return index_df
+def index_plot(
+    df: pd.DataFrame,
+    df_index_filter: list[bool],
+    value_col: str,
+    group_col: str,
+    agg_func: str = "sum",
+    series_col: str | None = None,
+    title: str | None = None,
+    x_label: str = "Index",
+    y_label: str | None = None,
+    legend_title: str | None = None,
+    highlight_range: Literal["default"] | tuple[float, float] | None = "default",
+    sort_by: Literal["group", "value"] | None = "group",
+    sort_order: Literal["ascending", "descending"] = "ascending",
+    ax: Axes | None = None,
+    source_text: str = None,
+    exclude_groups: list[any] | None = None,
+    include_only_groups: list[any] | None = None,
+    **kwargs: dict[str, any],
+) -> SubplotBase:
+    """
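+    Plots the index of the value_col for each group_col value as a horizontal bar chart, comparing the subset of
+    data selected by df_index_filter against the whole dataframe.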
+    Args:
+        df (pd.DataFrame): The dataframe to plot.
+        df_index_filter (list[bool]): The filter to apply to the dataframe.
+        value_col (str): The column to plot.
+        group_col (str): The column to group the data by.
+        agg_func (str, optional): The aggregation function to apply to the value_col. Defaults to "sum".
+        series_col (str, optional): The column to use as the series. Defaults to None.
+        title (str, optional): The title of the plot. Defaults to None. When None the title is set to
+            `f"{value_col.title()} by {group_col.title()}"`
+        x_label (str, optional): The x-axis label. Defaults to "Index".
+        y_label (str, optional): The y-axis label. Defaults to None. When None the y-axis label is set to the title
+            case of `group_col`
+        legend_title (str, optional): The title of the legend. Defaults to None. When None the legend title is set to
+            the title case of `series_col`
+        highlight_range (Literal["default"] | tuple[float, float] | None, optional): The range to highlight. Defaults
+            to "default". When "default" the range is set to (80, 120). When None no range is highlighted.
+        sort_by (Literal["group", "value"] | None, optional): The column to sort by. Defaults to "group". When None the
+            data is not sorted. When "group" the data is sorted by group_col. When "value" the data is sorted by
+            the index value. When series_col is not None, "value" is ignored and "group" sorts by group_col and
+            then series_col.
+        sort_order (Literal["ascending", "descending"], optional): The order to sort the data. Defaults to "ascending".
+        ax (Axes, optional): The matplotlib axes object to plot on. Defaults to None.
+        source_text (str, optional): The source text to add to the plot. Defaults to None.
+        exclude_groups (list[any], optional): The groups to exclude from the plot. Defaults to None.
+        include_only_groups (list[any], optional): The groups to include in the plot. Defaults to None. When None all
+            groups are included. When not None only the groups in the list are included. Cannot be used with
+            exclude_groups.
+        **kwargs: Additional keyword arguments to pass to the Pandas plot function.
+    Returns:
+        SubplotBase: The matplotlib axes object.
+    Raises:
+        ValueError: If sort_by is not either "group" or "value" or None.
+        ValueError: If sort_order is not either "ascending" or "descending".
+        ValueError: If exclude_groups and include_only_groups are used together.
+    """
+    if sort_by is not None and sort_by not in ["group", "value"]:
+        raise ValueError("sort_by must be either 'group' or 'value' or None")
+    if sort_order not in ["ascending", "descending"]:
+        raise ValueError("sort_order must be either 'ascending' or 'descending'")
+    if exclude_groups is not None and include_only_groups is not None:
+        raise ValueError("exclude_groups and include_only_groups cannot be used together.")
+    index_df = get_indexes(
+        df=df,
+        df_index_filter=df_index_filter,
+        index_col=group_col,
+        index_subgroup_col=series_col,
+        value_col=value_col,
+        agg_func=agg_func,
+        offset=100,
+    )
+    if exclude_groups is not None:
+        index_df = index_df[~index_df[group_col].isin(exclude_groups)]
+    if include_only_groups is not None:
+        index_df = index_df[index_df[group_col].isin(include_only_groups)]
+    if series_col is None:
+        colors = COLORS["green"][500]
+        show_legend = False
+        index_df = index_df[[group_col, "index"]].set_index(group_col)
+        if sort_by == "group":
+            index_df = index_df.sort_values(by=group_col, ascending=sort_order == "ascending")
+        elif sort_by == "value":
+            index_df = index_df.sort_values(by="index", ascending=sort_order == "ascending")
+    else:
+        show_legend = True
+        colors = get_linear_cmap("green")(np.linspace(COLORMAP_MIN, COLORMAP_MAX, df[series_col].nunique()))
+        if sort_by == "group":
+            index_df = index_df.sort_values(by=[group_col, series_col], ascending=sort_order == "ascending")
+        index_df = index_df.pivot_table(index=group_col, columns=series_col, values="index", sort=False)
+    ax = index_df.plot.barh(
+        left=100,
+        legend=show_legend,
+        ax=ax,
+        color=colors,
+        width=gs.DEFAULT_BAR_WIDTH,
+        zorder=2,
+        **kwargs,
+    )
+    ax.axvline(100, color="black", linewidth=1, alpha=0.5)
+    if highlight_range == "default":
+        highlight_range = (80, 120)
+    if highlight_range is not None:
+        ax.axvline(highlight_range[0], color="black", linewidth=0.25, alpha=0.1, zorder=-1)
+        ax.axvline(highlight_range[1], color="black", linewidth=0.25, alpha=0.1, zorder=-1)
+        ax.axvspan(highlight_range[0], highlight_range[1], color="black", alpha=0.1, zorder=-1)
+    default_title = f"{value_col.title()} by {group_col.title()}"
+    ax = gu.standard_graph_styles(
+        ax=ax,
+        title=gu.not_none(title, default_title),
+        x_label=gu.not_none(x_label, "Index"),
+        y_label=gu.not_none(y_label, group_col.title()),
+    )
+    if show_legend:
+        legend = ax.legend(title=gu.not_none(legend_title, series_col.title()), frameon=True)
+        legend.get_frame().set_facecolor("white")
+        legend.get_frame().set_edgecolor("white")
+    if source_text is not None:
+        ax.annotate(
+            source_text,
+            xy=(-0.1, -0.2),
+            xycoords="axes fraction",
+            ha="left",
+            va="center",
+            fontsize=gs.DEFAULT_SOURCE_FONT_SIZE,
+            fontproperties=gs.POPPINS_LIGHT_ITALIC,
+            color="dimgray",
+        )
+    # Set the font properties for the tick labels
+    for tick in ax.get_xticklabels():
+        tick.set_fontproperties(gs.POPPINS_REG)
+    for tick in ax.get_yticklabels():
+        tick.set_fontproperties(gs.POPPINS_REG)
+    return ax

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/style/graph_utils.py RENAMED Viewed

@@ -1,14 +1,29 @@
+import matplotlib.font_manager as fm
 from matplotlib.axes import Axes
+import importlib.resources as pkg_resources
+ASSETS_PATH = pkg_resources.files("pyretailscience").joinpath("assets")
 class GraphStyles:
     """A class to hold the styles for a graph."""
-    DEFAULT_TITLE_FONT_SIZE = 16
+    POPPINS_BOLD = fm.FontProperties(fname=f"{ASSETS_PATH}/fonts/Poppins-Bold.ttf")
+    POPPINS_SEMI_BOLD = fm.FontProperties(fname=f"{ASSETS_PATH}/fonts/Poppins-SemiBold.ttf")
+    POPPINS_REG = fm.FontProperties(fname=f"{ASSETS_PATH}/fonts/Poppins-Regular.ttf")
+    POPPINS_MED = fm.FontProperties(fname=f"{ASSETS_PATH}/fonts/Poppins-Medium.ttf")
+    POPPINS_LIGHT_ITALIC = fm.FontProperties(fname=f"{ASSETS_PATH}/fonts/Poppins-LightItalic.ttf")
+    DEFAULT_TITLE_FONT_SIZE = 20
     DEFAULT_SOURCE_FONT_SIZE = 10
     DEFAULT_AXIS_LABEL_FONT_SIZE = 12
     DEFAULT_TICK_LABEL_FONT_SIZE = 10
+    DEFAULT_AXIS_LABEL_PAD = 10
+    DEFAULT_TITLE_PAD = 10
+    DEFAULT_BAR_WIDTH = 0.8
 def human_format(num, pos=None, decimals=0, prefix="") -> str:
     """Format a number in a human readable format for Matplotlib.
@@ -31,7 +46,12 @@ def human_format(num, pos=None, decimals=0, prefix="") -> str:
     return f"{prefix}%.{decimals}f%s" % (num, ["", "K", "M", "G", "T", "P"][magnitude])
-def standard_graph_styles(ax: Axes) -> Axes:
+def standard_graph_styles(
+    ax: Axes,
+    title: str | None = None,
+    x_label: str | None = None,
+    y_label: str | None = None,
+) -> Axes:
     """Apply standard styles to a Matplotlib graph.
     Args:
@@ -43,6 +63,31 @@ def standard_graph_styles(ax: Axes) -> Axes:
     ax.spines[["top", "right"]].set_visible(False)
     ax.grid(which="major", axis="x", color="#DAD8D7", alpha=0.5, zorder=1)
     ax.grid(which="major", axis="y", color="#DAD8D7", alpha=0.5, zorder=1)
+    if title is not None:
+        ax.set_title(
+            title,
+            fontproperties=GraphStyles.POPPINS_SEMI_BOLD,
+            fontsize=GraphStyles.DEFAULT_TITLE_FONT_SIZE,
+            pad=GraphStyles.DEFAULT_TITLE_PAD,
+        )
+    if x_label is not None:
+        ax.set_xlabel(
+            x_label,
+            fontproperties=GraphStyles.POPPINS_REG,
+            fontsize=GraphStyles.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=GraphStyles.DEFAULT_AXIS_LABEL_PAD,
+        )
+    if y_label is not None:
+        ax.set_ylabel(
+            y_label,
+            fontproperties=GraphStyles.POPPINS_REG,
+            fontsize=GraphStyles.DEFAULT_AXIS_LABEL_FONT_SIZE,
+            labelpad=GraphStyles.DEFAULT_AXIS_LABEL_PAD,
+        )
     return ax

pyretailscience-0.3.0/pyretailscience/standard_graphs.py DELETED Viewed

@@ -1,96 +0,0 @@
-import numpy as np
-import pandas as pd
-from matplotlib.axes import Axes, SubplotBase
-from pandas.tseries.offsets import BaseOffset
-import pyretailscience.style.graph_utils as gu
-from pyretailscience.style.tailwind import COLORS, get_linear_cmap
-# TODO: Consider simplifying this by reducing the color range in the get_linear_cmap function.
-COLORMAP_MIN = 0.25
-COLORMAP_MAX = 0.75
-def time_plot(
-    df: pd.DataFrame,
-    value_col: str,
-    period: str | BaseOffset = "D",
-    agg_func: str = "sum",
-    group_col: str | None = None,
-    title: str | None = None,
-    x_label: str | None = None,
-    y_label: str | None = None,
-    ax: Axes | None = None,
-    source_text: str = None,
-    **kwargs: dict[str, any],
-) -> SubplotBase:
-    """
-    Plots the value_col over time.
-    Args:
-        df (pd.DataFrame): The dataframe to plot.
-        value_col (str): The column to plot.
-        period (str | BaseOffset): The period to group the data by.
-        agg_func (str, optional): The aggregation function to apply to the value_col. Defaults to "sum".
-        group_col (str, optional): The column to group the data by. Defaults to None.
-        title (str, optional): The title of the plot. Defaults to None. When None the title is set to
-            `f"{value_col.title()} by {group_col.title()}"`
-        x_label (str, optional): The x-axis label. Defaults to None. When None the x-axis label is set to blank
-        y_label (str, optional): The y-axis label. Defaults to None. When None the y-axis label is set to the title
-            case of `value_col`
-        ax (Axes, optional): The matplotlib axes object to plot on. Defaults to None.
-        source_text (str, optional): The source text to add to the plot. Defaults to None.
-        **kwargs: Additional keyword arguments to pass to the Pandas plot function.
-    Returns:
-        SubplotBase: The matplotlib axes object.
-    """
-    df["transaction_period"] = df["transaction_datetime"].dt.to_period(period)
-    if group_col is None:
-        colors = COLORS["green"][500]
-        df = df.groupby("transaction_period")[value_col].agg(agg_func)
-        default_title = "Total Sales"
-        show_legend = False
-    else:
-        colors = get_linear_cmap("green")(np.linspace(COLORMAP_MIN, COLORMAP_MAX, df[group_col].nunique()))
-        df = (
-            df.groupby([group_col, "transaction_period"])[value_col]
-            .agg(agg_func)
-            .reset_index()
-            .pivot(index="transaction_period", columns=group_col, values=value_col)
-        )
-        default_title = f"{value_col.title()} by {group_col.title()}"
-        show_legend = True
-    ax = df.plot(
-        linewidth=3,
-        color=colors,
-        legend=show_legend,
-        ax=ax,
-        **kwargs,
-    )
-    ax = gu.standard_graph_styles(ax)
-    ax.set_ylabel(gu.not_none(y_label, value_col.title()))
-    ax.set_title(gu.not_none(title, default_title))
-    ax.set_xlabel(gu.not_none(x_label, ""))
-    decimals = gu.get_decimals(ax.get_ylim(), ax.get_yticks())
-    ax.yaxis.set_major_formatter(lambda x, pos: gu.human_format(x, pos, decimals=decimals))
-    if show_legend:
-        legend = ax.legend(title="Segment", frameon=True)
-        legend.get_frame().set_facecolor("white")
-        legend.get_frame().set_edgecolor("white")
-    if source_text is not None:
-        ax.annotate(
-            source_text,
-            xy=(-0.1, -0.2),
-            xycoords="axes fraction",
-            ha="left",
-            va="center",
-            fontsize=10,
-        )
-    return ax

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/LICENSE RENAMED Viewed

File without changes

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/README.md RENAMED Viewed

File without changes

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/__init__.py RENAMED Viewed

File without changes

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/data/__init__.py RENAMED Viewed

File without changes

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/data/cli.py RENAMED Viewed

File without changes

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/data/simulation.py RENAMED Viewed

File without changes

{pyretailscience-0.3.0 → pyretailscience-0.3.2}/pyretailscience/style/tailwind.py RENAMED Viewed

File without changes

pyretailscience 0.3.0__tar.gz → 0.3.2__tar.gz

pyretailscience 0.3.0tar.gz → 0.3.2tar.gz