PyPI - datupapi - Versions diffs - 1.113.0__tar.gz → 1.115.0__tar.gz - Mend

datupapi 1.113.0__tar.gz → 1.115.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

{datupapi-1.113.0 → datupapi-1.115.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datupapi
-Version: 1.113.0
+Version: 1.115.0
 Summary: Utility library to support Datup AI MLOps processes
 Author: Datup AI
 Author-email: ramiro@datup.ai

{datupapi-1.113.0 → datupapi-1.115.0}/datupapi/evaluate/errors.py RENAMED Viewed

@@ -192,6 +192,81 @@ class Errors(Config):
         return wmape_capped
+    def compute_wmape_by_date(self, target_col, forecast_col, date_col, target_sum_dict):
+        """
+        Calculate the capped WMAPE of a single row, weighted by the sum of all
+        targets that share the row's date.
+        Despite their names, ``target_col``, ``forecast_col`` and ``date_col`` receive
+        the row's *values* (see the example below), not column names.
+        The date total is read from a pre-calculated dictionary, which avoids
+        filtering the DataFrame on every row. The date is looked up exactly as
+        given first; if that key is absent, it is normalized to a 'YYYY-MM-DD'
+        string (the key format produced by ``create_target_sum_dict``) and looked
+        up again.
+        :param target_col: Target value of the row
+        :param forecast_col: Forecast value of the row
+        :param date_col: Date of the row (string, datetime or Timestamp)
+        :param target_sum_dict: Dictionary with date string as key and sum of targets as value
+        :return: WMAPE of the row weighted by the date total, capped at 100.
+            Returns 0 when the date total is zero or the date is not in the dictionary
+        Example usage (``errors`` is an existing Errors instance):
+        >>> # First, create the dictionary of target sums by date
+        >>> target_sum_dict = errors.create_target_sum_dict(
+        ...     df=df, target_col='Target', date_col='Date'
+        ... )
+        >>> # Then apply the WMAPE calculation row by row
+        >>> df['WMAPE'] = df.apply(
+        ...     lambda row: errors.compute_wmape_by_date(
+        ...         target_col=row['Target'],
+        ...         forecast_col=row['Forecast'],
+        ...         date_col=row['Date'],
+        ...         target_sum_dict=target_sum_dict,
+        ...     ),
+        ...     axis=1,
+        ... )
+        """
+        # Try the key as given so callers that already pass 'YYYY-MM-DD' strings
+        # pay no conversion cost; normalize only when that lookup misses
+        target_sum = target_sum_dict.get(date_col)
+        if target_sum is None:
+            try:
+                date_key = pd.to_datetime(date_col).strftime('%Y-%m-%d')
+            except (ValueError, TypeError, AttributeError):
+                # Unparseable or missing date: treat it as absent from the dictionary
+                date_key = None
+            target_sum = target_sum_dict.get(date_key, 0)
+        # NumPy division by zero yields inf/nan instead of raising ZeroDivisionError,
+        # so the zero-total case must be handled explicitly to return the 0 default
+        if target_sum == 0:
+            self.logger.warning('Division by zero for date %s. Error set to 0 by default', date_col)
+            return 0
+        target = np.array(target_col, dtype=float)
+        forecast = np.array(forecast_col, dtype=float)
+        abs_error = np.abs(target - forecast)
+        # Relative error defaults to 1 where the target is 0 (no valid denominator)
+        relative_error = np.divide(abs_error, np.abs(target),
+                                   out=np.ones_like(target),
+                                   where=target != 0)
+        wmape = 100 * (target * relative_error).sum() / target_sum
+        # Written as a conditional (not min) so a nan result is still capped to 100
+        wmape_capped = wmape if wmape <= 100 else 100
+        return wmape_capped
+    def create_target_sum_dict(self, df, target_col, date_col):
+        """
+        Create a dictionary with the sum of target values for each unique date.
+        This pre-calculation lets WMAPE be computed row by row with a dictionary
+        lookup instead of filtering the DataFrame repeatedly.
+        Dates are always normalized to 'YYYY-MM-DD' string format. The input
+        DataFrame is not modified.
+        :param df: DataFrame containing the data
+        :param target_col: Name of the target column
+        :param date_col: Name of the date column
+        :return: Dictionary with date string (YYYY-MM-DD) as key and sum of targets as value.
+            An empty dictionary is returned (and the error logged) if the computation fails
+        Example (``errors`` is an existing Errors instance):
+        >>> target_sum_dict = errors.create_target_sum_dict(
+        ...     df=df, target_col='Target', date_col='date'
+        ... )
+        >>> # Returns: {'2024-01-01': 450, '2024-01-02': 320, ...}
+        """
+        try:
+            # Normalize only the date column into a key Series; grouping on it
+            # avoids copying the whole DataFrame just to rewrite one column
+            date_keys = pd.to_datetime(df[date_col]).dt.strftime('%Y-%m-%d')
+            return df[target_col].groupby(date_keys).sum().to_dict()
+        except Exception as err:
+            # Best-effort by design: callers receive an empty mapping on failure
+            self.logger.exception(f'Error creating target sum dictionary: {err}')
+            return {}

datupapi 1.113.0__tar.gz → 1.115.0__tar.gz

datupapi 1.113.0__tar.gz → 1.115.0__tar.gz