datupapi 1.113.0__tar.gz → 1.115.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datupapi-1.113.0 → datupapi-1.115.0}/PKG-INFO +1 -1
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/evaluate/errors.py +75 -0
- datupapi-1.115.0/datupapi/extract/io_opt.py +2015 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi.egg-info/PKG-INFO +1 -1
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi.egg-info/SOURCES.txt +1 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/setup.py +1 -1
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/configure/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/configure/config.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/distribution/conf/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/distribution/src/DistributionFunctions/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/distribution/src/DistributionFunctions/functions_distribution.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/distribution/src/Format/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/distribution/src/Format/distribution_format.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/evaluate/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/evaluate/anomaly.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/extract/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/extract/io.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/extract/io_citrix.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/feateng/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/feateng/relation.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/feateng/scale.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/conf/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/DailyUsage/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/DailyUsage/daily_usage.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/Format/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/Format/inventory_format.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/FutureInventory/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/FutureInventory/daily_usage_future.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/FutureInventory/future_reorder.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/InventoryFunctions/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/InventoryFunctions/functions_inventory.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/ProcessForecast/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/ProcessForecast/define_periods.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/ProcessForecast/extract_forecast.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/SuggestedForecast/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/SuggestedForecast/suggested_forecast.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/Transformation/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/inventory/src/Transformation/inventory_transformation.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/predict/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/predict/forecast.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/prepare/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/prepare/cleanse.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/prepare/format.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/prepare/format_dask.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/prepare/format_opt.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/training/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/training/attup.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/training/deepar.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/training/tft.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/transform/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/transform/backtesting.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/transform/forecasting.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/transform/ranking.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/utils/__init__.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi/utils/utils.py +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi.egg-info/dependency_links.txt +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi.egg-info/requires.txt +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/datupapi.egg-info/top_level.txt +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/pyproject.toml +0 -0
- {datupapi-1.113.0 → datupapi-1.115.0}/setup.cfg +0 -0
|
@@ -192,6 +192,81 @@ class Errors(Config):
|
|
|
192
192
|
return wmape_capped
|
|
193
193
|
|
|
194
194
|
|
|
195
|
+
def compute_wmape_by_date(self, target_col, forecast_col, date_col, target_sum_dict):
    """
    Calculate the WMAPE of a single row, weighted by the sum of all targets
    that share the row's date.

    The per-date total is read from a pre-calculated dictionary (see
    ``create_target_sum_dict``) instead of filtering the DataFrame on every
    call. Despite their names, ``target_col``, ``forecast_col`` and
    ``date_col`` receive the row *values*, not column names.

    :param target_col: Target value(s) of the row; converted with
        ``np.array(..., dtype=float)``
    :param forecast_col: Forecast value(s) of the row; converted with
        ``np.array(..., dtype=float)``
    :param date_col: Date of the row. It is first looked up as given; if it is
        not a key of ``target_sum_dict`` it is normalized to a 'YYYY-MM-DD'
        string and looked up again.
    :param target_sum_dict: Dictionary with date string as key and sum of
        targets as value
    :return: WMAPE value for that row weighted by the date total, capped at
        100. Returns 0 when the date total is 0 or the date is not found.

    Example usage (``errors`` is an ``Errors`` instance):
        >>> # First, create the dictionary of target sums by date
        >>> target_sum_dict = errors.create_target_sum_dict(
        ...     df=df, target_col='Target', date_col='Date'
        ... )
        >>>
        >>> # Then apply the WMAPE calculation row by row
        >>> df['WMAPE'] = df.apply(
        ...     lambda row: errors.compute_wmape_by_date(
        ...         target_col=row['Target'],
        ...         forecast_col=row['Forecast'],
        ...         date_col=row['Date'],
        ...         target_sum_dict=target_sum_dict,
        ...     ),
        ...     axis=1,
        ... )
    """
    target = np.array(target_col, dtype=float)
    forecast = np.array(forecast_col, dtype=float)

    # Get the total target sum for this date from the dictionary. Try the
    # value exactly as given first so pre-formatted keys keep working, then
    # fall back to the 'YYYY-MM-DD' form (e.g. when a Timestamp is passed).
    date_key = date_col
    if date_key not in target_sum_dict:
        try:
            date_key = pd.to_datetime(date_col).strftime('%Y-%m-%d')
        except (ValueError, TypeError, AttributeError):
            # Not interpretable as a single date; treated as "date not found".
            date_key = date_col
    target_sum = target_sum_dict.get(date_key, 0)

    # NumPy float division by zero yields inf/nan (with a RuntimeWarning)
    # instead of raising ZeroDivisionError, so the zero case must be checked
    # explicitly; otherwise the cap below would turn it into 100.
    if target_sum == 0:
        self.logger.warning(
            f'Division by zero. Error set to 0 by default: '
            f'target sum for date {date_col!r} is 0 or missing'
        )
        return 0

    # Calculate absolute error for current row.
    # Where target == 0 the ratio defaults to 1 and is then multiplied by the
    # zero target, so those entries contribute 0 to the numerator.
    e = target - forecast
    ratio = np.divide(abs(e), abs(target),
                      out=np.ones_like(target),
                      where=target != 0)
    wmape = 100 * (target * ratio).sum() / target_sum

    return wmape if wmape <= 100 else 100
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def create_target_sum_dict(self, df, target_col, date_col):
    """
    Create a dictionary with the sum of target values for each unique date.

    This pre-calculation significantly improves performance when computing
    WMAPE row by row, as it avoids filtering the DataFrame repeatedly.
    Dates are always normalized to 'YYYY-MM-DD' string format. The input
    DataFrame is not modified.

    :param df: DataFrame containing the data
    :param target_col: Name of the target column
    :param date_col: Name of the date column
    :return: Dictionary with date string (YYYY-MM-DD) as key and sum of targets
        as value. An empty dictionary is returned (and the error is logged)
        if anything fails while building it.

    Example (``errors`` is an ``Errors`` instance):
        >>> target_sum_dict = errors.create_target_sum_dict(
        ...     df=df, target_col='Target', date_col='date'
        ... )
        >>> # Returns: {'2024-01-01': 450, '2024-01-02': 320, ...}
    """
    try:
        # Build the 'YYYY-MM-DD' keys as a separate Series rather than
        # copying the whole DataFrame just to rewrite one column.
        date_keys = pd.to_datetime(df[date_col]).dt.strftime('%Y-%m-%d')
        return df[target_col].groupby(date_keys).sum().to_dict()
    except Exception as err:
        # Best-effort by design: callers receive an empty mapping on failure.
        self.logger.exception(f'Error creating target sum dictionary: {err}')
        return {}
|