datupapi 1.111.0__py3-none-any.whl → 1.112.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,358 +1,609 @@
1
1
  import pandas as pd
2
+ import numpy as np
2
3
  import os
3
4
  import ast
4
- import numpy as np
5
- from datetime import timedelta
5
+ import time
6
+ import sys
7
+ from datetime import timedelta, datetime
8
+ from concurrent.futures import ProcessPoolExecutor, as_completed
9
+ from multiprocessing import cpu_count
10
+ import gc
11
+ from typing import Dict, List, Tuple, Optional, Union
6
12
  from datupapi.utils.utils import Utils
7
13
  from datupapi.inventory.src.SuggestedForecast.suggested_forecast import SuggestedForecast
8
14
  from datupapi.inventory.src.FutureInventory.daily_usage_future import DailyUsageFuture
9
15
 
10
16
 
11
- class FutureReorder():
17
+ def _generate_item_dates_worker(key, df_lead_time, periods, period2, start_date, start_date_zero, default_coverage, location):
12
18
  """
13
- A class for calculating future inventory reorder points and quantities.
14
-
15
- This class implements a sophisticated inventory management system that:
16
- - Calculates optimal reorder points based on forecasted demand
17
- - Manages in-transit inventory and arrival schedules
18
- - Determines safety stock levels using statistical or reference methods
19
- - Generates reorder recommendations for multiple future periods
20
- - Supports both single-location and multi-location inventory
19
+ Generate dates for a single item in the worker process context.
20
+ This function replicates the logic from future_date() but for a single item.
21
21
 
22
- The system uses dynamic coverage strategies to optimize inventory levels
23
- while maintaining adequate safety stock to prevent stockouts.
24
-
25
- Output Fields:
26
- - FutureInventoryTransit: Total future inventory (stock + transit)
27
- - FutureInventory: Future inventory in stock only
28
- - FutureTransit: Future inventory in transit only
29
- - FutureInventoryTransitArrival: Future inventory in stock + arrivals in the period
22
+ Args:
23
+ key: Item identifier (str) or (item, location) tuple
24
+ df_lead_time: Lead time DataFrame (filtered for this item)
25
+ periods: Number of periods to generate (for ReorderFreq > 20)
26
+ period2: Number of periods to generate (for ReorderFreq <= 20)
27
+ start_date: Start date for period 1 (can be None)
28
+ start_date_zero: Custom start date for period 0 (can be None)
29
+ default_coverage: Default coverage days
30
+ location: Boolean indicating location-based processing
31
+ (Note: This parameter is kept for interface consistency but is not
32
+ directly used in date generation logic, as dates depend on ReorderFreq
33
+ which is already in the filtered df_lead_time)
34
+
35
+ Returns:
36
+ List[str]: List of dates in 'YYYYMMDD' format
30
37
  """
31
-
32
- def __init__(self, df_inv, df_lead_time, df_prep, df_fcst, periods, start_date, location=False, security_stock_ref=False, df_transit=None, integer=True, complete_suggested=False, start_date_zero=None):
33
- """
34
- Initialize the FutureReorder instance.
38
+ try:
39
+ # Determine the starting date for period 0
40
+ if start_date_zero is not None:
41
+ # Use custom start date for period 0
42
+ actual_date = pd.to_datetime(start_date_zero, format='%Y-%m-%d')
43
+ else:
44
+ # Use current system date for period 0 (original behavior)
45
+ DOCKER_CONFIG_PATH = os.path.join('/opt/ml/processing/input', 'config.yml')
46
+ utils = Utils(config_file=DOCKER_CONFIG_PATH, logfile='data_io', log_path='output/logs')
47
+ timestamp = utils.set_timestamp()
48
+ actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')
35
49
 
36
- Args:
37
- df_inv (pd.DataFrame): Current inventory data with columns:
38
- - Item: Item identifier
39
- - Location: Location identifier (if location=True)
40
- - Inventory: Current on-hand stock
41
- - Transit: In-transit quantity
42
- - PurchaseFactor: Minimum order multiple
43
-
44
- df_lead_time (pd.DataFrame): Lead time and reorder parameters:
45
- - Item: Item identifier
46
- - Location: Location identifier (if location=True)
47
- - ReorderFreq: Days between reorders (default: 30)
48
- - AvgLeadTime: Average lead time in days
49
- - MaxLeadTime: Maximum lead time in days
50
- - Coverage: Total coverage days (optional)
51
- - SecurityStockDaysRef: Reference days for safety stock (optional)
52
-
53
- df_prep (pd.DataFrame): Preparation data for forecast calculations
54
-
55
- df_fcst (pd.DataFrame): Forecast data containing demand predictions
56
-
57
- periods (int): Number of future periods to calculate
58
-
59
- start_date (str): Starting date for calculations (format: 'YYYY-MM-DD')
60
-
61
- location (bool, optional): Whether to process by location. Defaults to False.
62
-
63
- security_stock_ref (bool, optional): Use reference days method for safety stock
64
- calculation instead of statistical method. Defaults to False.
65
-
66
- df_transit (pd.DataFrame, optional): Transit arrival schedule with columns:
67
- - Item: Item identifier
68
- - Location: Location identifier (if location=True)
69
- - Transit: Partial transit quantity
70
- - ArrivalDate: Arrival date (format: 'YYYY-MM-DD')
71
- If None, complete transit arrives in period 1. Defaults to None.
72
-
73
- integer (bool, optional): Controls numeric formatting of quantity fields.
74
- When True, quantity fields are displayed as integers.
75
- When False, quantity fields are displayed with decimals.
76
- Defaults to True.
77
-
78
- complete_suggested (bool, optional): When True, uses the last calculated
79
- SuggestedForecast value for periods without forecast data instead of
80
- raising an error. Defaults to False.
81
-
82
- start_date_zero (str, optional): Custom start date for period 0 (format: 'YYYY-MM-DD').
83
- When None (default), uses the current system date for period 0.
84
- When specified, uses this date as the starting point for period 0 instead
85
- of the current system date. Defaults to None.
86
- """
87
- self.df_inv = df_inv
88
- self.df_lead_time = df_lead_time
89
- self.df_prep = df_prep
90
- self.df_fcst = df_fcst
91
- self.default_coverage = 30
92
- self.periods = periods
93
- self.start_date = pd.to_datetime(start_date, format='%Y-%m-%d')
94
- self.location = location
95
- self.security_stock_ref = security_stock_ref
96
- self.df_transit = df_transit
97
- self.integer = integer
98
- self.complete_suggested = complete_suggested
99
- self.start_date_zero = start_date_zero
50
+ # Determine which period count to use based on ReorderFreq
51
+ reorder_freq = df_lead_time['ReorderFreq'].iloc[0]
52
+ if pd.isna(reorder_freq) or reorder_freq == 0:
53
+ reorder_freq = default_coverage
54
+ reorder_freq = int(reorder_freq)
100
55
 
101
- # Initialize metadata columns based on location usage
102
- self.metadata = ['Item']
103
- if self.location:
104
- self.metadata.append('Location')
105
-
106
-
107
- def _format_value(self, value, field_name):
108
- """
109
- Apply appropriate formatting based on field type and integer setting.
56
+ # Use period2 for ReorderFreq <= 20, otherwise use periods
57
+ if reorder_freq <= 20:
58
+ effective_periods = period2
59
+ else:
60
+ effective_periods = periods
110
61
 
111
- Args:
112
- value: The numeric value to format (scalar or Series)
113
- field_name: The name of the field to determine formatting rules
114
-
115
- Returns:
116
- Formatted value (int or float with 2 decimals)
117
- """
118
- # Handle pandas Series - extract scalar value
119
- if isinstance(value, pd.Series):
120
- if len(value) == 1:
121
- value = value.iloc[0]
122
- else:
123
- raise ValueError(f"Expected scalar value for {field_name}, got Series with {len(value)} elements")
62
+ # Use effective_periods + 1 internally to calculate one extra period for transit calculations
63
+ end_date = actual_date + pd.DateOffset(months=effective_periods + 1)
124
64
 
125
- # Handle NaN, None, and infinite values
126
- if pd.isna(value) or value is None:
127
- return 0
128
- if np.isinf(value):
129
- return 0
130
-
131
- # Fields that are ALWAYS integers
132
- always_integer_fields = [
133
- 'PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime',
134
- 'ReorderQtyDays', 'ReorderFreq', 'Coverage', 'FutureStockoutDays'
135
- ]
65
+ # Handle start_date = None case
66
+ if start_date is None:
67
+ # If start_date is None, use actual_date as the base for period 1
68
+ base_start_date = actual_date
69
+ else:
70
+ base_start_date = pd.to_datetime(start_date, format='%Y-%m-%d')
136
71
 
137
- # Fields that are ALWAYS decimals (2 decimal places)
138
- always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
72
+ # ReorderFreq was already calculated above, no need to recalculate
139
73
 
140
- # Fields that change based on self.integer setting
141
- quantity_fields = [
142
- 'FutureInventoryTransit', 'FutureInventory', 'FutureTransit',
143
- 'FutureInventoryTransitArrival', 'SuggestedForecast', 'SuggestedForecastPeriod',
144
- 'ReorderPoint', 'ReorderQtyBase', 'ReorderQty', 'SecurityStock', 'Inventory', 'Transit'
145
- ]
74
+ # Generate date range for this item
75
+ date_range = []
146
76
 
147
- if field_name in always_integer_fields:
148
- return int(round(value))
149
- elif field_name in always_decimal_fields:
150
- return round(value, 2)
151
- elif field_name in quantity_fields:
152
- if self.integer:
153
- return int(round(value))
77
+ # Always include actual date (period 0)
78
+ date_range.append(actual_date)
79
+
80
+ # Include base_start_date if after actual_date
81
+ if base_start_date > actual_date:
82
+ date_range.append(base_start_date)
83
+
84
+ # Generate subsequent dates using a controlled loop instead of pd.date_range
85
+ current_date = base_start_date + timedelta(days=reorder_freq)
86
+ while current_date <= end_date:
87
+ date_range.append(current_date)
88
+ current_date += timedelta(days=reorder_freq)
89
+
90
+ # Convert to string format
91
+ date_strings = [d.strftime('%Y%m%d') for d in date_range]
92
+
93
+ return date_strings
94
+
95
+ except Exception as e:
96
+ print(f"Error generating dates for item {key}: {str(e)}")
97
+ # Return a minimal date list with just the current date
98
+ try:
99
+ if start_date_zero is not None:
100
+ actual_date = pd.to_datetime(start_date_zero, format='%Y-%m-%d')
154
101
  else:
155
- return round(value, 2)
156
- else:
157
- # Default: return as is
158
- return value
102
+ DOCKER_CONFIG_PATH = os.path.join('/opt/ml/processing/input', 'config.yml')
103
+ utils = Utils(config_file=DOCKER_CONFIG_PATH, logfile='data_io', log_path='output/logs')
104
+ timestamp = utils.set_timestamp()
105
+ actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')
106
+ return [actual_date.strftime('%Y%m%d')]
107
+ except:
108
+ # Last resort: return today's date
109
+ return [datetime.now().strftime('%Y%m%d')]
159
110
 
160
111
 
161
- def future_date(self):
162
- """
163
- Generate future reorder dates for each item based on reorder frequency.
164
-
165
- This method creates a schedule of dates when reorders should be evaluated
166
- for each item (or item-location combination). The schedule includes:
167
- 1. Current date (always first)
168
- 2. Start date (if after current date)
169
- 3. Subsequent dates at reorder frequency intervals
112
def process_item_batch_complete(batch_args):
    """
    Process a batch of items and return their combined reorder results.

    Each item's date schedule is generated with
    ``_generate_item_dates_worker`` and then passed to
    ``_process_item_complete``. A failure in one item (including a malformed
    item tuple) is counted and skipped; it does not discard the results of
    the other items in the batch.

    Args:
        batch_args (tuple): A 15-element tuple, in this order:
            - batch_items: List of item data tuples (key, lead_time_df, inv_df)
            - df_fcst: Forecast data DataFrame
            - df_prep: Preparation data DataFrame
            - metadata: List of metadata columns
            - location: Boolean indicating if location processing is enabled
            - default_coverage: Default coverage days
            - complete_suggested: Boolean for complete suggested forecast mode
            - security_stock_ref: Boolean for reference-based security stock calculation
            - integer: Boolean for integer formatting of quantities
            - verbose: Boolean for detailed logging
            - df_transit: Transit schedule DataFrame (optional)
            - periods: Number of periods to generate (ReorderFreq > 20)
            - period2: Number of periods to generate (ReorderFreq <= 20)
            - start_date: Start date for period 1 (can be None)
            - start_date_zero: Custom start date for period 0 (can be None)

    Returns:
        pd.DataFrame: Concatenated results for all successfully processed
        items, or an empty DataFrame when nothing was produced or the batch
        arguments themselves could not be unpacked.
    """
    try:
        (batch_items, df_fcst, df_prep, metadata, location, default_coverage,
         complete_suggested, security_stock_ref, integer, verbose, df_transit,
         periods, period2, start_date, start_date_zero) = batch_args

        results = []
        processed_count = 0
        error_count = 0

        for item_data in batch_items:
            # Placeholder so the error message still works when the item
            # tuple itself cannot be unpacked.
            key = '<unknown>'
            try:
                key, current_df_lead_time, current_df_inv = item_data

                # Generate dates for this item locally in the worker process
                dates = _generate_item_dates_worker(
                    key, current_df_lead_time, periods, period2, start_date,
                    start_date_zero, default_coverage, location
                )

                # Run the full per-item calculation
                item_result = _process_item_complete(
                    key, dates, current_df_lead_time, current_df_inv,
                    df_fcst, df_prep, metadata, location, default_coverage,
                    complete_suggested, security_stock_ref, integer,
                    df_transit
                )

                # A None or empty result counts as an error
                if item_result is not None and not item_result.empty:
                    results.append(item_result)
                    processed_count += 1
                else:
                    error_count += 1

            except Exception as e:
                error_count += 1
                if verbose and error_count <= 3:  # Limit error messages to avoid spam
                    print(f"⚠️ Error procesando {key}: {str(e)[:100]}...")
                continue

        # Log batch summary if there were errors
        if verbose and error_count > 0:
            print(f"📊 Batch summary: {processed_count} processed, {error_count} errors")

        # Combine all items in this batch
        if results:
            return pd.concat(results, ignore_index=True)
        return pd.DataFrame()

    except Exception as e:
        print(f"❌ Error crítico en batch: {str(e)}")
        return pd.DataFrame()
196
192
 
197
- # Get unique items with their reorder frequencies
198
- columns = self.metadata + ['ReorderFreq']
199
- df_unique = self.df_lead_time[columns].drop_duplicates().copy()
193
+
194
+ def _process_item_complete(key, dates, current_df_lead_time, current_df_inv,
195
+ df_fcst, df_prep, metadata, location, default_coverage,
196
+ complete_suggested, security_stock_ref, integer, df_transit=None):
197
+ """
198
+ Process a single item through all periods with complete functionality.
199
+
200
+ This function handles the complete inventory reorder calculation for a single item
201
+ across all time periods. It optimizes performance by pre-allocating data structures
202
+ and reducing repetitive calls to forecast and daily usage calculations.
203
+
204
+ The process includes:
205
+ 1. Calculating suggested forecasts for each period
206
+ 2. Computing daily usage rates (average and maximum)
207
+ 3. Determining security stock requirements
208
+ 4. Processing current period inventory
209
+ 5. Calculating future period reorder needs
210
+ 6. Managing transit order schedules
211
+ 7. Computing final inventory metrics
212
+
213
+ Args:
214
+ key: Item identifier (str) or (item, location) tuple
215
+ dates: List of calculation dates in 'YYYYMMDD' format
216
+ current_df_lead_time: Lead time data for this item
217
+ current_df_inv: Current inventory data for this item
218
+ df_fcst: Forecast data DataFrame
219
+ df_prep: Preparation data DataFrame
220
+ metadata: List of metadata columns
221
+ location: Boolean indicating location-based processing
222
+ default_coverage: Default coverage days
223
+ complete_suggested: Boolean for complete suggested forecast mode
224
+ security_stock_ref: Boolean for reference-based security stock
225
+ integer: Boolean for integer formatting
226
+ df_transit: Transit schedule DataFrame (optional)
227
+
228
+ Returns:
229
+ pd.DataFrame: Complete reorder calculations for all periods of this item
230
+ """
231
+ try:
232
+ # Pre-allocate dictionaries for intermediate results
233
+ suggested_forecasts = {}
234
+ df_avgs = {}
235
+ df_maxs = {}
236
+ df_sstocks = {}
237
+ period_results = {}
200
238
 
201
- # Process ReorderFreq values
202
- df_unique['ReorderFreq'] = df_unique['ReorderFreq'].fillna(self.default_coverage)
203
- df_unique.loc[df_unique['ReorderFreq'] == 0, 'ReorderFreq'] = self.default_coverage
204
- df_unique['ReorderFreq'] = df_unique['ReorderFreq'].astype(int)
239
+ # Initialize transit orders for this item
240
+ transit_orders = {key: []}
205
241
 
206
- # Pre-allocate result dictionary
207
- item_dates = {}
242
+ # Track last suggested forecast value for complete_suggested feature
243
+ last_suggested_value = None
208
244
 
209
- # Group by ReorderFreq for batch processing - more efficient for large datasets
210
- for freq, group in df_unique.groupby('ReorderFreq'):
211
- # Generate date range for this frequency
212
- date_range = []
213
-
214
- # Always include actual date
215
- date_range.append(actual_date)
245
+ # Pre-calculate common values to avoid repeated calculations
246
+ coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
247
+ if pd.isna(coverage):
248
+ coverage = default_coverage
249
+
250
+ reorder_freq = current_df_lead_time['ReorderFreq'].iloc[0]
251
+ if pd.isna(reorder_freq) or reorder_freq == 0:
252
+ reorder_freq = default_coverage
253
+
254
+ # Process each period with optimized error handling
255
+ for i, date in enumerate(dates):
256
+ try:
257
+ # Calculate suggested forecast with better error handling
258
+ suggested_forecasts[i] = _calculate_suggested_forecast_complete(
259
+ current_df_lead_time, current_df_inv, date, last_suggested_value,
260
+ df_fcst, df_prep, metadata, location, default_coverage, complete_suggested
261
+ )
262
+
263
+ # Update last_suggested_value for next iteration
264
+ if 'SuggestedForecast' in suggested_forecasts[i].columns:
265
+ new_suggested_value = suggested_forecasts[i]['SuggestedForecast'].iloc[0]
266
+
267
+ # Only update if the new value is not NaN
268
+ if not pd.isna(new_suggested_value):
269
+ last_suggested_value = new_suggested_value
270
+
271
+ # Calculate daily usage with optimized calls
272
+ df_avgs[i], df_maxs[i] = _calculate_daily_usage_complete(
273
+ suggested_forecasts[i], date, df_fcst, location
274
+ )
275
+
276
+ # Calculate security stock data with pre-calculated values
277
+ df_sstocks[i] = _calculate_security_stock_data_complete(
278
+ df_maxs[i], current_df_lead_time, default_coverage, i, dates
279
+ )
280
+
281
+ # Process period based on whether it's current or future
282
+ if i == 0:
283
+ period_results[i] = _process_current_period_complete(
284
+ current_df_inv, df_sstocks[i], key, date, transit_orders, dates,
285
+ metadata, integer, security_stock_ref, df_transit
286
+ )
287
+ else:
288
+ period_results[i] = _process_future_period_complete(
289
+ current_df_inv, df_sstocks[i], period_results[i-1],
290
+ key, date, dates, i, transit_orders, metadata, integer, security_stock_ref
291
+ )
292
+
293
+ # Add metadata columns efficiently
294
+ period_results[i]['Date'] = date
295
+ if location:
296
+ item, loc = key
297
+ period_results[i]['Item'] = item
298
+ period_results[i]['Location'] = loc
299
+ else:
300
+ period_results[i]['Item'] = key
301
+
302
+ except Exception as e:
303
+ # Log error but continue with next period
304
+ import traceback
305
+ tb = traceback.extract_tb(e.__traceback__)
306
+ function_name = tb[-1].name if tb else 'unknown'
307
+ line_number = tb[-1].lineno if tb else 'unknown'
308
+
309
+ print(f"Warning: Error processing period {i} for item {key}:")
310
+ print(f" Function: {function_name} (line {line_number})")
311
+ print(f" Error: {str(e)}")
312
+ print(f" Error type: {type(e).__name__}")
313
+ print(f" Date value: {repr(date)} (type: {type(date)})")
314
+ print(f" Period index: {i}")
315
+ print(f" Total dates available: {len(dates)}")
316
+
317
+ # Print more context for debugging
318
+ if hasattr(e, '__cause__') and e.__cause__:
319
+ print(f" Caused by: {str(e.__cause__)}")
320
+
321
+ # Print the full traceback for error analysis
322
+ print(f" Full traceback:")
323
+ traceback.print_exc()
324
+
325
+ continue
326
+
327
+ # After processing all periods, update FutureInventoryTransitArrival
328
+ for i in range(len(dates)):
329
+ if i < len(dates) - 1: # If there's a next period
330
+ # Get next period's TransitArrival
331
+ next_transit_arrival = period_results[i + 1]['TransitArrival'].iloc[0]
332
+ transit_arrival_sum = _sum_transit_arrivals(next_transit_arrival)
333
+ else: # Last period - no next period
334
+ transit_arrival_sum = 0
216
335
 
217
- # Include start_date if after actual_date
218
- if self.start_date > actual_date:
219
- date_range.append(self.start_date)
336
+ # Update FutureInventoryTransitArrival
337
+ period_results[i]['FutureInventoryTransitArrival'] = _format_value_complete(
338
+ period_results[i]['FutureInventory'].iloc[0] + transit_arrival_sum,
339
+ 'FutureInventoryTransitArrival', integer
340
+ )
220
341
 
221
- # Generate subsequent dates using pandas date_range for efficiency
222
- num_periods = int((end_date - self.start_date).days / freq) + 1
223
- future_dates = pd.date_range(
224
- start=self.start_date + timedelta(days=freq),
225
- periods=num_periods,
226
- freq=f'{freq}D'
342
+ # Recalculate FutureStockoutDays with the updated FutureInventoryTransitArrival
343
+ period_results[i]['FutureStockoutDays'] = _calculate_inventory_days_complete(
344
+ period_results[i], integer
227
345
  )
228
- date_range.extend(future_dates[future_dates <= end_date])
346
+
347
+ # Combine all periods for this item
348
+ if period_results:
349
+ # Stack all period results at once
350
+ item_df = pd.concat(period_results.values(), ignore_index=True)
229
351
 
230
- # Convert to string format
231
- date_strings = [d.strftime('%Y%m%d') for d in date_range]
352
+ # Reorder columns for consistency
353
+ cols = ['Date', 'Item']
354
+ if location:
355
+ cols.append('Location')
356
+ other_cols = [col for col in item_df.columns if col not in cols]
357
+ item_df = item_df[cols + other_cols]
232
358
 
233
- # Assign to all items in this group
234
- for _, row in group.iterrows():
235
- if self.location:
236
- key = (row['Item'], row['Location'])
237
- else:
238
- key = row['Item']
239
- item_dates[key] = date_strings
359
+ return item_df
240
360
 
241
- return item_dates
242
-
243
-
244
- def _get_current_dataframes(self, item, location=None):
245
- """
246
- Get filtered dataframes for current item/location combination.
361
+ return None
247
362
 
248
- Args:
249
- item (str): Item identifier to filter for
250
- location (str, optional): Location identifier if using multi-location mode
251
-
252
- Returns:
253
- tuple: (current_df_lead_time, current_df_inv)
254
- - current_df_lead_time: Lead time data filtered for item/location
255
- - current_df_inv: Inventory data filtered for item/location
256
- """
257
- # Create filter mask based on item
258
- mask_lead_time = self.df_lead_time['Item'] == item
259
- mask_inv = self.df_inv['Item'] == item
363
+ except Exception as e:
364
+ # Handle any unexpected errors at the item level
365
+ import traceback
366
+ tb = traceback.extract_tb(e.__traceback__)
367
+ function_name = tb[-1].name if tb else 'unknown'
368
+ line_number = tb[-1].lineno if tb else 'unknown'
369
+
370
+ print(f"Error processing item {key}:")
371
+ print(f" Function: {function_name} (line {line_number})")
372
+ print(f" Error: {str(e)}")
373
+ print(f" Error type: {type(e).__name__}")
374
+ print(f" Item dates: {dates[:5] if dates else 'None'}... (showing first 5)")
375
+ print(f" Total dates: {len(dates) if dates else 0}")
260
376
 
261
- # Add location filter if needed
262
- if self.location and location is not None:
263
- mask_lead_time &= self.df_lead_time['Location'] == location
264
- mask_inv &= self.df_inv['Location'] == location
377
+ # Print more context for debugging
378
+ if hasattr(e, '__cause__') and e.__cause__:
379
+ print(f" Caused by: {str(e.__cause__)}")
265
380
 
266
- # Apply filters using boolean indexing
267
- current_df_lead_time = self.df_lead_time[mask_lead_time]
268
- current_df_inv = self.df_inv[mask_inv]
381
+ # Print the full traceback for error analysis
382
+ print(f" Full traceback:")
383
+ traceback.print_exc()
269
384
 
270
- return current_df_lead_time, current_df_inv
385
+ return None
271
386
 
272
387
 
273
- def _calculate_suggested_forecast(self, current_df_lead_time, current_df_inv, date, last_suggested_value=None):
274
- """
275
- Calculate suggested forecast for the given date using the SuggestedForecast class.
388
+ def _format_value_complete(value, field_name, integer):
389
+ """Apply appropriate formatting based on field type and integer setting."""
390
+ # Handle pandas Series - extract scalar value
391
+ if isinstance(value, pd.Series):
392
+ if len(value) == 1:
393
+ value = value.iloc[0]
394
+ else:
395
+ raise ValueError(f"Expected scalar value for {field_name}, got Series with {len(value)} elements")
396
+
397
+ # Handle NaN, None, and infinite values
398
+ if pd.isna(value) or value is None:
399
+ return 0
400
+ if np.isinf(value):
401
+ return 0
402
+
403
+ # Fields that are ALWAYS integers
404
+ always_integer_fields = [
405
+ 'PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime',
406
+ 'ReorderQtyDays', 'ReorderFreq', 'Coverage', 'FutureStockoutDays'
407
+ ]
408
+
409
+ # Fields that are ALWAYS decimals (2 decimal places)
410
+ always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
411
+
412
+ # Fields that change based on integer setting
413
+ quantity_fields = [
414
+ 'FutureInventoryTransit', 'FutureInventory', 'FutureTransit',
415
+ 'FutureInventoryTransitArrival', 'SuggestedForecast', 'SuggestedForecastPeriod',
416
+ 'ReorderPoint', 'ReorderQtyBase', 'ReorderQty', 'SecurityStock', 'Inventory', 'Transit'
417
+ ]
418
+
419
+ if field_name in always_integer_fields:
420
+ return int(round(value))
421
+ elif field_name in always_decimal_fields:
422
+ return round(value, 2)
423
+ elif field_name in quantity_fields:
424
+ if integer:
425
+ return int(round(value))
426
+ else:
427
+ return round(value, 2)
428
+ else:
429
+ # Default: return as is
430
+ return value
431
+
432
+
433
+ def _suggested_forecast_fallback(current_df_lead_time, current_df_inv, date, df_fcst, metadata, location, default_coverage):
434
+ """
435
+ Simplified SuggestedForecast fallback function for multiprocessing compatibility.
436
+
437
+ This function provides a basic forecast calculation when the main SuggestedForecast class fails
438
+ due to multiprocessing issues. It calculates the forecast using an average-based approach:
439
+
440
+ 1. Sum all forecasts in the coverage period
441
+ 2. Calculate daily average (sum / total_days_in_period)
442
+ 3. Multiply by coverage days
443
+ 4. Round up to nearest integer
444
+
445
+ Args:
446
+ current_df_lead_time: Lead time DataFrame for this item
447
+ current_df_inv: Inventory DataFrame for this item
448
+ date: Date string in 'YYYYMMDD' format
449
+ df_fcst: Forecast DataFrame
450
+ metadata: List of metadata columns
451
+ location: Boolean indicating location-based processing
452
+ default_coverage: Default coverage days
453
+
454
+ Returns:
455
+ pd.DataFrame: DataFrame with SuggestedForecast column
456
+ """
457
+ try:
458
+ # Parse the date
459
+ current_date = pd.to_datetime(date, format='%Y%m%d')
276
460
 
277
- This method now validates that sufficient forecast data exists to cover the
278
- required coverage period. If forecast data doesn't extend far enough into
279
- the future, it either raises an error or uses the last calculated value
280
- based on the complete_suggested parameter.
461
+ # Get coverage for this item
462
+ coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
463
+ if pd.isna(coverage):
464
+ coverage = default_coverage
465
+ coverage = int(coverage)
281
466
 
282
- Args:
283
- current_df_lead_time (pd.DataFrame): Lead time data for current item
284
- current_df_inv (pd.DataFrame): Inventory data for current item
285
- date (str): Date for forecast calculation in 'YYYYMMDD' format
286
- last_suggested_value (float, optional): Last calculated SuggestedForecast value
287
- to use when complete_suggested is True and forecast data is insufficient
288
-
289
- Returns:
290
- pd.DataFrame: DataFrame containing suggested forecast values
291
-
292
- Raises:
293
- ValueError: If forecast data doesn't extend far enough to cover the required period
294
- and complete_suggested is False or no previous value is available
295
- """
296
- # Convert current date to datetime
297
- current_date = pd.to_datetime(date, format='%Y%m%d')
467
+ # Calculate forecast end date
468
+ forecast_end_date = current_date + timedelta(days=coverage)
298
469
 
299
- # Get the maximum forecast date available
300
- max_forecast_date = self.df_fcst['Date'].max()
470
+ # Filter forecast data for this item and date range
471
+ if location:
472
+ item = current_df_inv['Item'].iloc[0]
473
+ loc = current_df_inv['Location'].iloc[0]
474
+ forecast_mask = (df_fcst['Item'] == item) & (df_fcst['Location'] == loc)
475
+ else:
476
+ item = current_df_inv['Item'].iloc[0]
477
+ forecast_mask = df_fcst['Item'] == item
301
478
 
302
- # Get coverage value for this item
303
- coverage = current_df_lead_time['Coverage'].iloc[0]
479
+ # Add date range filter - get all forecast data for this item
480
+ forecast_mask &= (df_fcst['Date'] >= current_date) & (df_fcst['Date'] <= forecast_end_date)
304
481
 
305
- # Calculate the required forecast end date
306
- required_forecast_end_date = current_date + timedelta(days=int(coverage))
482
+ item_forecast = df_fcst[forecast_mask]
307
483
 
308
- # Check if we have sufficient forecast data
309
- if max_forecast_date < required_forecast_end_date:
310
- # Get item identifier for error message
484
+ # Calculate suggested forecast using average-based approach
485
+ if not item_forecast.empty and 'Forecast' in item_forecast.columns:
486
+ # Step 1: Sum all forecasts in the period
487
+ total_forecast = item_forecast['Forecast'].sum()
488
+
489
+ # Step 2: Calculate total days in the forecast period
490
+ # Simplification: assume 30 days per month for calculation
491
+ total_days_in_period = len(item_forecast) # Number of forecast records
492
+ if total_days_in_period == 0:
493
+ suggested_forecast = 0.0
494
+ else:
495
+ # Step 3: Calculate daily average
496
+ daily_average = total_forecast / total_days_in_period
497
+
498
+ # Step 4: Multiply by coverage days
499
+ suggested_forecast = daily_average * coverage
500
+
501
+ # Step 5: Round up to nearest integer
502
+ suggested_forecast = np.ceil(suggested_forecast)
503
+
504
+
505
+ else:
506
+ # Fallback: use 0 if no forecast data available
507
+ suggested_forecast = 0.0
311
508
  item = current_df_inv['Item'].iloc[0]
312
509
  location_msg = ""
313
- if self.location and 'Location' in current_df_inv.columns:
314
- location = current_df_inv['Location'].iloc[0]
315
- location_msg = f" at location {location}"
316
-
317
- if self.complete_suggested:
318
- if last_suggested_value is not None:
319
- # Use the last calculated SuggestedForecast value
320
- # Create a DataFrame with the same structure as the normal output
321
- result_df = current_df_inv[self.metadata].copy()
322
- result_df['SuggestedForecast'] = last_suggested_value
323
-
324
- # Add PurchaseFactor and ItemDescription from inventory data
325
- if 'PurchaseFactor' in current_df_inv.columns:
326
- result_df['PurchaseFactor'] = current_df_inv['PurchaseFactor'].iloc[0]
327
- else:
328
- result_df['PurchaseFactor'] = 1 # Default value if not present
329
-
330
- if 'ItemDescription' in current_df_inv.columns:
331
- result_df['ItemDescription'] = current_df_inv['ItemDescription'].iloc[0]
332
- else:
333
- result_df['ItemDescription'] = '' # Default value if not present
510
+ if location and 'Location' in current_df_inv.columns:
511
+ loc = current_df_inv['Location'].iloc[0]
512
+ location_msg = f" at location {loc}"
513
+ print(f" ⚠️ No forecast data found for item {item}{location_msg}, using 0")
514
+
515
+ # Create result DataFrame
516
+ result_df = current_df_inv[metadata].copy()
517
+ result_df['SuggestedForecast'] = suggested_forecast
518
+
519
+ # Add required columns
520
+ result_df['PurchaseFactor'] = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
521
+ result_df['ItemDescription'] = current_df_inv.get('ItemDescription', pd.Series([''])).iloc[0]
522
+
523
+ return result_df
524
+
525
+ except Exception as e:
526
+ print(f" ❌ Fallback SuggestedForecast also failed: {str(e)}")
527
+ # Last resort: return basic structure with 0 forecast
528
+ result_df = current_df_inv[metadata].copy()
529
+ result_df['SuggestedForecast'] = 0.0
530
+ result_df['PurchaseFactor'] = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
531
+ result_df['ItemDescription'] = current_df_inv.get('ItemDescription', pd.Series([''])).iloc[0]
532
+ return result_df
334
533
 
335
- return result_df
336
- else:
337
- # For the first period when complete_suggested=True but no previous value exists,
338
- # try to calculate with available data up to max_forecast_date
339
- # This allows at least the first period to be calculated
534
+
535
+ def _calculate_suggested_forecast_complete(current_df_lead_time, current_df_inv, date, last_suggested_value,
536
+ df_fcst, df_prep, metadata, location, default_coverage, complete_suggested):
537
+ """Calculate suggested forecast for the given date using the SuggestedForecast class."""
538
+ # Convert current date to datetime
539
+ try:
540
+ current_date = pd.to_datetime(date, format='%Y%m%d')
541
+ except Exception as e:
542
+ raise ValueError(f"_calculate_suggested_forecast_complete: Invalid date '{date}' - {str(e)}")
543
+
544
+ # Get the maximum forecast date available
545
+ max_forecast_date = df_fcst['Date'].max()
546
+
547
+ # Get coverage value for this item
548
+ coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
549
+ if pd.isna(coverage):
550
+ coverage = default_coverage
551
+
552
+ # Calculate the required forecast end date
553
+ required_forecast_end_date = current_date + timedelta(days=int(coverage))
554
+
555
+ # Check if we have sufficient forecast data
556
+ if max_forecast_date < required_forecast_end_date:
557
+ if complete_suggested:
558
+ if last_suggested_value is not None:
559
+ # Use the last calculated SuggestedForecast value
560
+ result_df = current_df_inv[metadata].copy()
561
+ result_df['SuggestedForecast'] = last_suggested_value
562
+
563
+ # Add PurchaseFactor and ItemDescription from inventory data using safe access
564
+ result_df['PurchaseFactor'] = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
565
+ result_df['ItemDescription'] = current_df_inv.get('ItemDescription', pd.Series([''])).iloc[0]
566
+
567
+ return result_df
568
+ else:
569
+ # For the first period when complete_suggested=True but no previous value exists
570
+ try:
571
+ return SuggestedForecast(
572
+ df_LeadTimes=current_df_lead_time,
573
+ df_Forecast=df_fcst,
574
+ df_Prep=df_prep,
575
+ df_inv=current_df_inv,
576
+ column_forecast='SuggestedForecast',
577
+ columns_metadata=metadata,
578
+ frequency_='M',
579
+ location=location,
580
+ actualdate=date,
581
+ default_coverage_=default_coverage,
582
+ join_='left'
583
+ ).suggested_forecast()
584
+ except Exception as e:
585
+ print(f" ❌ Initial calculation failed: {str(e)}")
586
+ print(f" 🔄 Attempting fallback SuggestedForecast calculation...")
587
+
340
588
  try:
341
- return SuggestedForecast(
342
- df_LeadTimes=current_df_lead_time,
343
- df_Forecast=self.df_fcst,
344
- df_Prep=self.df_prep,
345
- df_inv=current_df_inv,
346
- column_forecast='SuggestedForecast',
347
- columns_metadata=self.metadata,
348
- frequency_='M',
349
- location=self.location,
350
- actualdate=date,
351
- default_coverage_=self.default_coverage,
352
- join_='left'
353
- ).suggested_forecast()
354
- except Exception as e:
355
- # If even the basic calculation fails, raise a more informative error
589
+ # Use simplified fallback function
590
+ fallback_result = _suggested_forecast_fallback(
591
+ current_df_lead_time, current_df_inv, date, df_fcst,
592
+ metadata, location, default_coverage
593
+ )
594
+
595
+ return fallback_result
596
+
597
+ except Exception as fallback_error:
598
+ print(f" ❌ Fallback initial calculation also failed: {str(fallback_error)}")
599
+
600
+ # Get item identifier for error message
601
+ item = current_df_inv['Item'].iloc[0]
602
+ location_msg = ""
603
+ if location and 'Location' in current_df_inv.columns:
604
+ loc = current_df_inv['Location'].iloc[0]
605
+ location_msg = f" at location {loc}"
606
+
356
607
  error_msg = (
357
608
  f"Cannot calculate initial forecast for item {item}{location_msg}. "
358
609
  f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
@@ -361,574 +612,793 @@ class FutureReorder():
361
612
  f"Original error: {str(e)}"
362
613
  )
363
614
  raise ValueError(error_msg)
364
- else:
365
- error_msg = (
366
- f"Insufficient forecast data for item {item}{location_msg}. "
367
- f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
368
- f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
369
- f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}."
370
- )
371
- raise ValueError(error_msg)
372
-
373
- # If validation passes, proceed with the original calculation
374
- return SuggestedForecast(
615
+ else:
616
+ # Get item identifier for error message
617
+ item = current_df_inv['Item'].iloc[0]
618
+ location_msg = ""
619
+ if location and 'Location' in current_df_inv.columns:
620
+ loc = current_df_inv['Location'].iloc[0]
621
+ location_msg = f" at location {loc}"
622
+
623
+ error_msg = (
624
+ f"Insufficient forecast data for item {item}{location_msg}. "
625
+ f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
626
+ f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
627
+ f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}."
628
+ )
629
+ raise ValueError(error_msg)
630
+
631
+ # If validation passes, proceed with the original calculation
632
+ try:
633
+ result = SuggestedForecast(
375
634
  df_LeadTimes=current_df_lead_time,
376
- df_Forecast=self.df_fcst,
377
- df_Prep=self.df_prep,
635
+ df_Forecast=df_fcst,
636
+ df_Prep=df_prep,
378
637
  df_inv=current_df_inv,
379
638
  column_forecast='SuggestedForecast',
380
- columns_metadata=self.metadata,
639
+ columns_metadata=metadata,
381
640
  frequency_='M',
382
- location=self.location,
641
+ location=location,
383
642
  actualdate=date,
384
- default_coverage_=self.default_coverage,
643
+ default_coverage_=default_coverage,
385
644
  join_='left'
386
645
  ).suggested_forecast()
387
-
388
-
389
- def _calculate_daily_usage(self, suggested_forecast_df, date):
390
- """
391
- Calculate average and maximum daily usage rates.
392
646
 
393
- This method computes both average and maximum daily consumption rates
394
- which are used for inventory planning and safety stock calculations.
647
+
648
+ return result
649
+
650
+ except Exception as e:
651
+ print(f" ❌ Normal calculation failed: {str(e)}")
652
+ print(f" 🔄 Attempting fallback SuggestedForecast calculation...")
395
653
 
396
- Args:
397
- suggested_forecast_df (pd.DataFrame): DataFrame with forecast data
398
- date (str): Current calculation date in 'YYYYMMDD' format
654
+ try:
655
+ # Use simplified fallback function
656
+ fallback_result = _suggested_forecast_fallback(
657
+ current_df_lead_time, current_df_inv, date, df_fcst,
658
+ metadata, location, default_coverage
659
+ )
399
660
 
400
- Returns:
401
- tuple: (df_avg, df_max)
402
- - df_avg: DataFrame with average daily usage
403
- - df_max: DataFrame with maximum daily usage
404
- """
661
+
662
+ return fallback_result
663
+
664
+ except Exception as fallback_error:
665
+ print(f" ❌ Fallback calculation also failed: {str(fallback_error)}")
666
+ # Re-raise the original error
667
+ raise e
668
+
669
+
670
def _calculate_daily_usage_complete(suggested_forecast_df, date, df_fcst, location):
    """
    Derive the average and maximum daily usage frames for one calculation date.

    Two ``DailyUsageFuture`` helpers are built with identical settings. The
    first adds 'AvgDailyUsage' to ``suggested_forecast_df``; the second is fed
    the output of the first and adds 'MaxDailyUsage'. Missing values in both
    results are replaced with 0.

    Args:
        suggested_forecast_df: DataFrame carrying the 'SuggestedForecast' column
        date: Calculation date, forwarded unchanged to DailyUsageFuture
        df_fcst: Forecast DataFrame, forwarded unchanged to DailyUsageFuture
        location: Location flag, forwarded unchanged to DailyUsageFuture

    Returns:
        tuple: (df_avg, df_max)

    Raises:
        Exception: Whatever DailyUsageFuture raised, after it has been logged
            to stdout together with a full traceback.
    """
    usage_settings = {
        'location': location,
        'column_forecast': 'SuggestedForecast',
        'date': date,
        'df_fcst': df_fcst,
    }

    try:
        avg_builder = DailyUsageFuture(**usage_settings)
        df_avg = avg_builder.daily_usage(suggested_forecast_df, 'AvgDailyUsage').fillna(0)

        # The max-usage pass consumes the average-usage result, not the raw input.
        max_builder = DailyUsageFuture(**usage_settings)
        df_max = max_builder.daily_usage(df_avg, 'MaxDailyUsage').fillna(0)

        return df_avg, df_max

    except Exception as e:
        print(f" ❌ DailyUsageFuture error: {str(e)}")
        print(f" ❌ Error type: {type(e).__name__}")

        # Dump the complete stack so the failing item can be diagnosed from logs.
        import traceback
        print(f" ❌ Full traceback:")
        traceback.print_exc()

        # Propagate the same exception object so callers see the original failure.
        raise e
701
+
702
+
703
def _calculate_security_stock_data_complete(df_max, current_df_lead_time, default_coverage, period_index, dates):
    """
    Merge daily usage with lead-time data and compute SuggestedForecastPeriod.

    Steps:
        1. Inner-merge ``df_max`` with the lead-time columns on 'Item'
           (plus 'Location' when ``df_max`` has that column) and drop duplicates.
        2. Resolve the reorder frequency: the first 'ReorderFreq' value, or
           ``default_coverage`` when that value is null or 0.
        3. Resolve the coverage: the first 'Coverage' value when the column
           exists and the value is not null; otherwise
           ``reorder_freq + AvgLeadTime``.
        4. Scale 'SuggestedForecast' by ``days / coverage`` where ``days`` is
           the gap between ``dates[0]`` and ``dates[1]`` for period 0 (when at
           least two dates are supplied) and ``reorder_freq`` in every other
           case. The result is rounded up and stored as an integer.

    Args:
        df_max: DataFrame with maximum daily usage and 'SuggestedForecast'
        current_df_lead_time: Lead time data for the current item; must hold
            'AvgLeadTime', 'MaxLeadTime' and 'ReorderFreq'
        default_coverage: Default coverage days
        period_index: Current period index (0, 1, 2, ...)
        dates: List of 'YYYYMMDD' date strings for this item (may be None)

    Returns:
        pd.DataFrame: The merged frame with an added integer
        'SuggestedForecastPeriod' column.

    Raises:
        ValueError: If ``dates[0]`` or ``dates[1]`` is not an 8-character
            string or cannot be parsed as '%Y%m%d' while processing period 0.
            The underlying error is attached as ``__cause__``.
    """
    metadata = ['Item', 'Location'] if 'Location' in df_max.columns else ['Item']
    merge_columns = metadata + ['AvgLeadTime', 'MaxLeadTime']
    df_sstock = pd.merge(
        df_max, current_df_lead_time[merge_columns], on=metadata, how='inner'
    ).drop_duplicates()

    # Reorder frequency falls back to the default when missing or zero.
    reorder_freq = current_df_lead_time['ReorderFreq'].values[0]
    if pd.isnull(reorder_freq) or reorder_freq == 0:
        reorder_freq = default_coverage

    # Prefer an explicit Coverage value; otherwise derive it from the
    # reorder frequency plus the average lead time.
    coverage = None
    if 'Coverage' in current_df_lead_time.columns:
        coverage_val = current_df_lead_time['Coverage'].values[0]
        if not pd.isnull(coverage_val):
            coverage = coverage_val
    if coverage is None:
        coverage = reorder_freq + df_sstock['AvgLeadTime'].values[0]

    if period_index == 0 and dates is not None and len(dates) > 1:
        # Period 0 consumes only until the next period starts, so use the real
        # day gap between the first two dates instead of the reorder frequency.
        try:
            for position in (0, 1):
                candidate = dates[position]
                if not isinstance(candidate, str) or len(candidate) != 8:
                    raise ValueError(
                        f"Invalid dates[{position}] format: {repr(candidate)} (expected 8-character string)"
                    )

            current_date = pd.to_datetime(dates[0], format='%Y%m%d')
            next_date = pd.to_datetime(dates[1], format='%Y%m%d')

        except Exception as e:
            error_msg = (
                f"_calculate_security_stock_data_complete: Date processing error - "
                f"dates[0]='{dates[0]}' "
                f"(type: {type(dates[0])}), "
                f"dates[1]='{dates[1]}' "
                f"(type: {type(dates[1])}), "
                f"period_index={period_index}, dates_length={len(dates)}, "
                f"original_error: {str(e)}"
            )
            # Chain the cause so the original traceback is not lost.
            raise ValueError(error_msg) from e

        days_to_next_period = (next_date - current_date).days

        # Formula: SuggestedForecast × (days_to_next_period / coverage)
        suggested_forecast_period = np.ceil(
            df_sstock['SuggestedForecast'] * (days_to_next_period / coverage)
        )
    else:
        # Formula: SuggestedForecast × (reorder_freq / coverage)
        suggested_forecast_period = np.ceil(
            df_sstock['SuggestedForecast'] * (reorder_freq / coverage)
        )

    # SuggestedForecastPeriod is always stored as an integer.
    df_sstock['SuggestedForecastPeriod'] = suggested_forecast_period.apply(
        lambda x: int(round(x))
    )

    return df_sstock
786
+
787
+
788
def _calculate_security_stock_complete(df, security_stock_ref, integer):
    """
    Compute the security stock for the first row of ``df`` as a formatted scalar.

    Two modes are supported:
        - Reference mode (``security_stock_ref`` truthy):
          SecurityStockDaysRef × AvgDailyUsage, or 0 when the
          'SecurityStockDaysRef' column is absent.
        - Statistical mode (default):
          (MaxDailyUsage × MaxLeadTime) − (AvgDailyUsage × AvgLeadTime).

    Args:
        df: DataFrame holding the usage and lead-time columns
        security_stock_ref: Selects the reference-days mode when truthy
        integer: Forwarded to ``_format_value_complete``

    Returns:
        The value produced by ``_format_value_complete`` for 'SecurityStock'.
    """
    if not security_stock_ref:
        # Gap between the worst-case and the average demand over the lead time.
        worst_case_demand = df['MaxDailyUsage'].iloc[0] * df['MaxLeadTime'].iloc[0]
        average_demand = df['AvgDailyUsage'].iloc[0] * df['AvgLeadTime'].iloc[0]
        raw_stock = worst_case_demand - average_demand
    elif 'SecurityStockDaysRef' in df.columns:
        raw_stock = df['SecurityStockDaysRef'].iloc[0] * df['AvgDailyUsage'].iloc[0]
    else:
        raw_stock = 0

    return _format_value_complete(raw_stock, 'SecurityStock', integer)
801
+
802
+
803
def _calculate_inventory_days_complete(df, integer):
    """
    Compute, per row, how many days the stock above security level will last.

    Days = (FutureInventoryTransitArrival − SecurityStock) / AvgDailyUsage for
    rows whose AvgDailyUsage is greater than 0, and 0 for every other row.

    Args:
        df: DataFrame with 'AvgDailyUsage', 'FutureInventoryTransitArrival'
            and 'SecurityStock' columns
        integer: Forwarded to ``_format_value_complete``

    Returns:
        pd.Series: Formatted 'FutureStockoutDays' values. The series is built
        from a plain array, so it carries a fresh positional index rather than
        the index of ``df``.
    """
    daily_usage = df['AvgDailyUsage']
    has_usage = daily_usage > 0
    stock_above_safety = df['FutureInventoryTransitArrival'] - df['SecurityStock']

    # Rows without usage get 0 days instead of the division result.
    raw_days = np.where(has_usage, stock_above_safety / daily_usage, 0)

    def _as_stockout_days(value):
        return _format_value_complete(value, 'FutureStockoutDays', integer)

    return pd.Series(raw_days).apply(_as_stockout_days)
814
+
815
+
816
def _sum_transit_arrivals(transit_arrivals_str):
    """
    Calculate the total quantity from a TransitArrival string.

    The string is expected to be the ``str()`` of a list of dicts, each with
    an optional 'quantity' entry (entries without it count as 0).

    Args:
        transit_arrivals_str: Serialized arrivals list, e.g.
            "[{'quantity': 5.0, 'arrival_date': '2024-01-15'}]". Empty values
            and the literal '[]' mean "no arrivals".

    Returns:
        The sum of all 'quantity' values, or 0.0 when the input is empty or
        cannot be parsed into a list of dicts.
    """
    if transit_arrivals_str == '[]' or not transit_arrivals_str:
        return 0.0

    try:
        arrivals = ast.literal_eval(transit_arrivals_str)
        return sum(arrival.get('quantity', 0) for arrival in arrivals)
    except (ValueError, SyntaxError, TypeError, AttributeError, MemoryError, RecursionError):
        # Best-effort parsing: malformed text or an unexpected structure counts
        # as "no arrivals". Only parse/shape errors are caught, so that
        # KeyboardInterrupt and SystemExit still propagate.
        return 0.0
826
+
827
+
828
def _whole_transit_on_second_date(transit_amount, dates, date_label):
    """Schedule the entire transit quantity to arrive on dates[1], or nothing if there is no dates[1]."""
    if len(dates) <= 1:
        return []

    try:
        arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
    except Exception as e:
        # Chain the cause so the parsing traceback is preserved.
        raise ValueError(
            f"_prepare_transit_schedule_complete: Invalid {date_label} dates[1]='{dates[1]}' - {str(e)}"
        ) from e

    return [{'quantity': transit_amount, 'arrival_date': arrival_date}]


def _prepare_transit_schedule_complete(key, transit_amount, dates, df_transit, location):
    """
    Prepare the transit schedule based on df_transit or the default logic.

    Default logic (used when ``df_transit`` is None or holds no rows for this
    key): the complete transit quantity arrives on ``dates[1]``; when fewer
    than two dates exist, nothing is scheduled.

    Args:
        key: Item identifier, or an (item, location) tuple when ``location``
            is truthy
        transit_amount: Quantity currently in transit for this key
        dates: List of 'YYYYMMDD' period date strings for this key
        df_transit: Optional DataFrame with 'Item', 'Transit', 'ArrivalDate'
            (and 'Location' when ``location`` is truthy) columns
        location: Whether ``key`` also carries a location

    Returns:
        list[dict]: One dict per arrival with 'quantity' and 'arrival_date'
        keys. Empty when ``transit_amount`` is not positive.

    Raises:
        ValueError: If ``dates[1]`` cannot be parsed as '%Y%m%d', or if the
            scheduled total for the key differs from ``transit_amount`` by
            more than 0.01.
    """
    if transit_amount <= 0:
        return []

    if df_transit is None:
        return _whole_transit_on_second_date(transit_amount, dates, 'date')

    # Select the schedule rows that belong to this key.
    if location:
        item, loc = key
        mask = (df_transit['Item'] == item) & (df_transit['Location'] == loc)
    else:
        mask = df_transit['Item'] == key

    transit_data = df_transit[mask]

    if transit_data.empty:
        # No explicit schedule for this key: fall back to the default logic.
        return _whole_transit_on_second_date(transit_amount, dates, 'fallback date')

    # The schedule must account for the whole in-transit quantity
    # (a small tolerance absorbs floating point differences).
    total_scheduled = transit_data['Transit'].sum()
    if abs(total_scheduled - transit_amount) > 0.01:
        raise ValueError(f"Transit schedule total ({total_scheduled}) does not match inventory transit ({transit_amount}) for {key}")

    return [
        {
            'quantity': float(quantity),
            'arrival_date': pd.to_datetime(arrival, format='%Y-%m-%d'),
        }
        for quantity, arrival in zip(transit_data['Transit'], transit_data['ArrivalDate'])
    ]
882
+
883
+
884
def _process_current_period_complete(current_df_inv, df_sstock, key, date, transit_orders, dates, metadata, integer, security_stock_ref=False, df_transit=None):
    """
    Build the inventory row for the current period (i=0).

    Starting stock and transit are read from ``current_df_inv``, the pending
    transit schedule for ``key`` is registered in ``transit_orders`` (mutated
    in place), and the security stock, reorder point and base reorder quantity
    are derived. No order is placed in period 0: 'ReorderQty' and
    'ReorderQtyDays' are 0 and 'ArrivalDate' is empty.

    Args:
        current_df_inv: Inventory data for the current item
        df_sstock: Frame with forecast, usage and lead-time columns
        key: Item key used to index ``transit_orders``
        date: Current period date (not used by this function)
        transit_orders: Dict mapping keys to lists of pending transit orders
        dates: List of period dates for this item
        metadata: Metadata column names; location mode is inferred from
            whether it contains 'Location'
        integer: Forwarded to ``_format_value_complete``
        security_stock_ref: Forwarded to ``_calculate_security_stock_complete``
        df_transit: Optional explicit transit schedule

    Returns:
        pd.DataFrame: Copy of ``df_sstock`` extended with the period-0 columns.
    """
    # Read the opening figures; if a column is missing, re-read every value
    # with a safe default instead.
    try:
        on_hand = current_df_inv['Inventory'].iloc[0]
        in_transit = current_df_inv['Transit'].iloc[0]
        if 'PurchaseFactor' in current_df_inv.columns:
            purchase_factor = current_df_inv['PurchaseFactor'].iloc[0]
        else:
            purchase_factor = 1
    except KeyError:
        on_hand = current_df_inv.get('Inventory', pd.Series([0])).iloc[0]
        in_transit = current_df_inv.get('Transit', pd.Series([0])).iloc[0]
        purchase_factor = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]

    df = df_sstock.copy()
    df['FutureInventory'] = _format_value_complete(on_hand, 'FutureInventory', integer)
    df['FutureTransit'] = _format_value_complete(in_transit, 'FutureTransit', integer)
    df['FutureInventoryTransit'] = _format_value_complete(
        on_hand + in_transit, 'FutureInventoryTransit', integer
    )
    df['PurchaseFactor'] = purchase_factor

    # Make sure this key has an order list, then queue what is already in transit.
    pending_orders = transit_orders.setdefault(key, [])
    transit_qty = float(in_transit)
    if transit_qty > 0:
        uses_location = 'Location' in metadata
        pending_orders.extend(
            _prepare_transit_schedule_complete(key, transit_qty, dates, df_transit, uses_location)
        )

    # Nothing arrives within period 0 itself.
    df['TransitArrival'] = '[]'
    df['SecurityStock'] = _calculate_security_stock_complete(df, security_stock_ref, integer)
    df['SuggestedForecast'] = _format_value_complete(df['SuggestedForecast'].iloc[0], 'SuggestedForecast', integer)

    reorder_point = max(0, df['SuggestedForecast'].iloc[0] + df['SecurityStock'].iloc[0])
    df['ReorderPoint'] = _format_value_complete(reorder_point, 'ReorderPoint', integer)

    shortfall = max(0, df['ReorderPoint'].iloc[0] - df['FutureInventoryTransit'].iloc[0])
    df['ReorderQtyBase'] = _format_value_complete(shortfall, 'ReorderQtyBase', integer)

    # Period 0 never issues an order.
    df['ReorderQty'] = 0
    df['ReorderQtyDays'] = 0
    df['ArrivalDate'] = ''

    return df
937
+
938
+
939
def _process_transit_orders_complete(transit_orders, key, current_date, previous_date):
    """
    Split the pending orders of ``key`` into arrived and still-in-transit.

    An order counts as arrived in this period when its 'arrival_date' falls in
    the half-open window (previous_date, current_date]. Arrived orders are
    removed from ``transit_orders[key]`` (the dict is updated in place);
    everything else stays queued.

    Args:
        transit_orders: Dict mapping keys to lists of order dicts with
            'quantity' and 'arrival_date' entries
        key: Key whose orders are processed
        current_date: End of the period (inclusive)
        previous_date: Start of the period (exclusive)

    Returns:
        tuple: (stock_from_arrivals, new_transit, transit_arrivals) where
        ``transit_arrivals`` lists each arrived order as a dict with a float
        'quantity' and a 'YYYY-MM-DD' formatted 'arrival_date'. When the key
        has no pending orders the result is ``(0, 0, [])`` and the dict is
        left untouched.
    """
    pending = transit_orders.get(key, [])
    if not pending:
        return 0, 0, []

    arrived = []
    still_pending = []
    for entry in pending:
        eta = entry['arrival_date']
        landed = previous_date < eta <= current_date
        (arrived if landed else still_pending).append(entry)

    received_qty = sum(entry['quantity'] for entry in arrived)
    outstanding_qty = sum(entry['quantity'] for entry in still_pending)
    arrival_log = [
        {
            'quantity': float(entry['quantity']),
            'arrival_date': entry['arrival_date'].strftime('%Y-%m-%d'),
        }
        for entry in arrived
    ]

    # Only the orders that have not landed yet remain queued for this key.
    transit_orders[key] = still_pending
    return received_qty, outstanding_qty, arrival_log
966
+
967
+
968
def _process_future_period_complete(current_df_inv, df_sstock, df_previous, key, date, dates, i, transit_orders, metadata, integer, security_stock_ref=False):
    """
    Process inventory for a future period (i > 0).

    Starting from the previous period's 'FutureInventory', the function
    subtracts the previous 'SuggestedForecastPeriod' as consumption, adds
    transit orders arriving in the window (dates[i-1], date], recomputes the
    security stock, reorder point and reorder quantity, and, when a reorder
    is needed, queues a new transit order in ``transit_orders[key]`` arriving
    'AvgLeadTime' days after ``date``.

    Args:
        current_df_inv: Inventory data for the current item
        df_sstock: Frame with forecast, usage and lead-time columns
        df_previous: Result frame of the previous period
        key: Item key used to index ``transit_orders``
        date: Current period date as a 'YYYYMMDD' string
        dates: List of 'YYYYMMDD' period dates for this item
        i: Index of the current period within ``dates`` (must be > 0)
        transit_orders: Dict mapping keys to lists of pending transit orders;
            mutated in place
        metadata: Metadata column names (not used by this function)
        integer: Forwarded to ``_format_value_complete``
        security_stock_ref: Forwarded to ``_calculate_security_stock_complete``

    Returns:
        pd.DataFrame: Copy of ``df_sstock`` extended with the period columns.

    Raises:
        ValueError: If ``i`` is not a valid future-period index or either date
            is not a parseable 8-character string. The underlying error is
            attached as ``__cause__``.
    """
    df = df_sstock.copy()
    try:
        df['PurchaseFactor'] = current_df_inv['PurchaseFactor'].iloc[0] if 'PurchaseFactor' in current_df_inv.columns else 1
    except (KeyError, IndexError):
        df['PurchaseFactor'] = 1

    # Stock left after the previous period's forecast consumption.
    consumption = df_previous['SuggestedForecastPeriod'].iloc[0]
    previous_stock = df_previous['FutureInventory'].iloc[0] - consumption

    try:
        # Validate indices before accessing the dates array.
        if i <= 0:
            raise ValueError(f"Invalid period index {i} for future period processing (must be > 0)")
        if i-1 >= len(dates):
            raise ValueError(f"Previous period index {i-1} is out of bounds for dates array of length {len(dates)}")

        # Validate date values before conversion.
        if not isinstance(date, str) or len(date) != 8:
            raise ValueError(f"Invalid current date format: {repr(date)} (expected 8-character string)")
        if not isinstance(dates[i-1], str) or len(dates[i-1]) != 8:
            raise ValueError(f"Invalid previous date format: {repr(dates[i-1])} (expected 8-character string)")

        current_date = pd.to_datetime(date, format='%Y%m%d')
        previous_date = pd.to_datetime(dates[i-1], format='%Y%m%d')

    except Exception as e:
        # Only index dates[i-1] when it is a real, non-negative position, so
        # building the message can neither fail nor report an unrelated
        # element picked up through negative indexing.
        previous_in_range = 0 <= i-1 < len(dates)
        previous_value = dates[i-1] if previous_in_range else 'INDEX_OUT_OF_BOUNDS'
        previous_type = type(dates[i-1]) if previous_in_range else 'N/A'
        error_msg = (
            f"_process_future_period_complete: Date processing error - "
            f"current='{date}' (type: {type(date)}), "
            f"previous='{previous_value}' "
            f"(type: {previous_type}), "
            f"period_index={i}, dates_length={len(dates)}, "
            f"original_error: {str(e)}"
        )
        # Chain the cause so the original traceback is not lost.
        raise ValueError(error_msg) from e

    stock_from_arrivals, new_transit, transit_arrivals = _process_transit_orders_complete(
        transit_orders, key, current_date, previous_date
    )

    # Inventory can never go below zero.
    future_stock = max(0, previous_stock + stock_from_arrivals)
    df['FutureInventory'] = _format_value_complete(future_stock, 'FutureInventory', integer)
    df['FutureTransit'] = _format_value_complete(new_transit, 'FutureTransit', integer)
    df['FutureInventoryTransit'] = _format_value_complete(
        future_stock + new_transit, 'FutureInventoryTransit', integer
    )
    df['TransitArrival'] = str(transit_arrivals) if transit_arrivals else '[]'

    df['SecurityStock'] = _calculate_security_stock_complete(df, security_stock_ref, integer)
    df['SuggestedForecast'] = _format_value_complete(df['SuggestedForecast'].iloc[0], 'SuggestedForecast', integer)
    df['ReorderPoint'] = _format_value_complete(
        max(0, df['SuggestedForecast'].iloc[0] + df['SecurityStock'].iloc[0]), 'ReorderPoint', integer
    )
    df['ReorderQtyBase'] = _format_value_complete(
        max(0, df['ReorderPoint'].iloc[0] - df['FutureInventoryTransit'].iloc[0]), 'ReorderQtyBase', integer
    )

    # Round the base quantity up to a whole multiple of the purchase factor.
    # NOTE(review): a PurchaseFactor of 0 would divide by zero here; confirm
    # that upstream data validation rules this out.
    reorder_qty_base = df['ReorderQtyBase'].iloc[0]
    purchase_factor = df['PurchaseFactor'].iloc[0]

    if reorder_qty_base > 0:
        reorder_qty = np.ceil(reorder_qty_base / purchase_factor) * purchase_factor
    else:
        reorder_qty = 0

    df['ReorderQty'] = _format_value_complete(reorder_qty, 'ReorderQty', integer)

    # Express the order size as days of average usage.
    if df['ReorderQty'].iloc[0] > 0 and df['AvgDailyUsage'].iloc[0] > 0:
        reorder_qty_days = df['ReorderQty'].iloc[0] / df['AvgDailyUsage'].iloc[0]
    else:
        reorder_qty_days = 0

    df['ReorderQtyDays'] = _format_value_complete(reorder_qty_days, 'ReorderQtyDays', integer)

    if df['ReorderQty'].iloc[0] > 0:
        avg_lead_time = df['AvgLeadTime'].iloc[0]
        arrival_date = current_date + timedelta(days=int(avg_lead_time))
        # setdefault: the key may have no order list yet (the transit reader
        # tolerates a missing key, so the writer must too).
        transit_orders.setdefault(key, []).append({
            'quantity': float(df['ReorderQty'].iloc[0]),
            'arrival_date': arrival_date
        })
        df['ArrivalDate'] = arrival_date.strftime('%Y-%m-%d')
    else:
        df['ArrivalDate'] = ''

    return df
1063
+
484
1064
 
1065
+ class FutureReorder():
1066
+ """
1067
+ Versión completa optimizada para procesamiento masivo de datasets grandes.
1068
+ Incluye TODA la funcionalidad de la clase original pero optimizada para paralelización.
1069
+
1070
+ Nueva funcionalidad period2:
1071
+ - period2 controla el número de períodos para ítems con ReorderFreq <= 20
1072
+ - periods controla el número de períodos para ítems con ReorderFreq > 20
1073
+ - Esto permite reducir el número de resultados para ítems con frecuencias de reorden pequeñas
1074
+ """
485
1075
 
486
- def _calculate_security_stock(self, df):
1076
def __init__(self, df_inv, df_lead_time, df_prep, df_fcst, periods, start_date,
             location=False, security_stock_ref=False, df_transit=None, integer=True,
             complete_suggested=False, start_date_zero=None, batch_size=None, n_workers=None,
             verbose=True, period2=2):
    """
    Initialize FutureReorder with enhanced period control.

    Stores every argument on the instance, picks defaults for the
    parallel-processing knobs when they are not supplied, builds the
    ``metadata`` key-column list and finally pre-filters the auxiliary
    dataframes against ``df_inv``.

    Args:
        df_inv: Inventory DataFrame (its length drives the auto-configuration)
        df_lead_time: Lead time DataFrame
        df_prep: Preparation DataFrame
        df_fcst: Forecast DataFrame
        periods: Number of periods for items with ReorderFreq > 20
        start_date: Start date for calculations ('%Y-%m-%d' string or None)
        location: Boolean for location-based processing
        security_stock_ref: Boolean for reference-based security stock
        df_transit: Transit DataFrame (optional)
        integer: Boolean for integer formatting
        complete_suggested: Boolean for complete suggested forecast mode
        start_date_zero: Custom start date for period 0
        batch_size: Batch size for parallel processing (auto-configured if None)
        n_workers: Number of workers for parallel processing (auto-configured if None)
        verbose: Boolean for detailed logging
        period2: Number of periods for items with ReorderFreq <= 20 (default: 2)
    """
    # Original parameters, stored unchanged.
    self.df_inv = df_inv
    self.df_lead_time = df_lead_time
    self.df_prep = df_prep
    self.df_fcst = df_fcst
    self.default_coverage = 30
    self.periods = periods
    self.period2 = period2
    self.start_date = pd.to_datetime(start_date, format='%Y-%m-%d') if start_date is not None else None
    self.location = location
    self.security_stock_ref = security_stock_ref
    self.df_transit = df_transit
    self.integer = integer
    self.complete_suggested = complete_suggested
    self.start_date_zero = start_date_zero

    total_items = len(df_inv)

    # Auto-configure batch_size from the dataset size unless the caller chose one.
    if batch_size is None:
        if total_items <= 500:
            self.batch_size = 50    # small batches for small datasets
        elif total_items <= 2000:
            self.batch_size = 100   # medium batches
        else:
            self.batch_size = 200   # larger batches for big datasets
    else:
        self.batch_size = batch_size

    # Auto-configure n_workers from the core count and the dataset size.
    if n_workers is None:
        available_cores = cpu_count()
        if total_items <= 200:
            auto_workers = min(2, available_cores - 1)   # conservative for small datasets
        elif total_items <= 1000:
            auto_workers = min(4, available_cores - 1)   # moderate parallelization
        else:
            auto_workers = min(max(4, available_cores - 2), 8)   # aggressive for large datasets
        # On a single-core host `available_cores - 1` is 0; never auto-select
        # fewer than one worker (process pools reject max_workers <= 0).
        self.n_workers = max(1, auto_workers)
    else:
        self.n_workers = n_workers

    self.verbose = verbose

    # Key columns that identify one item (optionally per location).
    self.metadata = ['Item']
    if self.location:
        self.metadata.append('Location')

    # Pre-filter dataframes based on df_inv to improve performance.
    self._prefilter_dataframes()

    self._log(f"🚀 FutureReorder Massive Complete - Inicializado para {len(self.df_inv)} ítems")
    # Report the effective batch size, not the raw argument (which is None
    # whenever the size was auto-configured above).
    self._log(f"⚙️ Configuración: batch_size={self.batch_size}, workers={self.n_workers}")
533
1156
 
534
- # Apply formatting
535
- return pd.Series(future_stockout_days).apply(lambda x: self._format_value(x, 'FutureStockoutDays'))
536
-
537
-
538
- def _sum_transit_arrivals(self, transit_arrivals_str):
1157
def _prefilter_dataframes(self):
    """
    Pre-filter all input dataframes based on df_inv to improve performance.

    Every auxiliary frame (``df_lead_time``, ``df_prep``, ``df_fcst`` and,
    when present, ``df_transit``) is inner-merged with the distinct key
    combinations found in ``df_inv`` and the result is stored back on the
    instance. The keys are ``Item`` plus ``Location`` when ``self.location``
    is truthy. ``df_prep`` is matched on ``item_id``/``location`` when it
    carries all of those lower-case key columns, otherwise on the standard
    names. When ``self.verbose`` is truthy, before/after row counts are
    logged through ``self._log``.
    """
    key_cols = ['Item', 'Location'] if self.location else ['Item']

    def _frame_sizes():
        # Row count of every filterable frame; a missing df_transit counts as 0.
        return {
            'df_lead_time': len(self.df_lead_time),
            'df_prep': len(self.df_prep),
            'df_fcst': len(self.df_fcst),
            'df_transit': len(self.df_transit) if self.df_transit is not None else 0
        }

    if self.verbose:
        original_sizes = _frame_sizes()
        self._log("📊 Pre-filtering dataframes based on df_inv...")

    # Distinct key combinations present in the inventory.
    base_filter = self.df_inv[key_cols].drop_duplicates()

    self.df_lead_time = self.df_lead_time.merge(base_filter, on=key_cols, how='inner')

    # df_prep may use the alternative naming convention; use it only when
    # every key column is available under its lower-case name.
    prep_names = {'Item': 'item_id', 'Location': 'location'}
    prep_cols = [prep_names[col] for col in key_cols]
    if all(col in self.df_prep.columns for col in prep_cols):
        self.df_prep = self.df_prep.merge(
            base_filter.rename(columns=prep_names),
            on=prep_cols,
            how='inner'
        )
    else:
        self.df_prep = self.df_prep.merge(base_filter, on=key_cols, how='inner')

    self.df_fcst = self.df_fcst.merge(base_filter, on=key_cols, how='inner')

    if self.df_transit is not None:
        self.df_transit = self.df_transit.merge(base_filter, on=key_cols, how='inner')

    if self.verbose:
        new_sizes = _frame_sizes()

        self._log("📊 Filtrado completado:")
        for df_name, original_size in original_sizes.items():
            new_size = new_sizes[df_name]
            if original_size > 0:
                reduction_pct = ((original_size - new_size) / original_size) * 100
                self._log(f" • {df_name}: {original_size:,} → {new_size:,} (-{reduction_pct:.1f}%)")
            else:
                # Nothing to reduce; avoid dividing by zero.
                self._log(f" • {df_name}: {original_size:,} {new_size:,}")
618
1272
 
1273
def _log(self, message):
    """Print ``message`` and flush stdout right away; do nothing unless ``self.verbose`` is truthy."""
    if not self.verbose:
        return
    print(message)
    # Flush explicitly so the line is written out immediately.
    sys.stdout.flush()
619
1277
 
620
- def _process_current_period(self, current_df_inv, df_sstock, key, date, transit_orders, dates):
1278
def future_date(self):
    """
    Generate future reorder dates for each item based on reorder frequency.

    Period 0 is ``start_date_zero`` when given, otherwise the date taken from
    the first eight characters of ``Utils.set_timestamp()``. The horizon ends
    ``periods + 1`` months after period 0. After period 0 comes ``start_date``
    (only if it is later than period 0), followed by dates spaced
    ``ReorderFreq`` days apart starting one frequency after the base start
    date, up to and including the horizon end.

    Missing, zero, or fractional-below-one ``ReorderFreq`` values fall back
    to ``self.default_coverage``.

    Returns:
        dict: maps each item key (``Item`` or ``(Item, Location)``) to a list
        of 'YYYYMMDD' strings. Items sharing a frequency share one list object.
    """
    # Determine the starting date for period 0.
    if self.start_date_zero is not None:
        actual_date = pd.to_datetime(self.start_date_zero, format='%Y-%m-%d')
    else:
        # Use the timestamp provided by Utils (original behavior).
        DOCKER_CONFIG_PATH = os.path.join('/opt/ml/processing/input', 'config.yml')
        utils = Utils(config_file=DOCKER_CONFIG_PATH, logfile='data_io', log_path='output/logs')
        timestamp = utils.set_timestamp()
        actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')

    # One extra month beyond `periods`; per the original note the extra
    # period is filtered out of the final results elsewhere.
    end_date = actual_date + pd.DateOffset(months=self.periods + 1)

    # Without an explicit start_date, period 0 is also the base for period 1.
    base_start_date = actual_date if self.start_date is None else self.start_date

    # Unique items with their reorder frequencies.
    columns = self.metadata + ['ReorderFreq']
    df_unique = self.df_lead_time[columns].drop_duplicates().copy()

    # Normalize ReorderFreq. The zero check runs AFTER the integer cast so a
    # fractional value such as 0.5 (which truncates to 0) also falls back to
    # the default instead of causing a division by zero below.
    reorder_freq = df_unique['ReorderFreq'].fillna(self.default_coverage).astype(int)
    df_unique['ReorderFreq'] = reorder_freq.where(reorder_freq != 0, self.default_coverage)

    item_dates = {}

    # All items with the same frequency share the same schedule.
    for freq, group in df_unique.groupby('ReorderFreq'):
        freq = int(freq)  # plain int for timedelta and the freq string

        # Period 0 is always present; base_start_date only if it is later.
        date_range = [actual_date]
        if base_start_date > actual_date:
            date_range.append(base_start_date)

        # Upper bound on the number of steps; clamp at 0 so a start date
        # beyond the horizon never asks date_range for a negative count.
        num_periods = max(0, int((end_date - base_start_date).days / freq) + 1)
        future_dates = pd.date_range(
            start=base_start_date + timedelta(days=freq),
            periods=num_periods,
            freq=f'{freq}D'
        )
        date_range.extend(future_dates[future_dates <= end_date])

        date_strings = [d.strftime('%Y%m%d') for d in date_range]

        # Assign the shared schedule to every item in this frequency group.
        if self.location:
            keys = zip(group['Item'], group['Location'])
        else:
            keys = group['Item']
        for key in keys:
            item_dates[key] = date_strings

    return item_dates
788
1350
 
789
- def _process_future_period(self, current_df_inv, df_sstock, df_previous, key, date, dates, i, transit_orders):
1351
def _prepare_batch_data(self):
    """
    Build the per-item work list without pre-computing any dates.

    Each entry is ``(key, lead_time_rows, inventory_rows)`` where ``key`` is
    the item (or ``(item, location)`` when ``self.location`` is truthy) and
    the two frames hold that key's rows from ``self.df_lead_time`` and
    ``self.df_inv``, keeping their original index and row order. Entries
    follow the order in which keys first appear in ``df_inv``. Keys with no
    rows in either frame are skipped.

    Both frames are grouped once up front instead of being scanned in full
    for every item. Unlike the per-item scan this replaces, an error such as
    a missing key column is raised rather than printed and skipped.

    Returns:
        list[tuple]: the work list described above.
    """
    key_cols = ['Item', 'Location'] if self.location else ['Item']
    # A scalar grouper yields scalar keys; a two-column grouper yields tuples.
    grouper = key_cols if self.location else 'Item'

    # sort=False: group order is irrelevant here, only the lookup matters.
    lead_time_groups = dict(iter(self.df_lead_time.groupby(grouper, sort=False)))
    inv_groups = dict(iter(self.df_inv.groupby(grouper, sort=False)))

    batch_data = []
    unique_items = self.df_inv[key_cols].drop_duplicates()

    for key in unique_items.itertuples(index=False, name=None):
        if not self.location:
            key = key[0]

        current_df_lead_time = lead_time_groups.get(key)
        current_df_inv = inv_groups.get(key)

        # Missing keys (including NaN keys, which groupby drops) are skipped,
        # matching the empty-result check of a per-item equality filter.
        if current_df_lead_time is None or current_df_inv is None:
            continue
        if current_df_lead_time.empty or current_df_inv.empty:
            continue

        # Only the key and its dataframes; dates are generated in the worker.
        batch_data.append((key, current_df_lead_time, current_df_inv))

    return batch_data
903
1397
 
904
1398
  def _prepare_final_dataframe(self, data_frame):
905
1399
  """
906
1400
  Prepare the final output dataframe with proper formatting and column selection.
907
-
908
- This method:
909
- 1. Merges with lead time data to add reorder parameters
910
- 2. Formats dates to YYYY-MM-DD format
911
- 3. Renames columns for clarity
912
- 4. Rounds numeric values to 2 decimal places
913
- 5. Selects and orders final columns
914
-
915
- Args:
916
- data_frame (pd.DataFrame): Raw calculation results
917
-
918
- Returns:
919
- pd.DataFrame: Formatted output with columns:
920
- - PurchaseDate, Item, ItemDescription, (Location)
921
- - Forecast metrics: SuggestedForecast, SuggestedForecastPeriod
922
- - Inventory levels: FutureInventoryTransit (total), FutureInventory (stock), FutureTransit (transit)
923
- - FutureInventoryTransitArrival: FutureInventory + arrivals in the period
924
- - FutureStockoutDays: Days of inventory coverage
925
- - Transit information: TransitArrival
926
- - Reorder metrics: ReorderQtyBase, ReorderQty, ReorderQtyDays
927
- - Order information: ArrivalDate (arrival date of current period's order)
928
- - Planning parameters: PurchaseFactor, ReorderPoint, SecurityStock
929
- - Usage rates: AvgDailyUsage, MaxDailyUsage
930
- - Lead times: AvgLeadTime, MaxLeadTime
931
- - Coverage parameters: ReorderFreq, Coverage
1401
+ Versión completa de la función original.
932
1402
  """
933
1403
  leadtimes_columns = ['Item', 'Location', 'ReorderFreq', 'Coverage'] if self.location else ['Item', 'ReorderFreq', 'Coverage']
934
1404
  leadtimes = self.df_lead_time[leadtimes_columns]
@@ -946,13 +1416,13 @@ class FutureReorder():
946
1416
  always_integer_fields = ['PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime', 'ReorderQtyDays', 'ReorderFreq', 'Coverage']
947
1417
  for field in always_integer_fields:
948
1418
  if field in df_final.columns:
949
- df_final[field] = df_final[field].apply(lambda x: self._format_value(x, field))
1419
+ df_final[field] = df_final[field].apply(lambda x: _format_value_complete(x, field, True))
950
1420
 
951
1421
  # Apply formatting to fields that are ALWAYS decimals
952
1422
  always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
953
1423
  for field in always_decimal_fields:
954
1424
  if field in df_final.columns:
955
- df_final[field] = df_final[field].apply(lambda x: self._format_value(x, field))
1425
+ df_final[field] = df_final[field].apply(lambda x: _format_value_complete(x, field, False))
956
1426
 
957
1427
  # Select final columns
958
1428
  if self.location:
@@ -976,186 +1446,220 @@ class FutureReorder():
976
1446
 
977
1447
  return df_final[final_cols]
978
1448
 
1449
+ def _filter_periods(self, df):
1450
+ """
1451
+ Filter out period 0 and last period from results.
1452
+ Period 0 is used only as calculation base.
1453
+ Last period is filtered because it doesn't have next period transit data.
1454
+
1455
+ Special case: When start_date=None, don't filter the first period
1456
+ because it represents the actual current period.
1457
+ """
1458
+ if df.empty:
1459
+ return df
1460
+
1461
+ # Convert PurchaseDate to datetime for filtering
1462
+ df['PurchaseDate_dt'] = pd.to_datetime(df['PurchaseDate'])
1463
+
1464
+ # Get unique dates and sort them
1465
+ unique_dates = sorted(df['PurchaseDate_dt'].unique())
1466
+
1467
+ # Determine filtering logic based on start_date parameter
1468
+ if self.start_date is None:
1469
+ # When start_date=None, only filter the last period
1470
+ # Keep period 0 as it represents the current period
1471
+ if len(unique_dates) <= 1:
1472
+ self._log("⚠️ Warning: Only 1 period available, cannot filter last period")
1473
+ return pd.DataFrame(columns=df.columns.drop('PurchaseDate_dt'))
1474
+
1475
+ last_date = unique_dates[-1]
1476
+ filtered_df = df[df['PurchaseDate_dt'] != last_date].copy()
1477
+
1478
+ self._log(f"📊 Filtered periods (start_date=None): Only removed last period ({last_date.strftime('%Y-%m-%d')})")
1479
+
1480
+ else:
1481
+ # When start_date is specified, filter both first and last periods (original logic)
1482
+ if len(unique_dates) <= 2:
1483
+ self._log("⚠️ Warning: Only 2 or fewer periods available after filtering")
1484
+ return pd.DataFrame(columns=df.columns.drop('PurchaseDate_dt'))
1485
+
1486
+ first_date = unique_dates[0]
1487
+ last_date = unique_dates[-1]
1488
+
1489
+ filtered_df = df[
1490
+ (df['PurchaseDate_dt'] != first_date) &
1491
+ (df['PurchaseDate_dt'] != last_date)
1492
+ ].copy()
1493
+
1494
+ self._log(f"📊 Filtered periods: Removed period 0 ({first_date.strftime('%Y-%m-%d')}) and last period ({last_date.strftime('%Y-%m-%d')})")
1495
+
1496
+ # Drop the temporary datetime column
1497
+ filtered_df = filtered_df.drop('PurchaseDate_dt', axis=1)
1498
+
1499
+ return filtered_df
979
1500
 
980
1501
  def reorder(self):
981
1502
  """
982
- Main method to calculate future reorder recommendations.
983
-
984
- This optimized version uses batch processing and vectorization to improve
985
- performance, especially for large datasets. The method:
986
- 1. Generates future dates based on reorder frequencies
987
- 2. Groups items for batch processing when possible
988
- 3. Pre-allocates data structures to minimize memory operations
989
- 4. Uses vectorized calculations where applicable
990
- 5. Formats and returns consolidated results
1503
+ Main method to calculate future reorder recommendations at massive scale.
1504
+
1505
+ This high-performance method orchestrates the complete inventory reorder calculation
1506
+ process using parallel processing and intelligent resource management. It handles
1507
+ large datasets efficiently through batching, multiprocessing, and optimized algorithms.
1508
+
1509
+ Processing Pipeline:
1510
+ 1. Generate future dates based on reorder frequencies and period control
1511
+ 2. Pre-filter and prepare data for batch processing
1512
+ 3. Split items into optimally-sized batches
1513
+ 4. Process batches in parallel using multiple CPU cores
1514
+ 5. Combine and format results with proper data types
1515
+ 6. Apply period filtering (removes period 0 and last period)
1516
+ 7. Return comprehensive reorder recommendations
1517
+
1518
+ Performance Features:
1519
+ - Auto-configures batch sizes based on dataset size
1520
+ - Uses ProcessPoolExecutor for true parallel processing
1521
+ - Provides real-time progress tracking and ETA calculations
1522
+ - Implements intelligent error handling and recovery
1523
+ - Optimizes memory usage through efficient data structures
1524
+
1525
+ Period Control Logic:
1526
+ - Items with ReorderFreq <= 20: Uses period2 (default: 2 periods)
1527
+ - Items with ReorderFreq > 20: Uses periods parameter
1528
+ - This reduces output volume for high-frequency reorder items
1529
+
1530
+ Period Filtering Logic:
1531
+ - When start_date=None: Only removes last period (keeps period 0 as current)
1532
+ - When start_date specified: Removes both period 0 and last period
1533
+ - Last period is always removed due to incomplete transit data
991
1534
 
992
1535
  Returns:
993
- pd.DataFrame: Complete reorder recommendations for all items/locations
994
- and time periods. See _prepare_final_dataframe() for
995
- detailed column descriptions.
996
-
1536
+ pd.DataFrame: Complete reorder recommendations with columns:
1537
+ - PurchaseDate: Date when reorder should be evaluated
1538
+ - Item, ItemDescription, (Location): Item identification
1539
+ - Forecast metrics: SuggestedForecast, SuggestedForecastPeriod
1540
+ - Inventory levels: FutureInventoryTransit, FutureInventory, FutureTransit
1541
+ - FutureInventoryTransitArrival: Stock + arrivals in the period
1542
+ - FutureStockoutDays: Days of inventory coverage
1543
+ - Transit information: TransitArrival details
1544
+ - Reorder metrics: ReorderQtyBase, ReorderQty, ReorderQtyDays
1545
+ - Order information: ArrivalDate of current period's order
1546
+ - Planning parameters: PurchaseFactor, ReorderPoint, SecurityStock
1547
+ - Usage rates: AvgDailyUsage, MaxDailyUsage
1548
+ - Lead times: AvgLeadTime, MaxLeadTime
1549
+ - Coverage parameters: ReorderFreq, Coverage
1550
+
997
1551
  Example usage:
998
1552
  >>> reorder_system = FutureReorder(
999
1553
  ... df_inv=inventory_df,
1000
1554
  ... df_lead_time=lead_time_df,
1001
1555
  ... df_prep=prep_df,
1002
1556
  ... df_fcst=forecast_df,
1003
- ... periods=6,
1004
- ... start_date='2024-01-01'
1557
+ ... periods=6, # For items with ReorderFreq > 20
1558
+ ... start_date=None, # Use current date
1559
+ ... period2=2, # For items with ReorderFreq <= 20
1560
+ ... batch_size=100, # Optional: auto-configured if None
1561
+ ... n_workers=4 # Optional: auto-configured if None
1005
1562
  ... )
1006
1563
  >>> results = reorder_system.reorder()
1007
- >>> results.head()
1008
- # Returns DataFrame with reorder recommendations
1564
+ >>> print(f"Generated {len(results)} reorder recommendations")
1009
1565
  """
1566
+ start_time = time.time()
1010
1567
 
1011
- item_dates = self.future_date()
1568
+ self._log("🚀 FutureReorder Massive Complete - Processing Started")
1012
1569
 
1013
- # Pre-allocate list for results instead of concatenating DataFrames
1014
- all_results = []
1570
+ # Prepare batch data without pre-generating dates
1571
+ self._log("🔧 Preparando datos por lotes...")
1572
+ batch_data = self._prepare_batch_data()
1015
1573
 
1016
- # Group items by number of periods for potential batch processing
1017
- items_by_period_count = {}
1018
- for key, dates in item_dates.items():
1019
- period_count = len(dates)
1020
- if period_count not in items_by_period_count:
1021
- items_by_period_count[period_count] = []
1022
- items_by_period_count[period_count].append((key, dates))
1574
+ # Calculate statistics based on items that will be processed
1575
+ total_items = len(batch_data)
1023
1576
 
1024
- # Process each group
1025
- for period_count, items_group in items_by_period_count.items():
1026
- # For each item in the group
1027
- for key, dates in items_group:
1028
- if self.location:
1029
- item, location = key
1030
- else:
1031
- item = key
1032
- location = None
1033
-
1034
- # Get current dataframes
1035
- current_df_lead_time, current_df_inv = self._get_current_dataframes(item, location)
1036
-
1037
- if current_df_lead_time.empty or current_df_inv.empty:
1038
- continue
1039
-
1040
- # Process this item using optimized approach
1041
- item_results = self._process_item_optimized(
1042
- key, item, location, dates, current_df_lead_time, current_df_inv
1043
- )
1044
-
1045
- if item_results is not None and not item_results.empty:
1046
- all_results.append(item_results)
1577
+ self._log(f"📊 Dataset Info:")
1578
+ self._log(f" • Total Items: {total_items}")
1579
+ self._log(f" • Periods (ReorderFreq > 20): {self.periods}")
1580
+ self._log(f" • Period2 (ReorderFreq <= 20): {self.period2}")
1581
+ self._log(f" • Estimated Total Calculations: {total_items * self.periods}")
1047
1582
 
1048
- # Combine all results efficiently
1049
- if all_results:
1050
- data_frame = pd.concat(all_results, ignore_index=True)
1051
- else:
1583
+ if not batch_data:
1584
+ self._log("⚠️ No items to process after filtering")
1052
1585
  columns = ['Date', 'Item'] + (['Location'] if self.location else [])
1053
- data_frame = pd.DataFrame(columns=columns)
1054
-
1055
- # Prepare and return final dataframe
1056
- return self._prepare_final_dataframe(data_frame)
1057
-
1058
-
1059
- def _process_item_optimized(self, key, item, location, dates, current_df_lead_time, current_df_inv):
1060
- """
1061
- Process a single item through all periods using optimized approach.
1586
+ return pd.DataFrame(columns=columns)
1587
+
1588
+ # Split into batches for parallel processing
1589
+ batches = []
1590
+ for i in range(0, len(batch_data), self.batch_size):
1591
+ batch = batch_data[i:i + self.batch_size]
1592
+ batch_args = (
1593
+ batch, self.df_fcst, self.df_prep, self.metadata,
1594
+ self.location, self.default_coverage, self.complete_suggested,
1595
+ self.security_stock_ref, self.integer, self.verbose, self.df_transit,
1596
+ self.periods, self.period2, self.start_date.strftime('%Y-%m-%d') if self.start_date else None,
1597
+ self.start_date_zero
1598
+ )
1599
+ batches.append(batch_args)
1062
1600
 
1063
- This method pre-allocates arrays and uses vectorized operations where possible
1064
- to improve performance.
1601
+ total_batches = len(batches)
1602
+ items_per_batch = len(batch_data) / total_batches if total_batches > 0 else 0
1065
1603
 
1066
- Args:
1067
- key: Item key (item or (item, location))
1068
- item: Item identifier
1069
- location: Location identifier (if applicable)
1070
- dates: List of dates to process
1071
- current_df_lead_time: Lead time data for this item
1072
- current_df_inv: Inventory data for this item
1073
-
1074
- Returns:
1075
- pd.DataFrame: Results for all periods of this item
1076
- """
1604
+ self._log(f"⚙️ Processing Config:")
1605
+ self._log(f" • Batch Size: {self.batch_size}")
1606
+ self._log(f" • Workers: {self.n_workers}")
1607
+ self._log(f" • Total Batches: {total_batches}")
1608
+ self._log(f" • Items per Batch: {items_per_batch:.1f}")
1077
1609
 
1078
- # Pre-allocate dictionaries for intermediate results
1079
- suggested_forecasts = {}
1080
- df_avgs = {}
1081
- df_maxs = {}
1082
- df_sstocks = {}
1083
- period_results = {}
1610
+ current_time = datetime.now().strftime('%H:%M:%S')
1611
+ self._log(f"⏱️ Starting processing at {current_time}")
1084
1612
 
1085
- # Initialize transit orders for this item
1086
- transit_orders = {key: []}
1613
+ # Process batches in parallel
1614
+ results = []
1615
+ completed_batches = 0
1087
1616
 
1088
- # Track last suggested forecast value for complete_suggested feature
1089
- last_suggested_value = None
1090
-
1091
- # Process each period
1092
- for i, date in enumerate(dates):
1093
- # Calculate suggested forecast (cached if possible)
1094
- suggested_forecasts[i] = self._calculate_suggested_forecast(
1095
- current_df_lead_time, current_df_inv, date, last_suggested_value
1096
- )
1097
-
1098
- # Update last_suggested_value for next iteration
1099
- if 'SuggestedForecast' in suggested_forecasts[i].columns:
1100
- last_suggested_value = suggested_forecasts[i]['SuggestedForecast'].iloc[0]
1101
-
1102
- # Calculate daily usage
1103
- df_avgs[i], df_maxs[i] = self._calculate_daily_usage(
1104
- suggested_forecasts[i], date
1105
- )
1106
-
1107
- # Calculate security stock data
1108
- df_sstocks[i] = self._calculate_security_stock_data(
1109
- df_maxs[i], current_df_lead_time, period_index=i, dates=dates
1110
- )
1111
-
1112
- # Process period based on whether it's current or future
1113
- if i == 0:
1114
- period_results[i] = self._process_current_period(
1115
- current_df_inv, df_sstocks[i], key, date, transit_orders, dates
1116
- )
1117
- else:
1118
- period_results[i] = self._process_future_period(
1119
- current_df_inv, df_sstocks[i], period_results[i-1],
1120
- key, date, dates, i, transit_orders
1121
- )
1617
+ with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
1618
+ # Submit all batches
1619
+ future_to_batch = {executor.submit(process_item_batch_complete, batch_args): i
1620
+ for i, batch_args in enumerate(batches)}
1122
1621
 
1123
- # Add metadata columns efficiently
1124
- period_results[i]['Date'] = date
1125
- period_results[i]['Item'] = item
1126
- if self.location:
1127
- period_results[i]['Location'] = location
1622
+ # Collect results as they complete
1623
+ for future in as_completed(future_to_batch):
1624
+ batch_idx = future_to_batch[future]
1625
+ try:
1626
+ result = future.result()
1627
+ if not result.empty:
1628
+ results.append(result)
1629
+
1630
+ completed_batches += 1
1631
+ progress = (completed_batches / total_batches) * 100
1632
+
1633
+ elapsed_time = time.time() - start_time
1634
+ if completed_batches > 0:
1635
+ eta_seconds = (elapsed_time / completed_batches) * (total_batches - completed_batches)
1636
+ eta_str = f"{int(eta_seconds // 60)}m {int(eta_seconds % 60)}s"
1637
+ else:
1638
+ eta_str = "calculating..."
1639
+
1640
+ self._log(f"✅ Batch {completed_batches}/{total_batches} completed ({progress:.1f}%) - ETA: {eta_str}")
1641
+
1642
+ except Exception as e:
1643
+ self._log(f"❌ Error in batch {batch_idx}: {e}")
1644
+ continue
1128
1645
 
1129
- # After processing all periods, update FutureInventoryTransitArrival with next period's TransitArrival
1130
- for i in range(len(dates)):
1131
- if i < len(dates) - 1: # If there's a next period
1132
- # Get next period's TransitArrival
1133
- next_transit_arrival = period_results[i + 1]['TransitArrival'].iloc[0]
1134
- transit_arrival_sum = self._sum_transit_arrivals(next_transit_arrival)
1135
- else: # Last period - no next period
1136
- transit_arrival_sum = 0
1646
+ # Combine all results
1647
+ if results:
1648
+ self._log("🔗 Combinando resultados...")
1649
+ final_result = pd.concat(results, ignore_index=True)
1137
1650
 
1138
- # Update FutureInventoryTransitArrival
1139
- period_results[i]['FutureInventoryTransitArrival'] = self._format_value(
1140
- period_results[i]['FutureInventory'].iloc[0] + transit_arrival_sum,
1141
- 'FutureInventoryTransitArrival'
1142
- )
1651
+ # Prepare final dataframe with proper formatting
1652
+ final_result = self._prepare_final_dataframe(final_result)
1143
1653
 
1144
- # Recalculate FutureStockoutDays with the updated FutureInventoryTransitArrival
1145
- period_results[i]['FutureStockoutDays'] = self._calculate_inventory_days(period_results[i])
1146
-
1147
- # Combine all periods for this item
1148
- if period_results:
1149
- # Stack all period results at once
1150
- item_df = pd.concat(period_results.values(), ignore_index=True)
1654
+ # Filter out period 0 and last period from results
1655
+ final_result = self._filter_periods(final_result)
1151
1656
 
1152
- # Reorder columns for consistency
1153
- cols = ['Date', 'Item']
1154
- if self.location:
1155
- cols.append('Location')
1156
- other_cols = [col for col in item_df.columns if col not in cols]
1157
- item_df = item_df[cols + other_cols]
1657
+ total_time = time.time() - start_time
1658
+ self._log(f"🎉 Processing completed in {total_time:.2f}s")
1659
+ self._log(f"📈 Final result: {len(final_result)} records")
1158
1660
 
1159
- return item_df
1160
-
1161
- return None
1661
+ return final_result
1662
+ else:
1663
+ self._log("⚠️ No results generated")
1664
+ columns = ['Date', 'Item'] + (['Location'] if self.location else [])
1665
+ return pd.DataFrame(columns=columns)