datupapi 1.110.2__py3-none-any.whl → 1.112.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,224 +1,1282 @@
  import pandas as pd
- import os
  import numpy as np
- from datetime import timedelta
+ import os
+ import ast
+ import time
+ import sys
+ from datetime import timedelta, datetime
+ from concurrent.futures import ProcessPoolExecutor, as_completed
+ from multiprocessing import cpu_count
+ import gc
+ from typing import Dict, List, Tuple, Optional, Union
  from datupapi.utils.utils import Utils
  from datupapi.inventory.src.SuggestedForecast.suggested_forecast import SuggestedForecast
  from datupapi.inventory.src.FutureInventory.daily_usage_future import DailyUsageFuture
 
 
- class FutureReorder():
+ def process_item_batch_complete(batch_args):
+     """
+     Process a batch of items in parallel with complete functionality.
+
+     This function executes in a separate process and handles batch processing
+     of inventory items for reorder calculations. It provides optimized error
+     handling and progress tracking for large-scale inventory processing.
+
+     Args:
+         batch_args (tuple): Contains all necessary data for batch processing:
+             - batch_items: List of item data tuples (key, dates, lead_time_df, inv_df)
+             - df_fcst: Forecast data DataFrame
+             - df_prep: Preparation data DataFrame
+             - metadata: List of metadata columns
+             - location: Boolean indicating if location processing is enabled
+             - default_coverage: Default coverage days
+             - complete_suggested: Boolean for complete suggested forecast mode
+             - security_stock_ref: Boolean for reference-based security stock calculation
+             - integer: Boolean for integer formatting of quantities
+             - verbose: Boolean for detailed logging
+             - df_transit: Transit schedule DataFrame (optional)
+
+     Returns:
+         pd.DataFrame: Combined results for all items in the batch, or an empty DataFrame on errors
+     """
+     try:
+         (batch_items, df_fcst, df_prep, metadata, location, default_coverage,
+          complete_suggested, security_stock_ref, integer, verbose, df_transit) = batch_args
+
+         results = []
+         processed_count = 0
+         error_count = 0
+
+         for item_data in batch_items:
+             key, dates, current_df_lead_time, current_df_inv = item_data
+
+             try:
+                 # Process this item using the complete logic with an implicit timeout
+                 item_result = _process_item_complete(
+                     key, dates, current_df_lead_time, current_df_inv,
+                     df_fcst, df_prep, metadata, location, default_coverage,
+                     complete_suggested, security_stock_ref, integer,
+                     df_transit
+                 )
+
+                 if item_result is not None and not item_result.empty:
+                     results.append(item_result)
+                     processed_count += 1
+                 else:
+                     error_count += 1
+
+             except Exception as e:
+                 error_count += 1
+                 if verbose and error_count <= 3:  # Limit error messages to avoid spam
+                     print(f"⚠️ Error processing {key}: {str(e)[:100]}...")
+                 continue
+
+         # Log batch summary if there were errors
+         if verbose and error_count > 0:
+             print(f"📊 Batch summary: {processed_count} processed, {error_count} errors")
+
+         # Combine all items in this batch
+         if results:
+             return pd.concat(results, ignore_index=True)
+         else:
+             return pd.DataFrame()
+
+     except Exception as e:
+         print(f"❌ Critical error in batch: {str(e)}")
+         return pd.DataFrame()
 
-     def __init__(self, df_inv, df_lead_time, df_prep, df_fcst, periods, start_date, location=False, security_stock_ref=False):
-         self.df_inv = df_inv
-         self.df_lead_time = df_lead_time
-         self.df_prep = df_prep
-         self.df_fcst = df_fcst
-         self.default_coverage = 30
-         self.periods = periods
-         self.start_date = pd.to_datetime(start_date, format='%Y-%m-%d')
-         self.location = location
-         self.security_stock_ref = security_stock_ref
 
+ def _process_item_complete(key, dates, current_df_lead_time, current_df_inv,
+                            df_fcst, df_prep, metadata, location, default_coverage,
+                            complete_suggested, security_stock_ref, integer, df_transit=None):
+     """
+     Process a single item through all periods with complete functionality.
+
+     This function handles the complete inventory reorder calculation for a single item
+     across all time periods. It optimizes performance by pre-allocating data structures
+     and reducing repetitive calls to forecast and daily usage calculations.
+
+     The process includes:
+     1. Calculating suggested forecasts for each period
+     2. Computing daily usage rates (average and maximum)
+     3. Determining security stock requirements
+     4. Processing current period inventory
+     5. Calculating future period reorder needs
+     6. Managing transit order schedules
+     7. Computing final inventory metrics
+
+     Args:
+         key: Item identifier (str) or (item, location) tuple
+         dates: List of calculation dates in 'YYYYMMDD' format
+         current_df_lead_time: Lead time data for this item
+         current_df_inv: Current inventory data for this item
+         df_fcst: Forecast data DataFrame
+         df_prep: Preparation data DataFrame
+         metadata: List of metadata columns
+         location: Boolean indicating location-based processing
+         default_coverage: Default coverage days
+         complete_suggested: Boolean for complete suggested forecast mode
+         security_stock_ref: Boolean for reference-based security stock
+         integer: Boolean for integer formatting
+         df_transit: Transit schedule DataFrame (optional)
+
+     Returns:
+         pd.DataFrame: Complete reorder calculations for all periods of this item
+     """
+     try:
+         # Pre-allocate dictionaries for intermediate results
+         suggested_forecasts = {}
+         df_avgs = {}
+         df_maxs = {}
+         df_sstocks = {}
+         period_results = {}
+
+         # Initialize transit orders for this item
+         transit_orders = {key: []}
+
+         # Track last suggested forecast value for complete_suggested feature
+         last_suggested_value = None
+
+         # Pre-calculate common values to avoid repeated calculations
+         coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
+         if pd.isna(coverage):
+             coverage = default_coverage
+
+         reorder_freq = current_df_lead_time['ReorderFreq'].iloc[0]
+         if pd.isna(reorder_freq) or reorder_freq == 0:
+             reorder_freq = default_coverage
+
+         # Process each period with optimized error handling
+         for i, date in enumerate(dates):
+             try:
+                 # Calculate suggested forecast with better error handling
+                 suggested_forecasts[i] = _calculate_suggested_forecast_complete(
+                     current_df_lead_time, current_df_inv, date, last_suggested_value,
+                     df_fcst, df_prep, metadata, location, default_coverage, complete_suggested
+                 )
+
+                 # Update last_suggested_value for the next iteration
+                 if 'SuggestedForecast' in suggested_forecasts[i].columns:
+                     last_suggested_value = suggested_forecasts[i]['SuggestedForecast'].iloc[0]
+
+                 # Calculate daily usage with optimized calls
+                 df_avgs[i], df_maxs[i] = _calculate_daily_usage_complete(
+                     suggested_forecasts[i], date, df_fcst, location
+                 )
+
+                 # Calculate security stock data with pre-calculated values
+                 df_sstocks[i] = _calculate_security_stock_data_complete(
+                     df_maxs[i], current_df_lead_time, default_coverage, i, dates
+                 )
+
+                 # Process the period based on whether it's current or future
+                 if i == 0:
+                     period_results[i] = _process_current_period_complete(
+                         current_df_inv, df_sstocks[i], key, date, transit_orders, dates,
+                         metadata, integer, security_stock_ref, df_transit
+                     )
+                 else:
+                     period_results[i] = _process_future_period_complete(
+                         current_df_inv, df_sstocks[i], period_results[i-1],
+                         key, date, dates, i, transit_orders, metadata, integer, security_stock_ref
+                     )
+
+                 # Add metadata columns efficiently
+                 period_results[i]['Date'] = date
+                 if location:
+                     item, loc = key
+                     period_results[i]['Item'] = item
+                     period_results[i]['Location'] = loc
+                 else:
+                     period_results[i]['Item'] = key
+
+             except Exception as e:
+                 # Log the error but continue with the next period
+                 print(f"Warning: Error processing period {i} for item {key}: {str(e)}")
+                 continue
+
+         # After processing all periods, update FutureInventoryTransitArrival
+         for i in range(len(dates)):
+             if i < len(dates) - 1:  # If there's a next period
+                 # Get the next period's TransitArrival
+                 next_transit_arrival = period_results[i + 1]['TransitArrival'].iloc[0]
+                 transit_arrival_sum = _sum_transit_arrivals(next_transit_arrival)
+             else:  # Last period - no next period
+                 transit_arrival_sum = 0
+
+             # Update FutureInventoryTransitArrival
+             period_results[i]['FutureInventoryTransitArrival'] = _format_value_complete(
+                 period_results[i]['FutureInventory'].iloc[0] + transit_arrival_sum,
+                 'FutureInventoryTransitArrival', integer
+             )
+
+             # Recalculate FutureStockoutDays with the updated FutureInventoryTransitArrival
+             period_results[i]['FutureStockoutDays'] = _calculate_inventory_days_complete(
+                 period_results[i], integer
+             )
+
+         # Combine all periods for this item
+         if period_results:
+             # Stack all period results at once
+             item_df = pd.concat(period_results.values(), ignore_index=True)
+
+             # Reorder columns for consistency
+             cols = ['Date', 'Item']
+             if location:
+                 cols.append('Location')
+             other_cols = [col for col in item_df.columns if col not in cols]
+             item_df = item_df[cols + other_cols]
+
+             return item_df
+
+         return None
+
+     except Exception as e:
+         # Handle any unexpected errors at the item level
+         print(f"Error processing item {key}: {str(e)}")
+         return None
 
-     def future_date(self):
 
-         '''Function to calculate the future dates by Item or Item-Location'''
+ def _format_value_complete(value, field_name, integer):
+     """Apply appropriate formatting based on field type and the integer setting."""
+     # Handle pandas Series - extract the scalar value
+     if isinstance(value, pd.Series):
+         if len(value) == 1:
+             value = value.iloc[0]
+         else:
+             raise ValueError(f"Expected scalar value for {field_name}, got Series with {len(value)} elements")
+
+     # Handle NaN, None, and infinite values
+     if pd.isna(value) or value is None:
+         return 0
+     if np.isinf(value):
+         return 0
+
+     # Fields that are ALWAYS integers
+     always_integer_fields = [
+         'PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime',
+         'ReorderQtyDays', 'ReorderFreq', 'Coverage', 'FutureStockoutDays'
+     ]
+
+     # Fields that are ALWAYS decimals (2 decimal places)
+     always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
+
+     # Fields that change based on the integer setting
+     quantity_fields = [
+         'FutureInventoryTransit', 'FutureInventory', 'FutureTransit',
+         'FutureInventoryTransitArrival', 'SuggestedForecast', 'SuggestedForecastPeriod',
+         'ReorderPoint', 'ReorderQtyBase', 'ReorderQty', 'SecurityStock', 'Inventory', 'Transit'
+     ]
+
+     if field_name in always_integer_fields:
+         return int(round(value))
+     elif field_name in always_decimal_fields:
+         return round(value, 2)
+     elif field_name in quantity_fields:
+         if integer:
+             return int(round(value))
+         else:
+             return round(value, 2)
+     else:
+         # Default: return as is
+         return value
+
 
-         DOCKER_CONFIG_PATH = os.path.join('/opt/ml/processing/input', 'config.yml')
-         utils = Utils(config_file=DOCKER_CONFIG_PATH, logfile='data_io', log_path='output/logs')
+ def _calculate_suggested_forecast_complete(current_df_lead_time, current_df_inv, date, last_suggested_value,
+                                            df_fcst, df_prep, metadata, location, default_coverage, complete_suggested):
+     """Calculate the suggested forecast for the given date using the SuggestedForecast class."""
+     # Convert the current date to datetime
+     current_date = pd.to_datetime(date, format='%Y%m%d')
+
+     # Get the maximum forecast date available
+     max_forecast_date = df_fcst['Date'].max()
+
+     # Get the coverage value for this item
+     coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
+     if pd.isna(coverage):
+         coverage = default_coverage
+
+     # Calculate the required forecast end date
+     required_forecast_end_date = current_date + timedelta(days=int(coverage))
+
+     # Check if we have sufficient forecast data
+     if max_forecast_date < required_forecast_end_date:
+         if complete_suggested:
+             if last_suggested_value is not None:
+                 # Use the last calculated SuggestedForecast value
+                 result_df = current_df_inv[metadata].copy()
+                 result_df['SuggestedForecast'] = last_suggested_value
+
+                 # Add PurchaseFactor and ItemDescription from inventory data using safe access
+                 result_df['PurchaseFactor'] = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
+                 result_df['ItemDescription'] = current_df_inv.get('ItemDescription', pd.Series([''])).iloc[0]
 
-         timestamp = utils.set_timestamp()
-         actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')
+                 return result_df
+             else:
+                 # For the first period when complete_suggested=True but no previous value exists
+                 try:
+                     return SuggestedForecast(
+                         df_LeadTimes=current_df_lead_time,
+                         df_Forecast=df_fcst,
+                         df_Prep=df_prep,
+                         df_inv=current_df_inv,
+                         column_forecast='SuggestedForecast',
+                         columns_metadata=metadata,
+                         frequency_='M',
+                         location=location,
+                         actualdate=date,
+                         default_coverage_=default_coverage,
+                         join_='left'
+                     ).suggested_forecast()
+                 except Exception as e:
+                     # Get the item identifier for the error message
+                     item = current_df_inv['Item'].iloc[0]
+                     location_msg = ""
+                     if location and 'Location' in current_df_inv.columns:
+                         loc = current_df_inv['Location'].iloc[0]
+                         location_msg = f" at location {loc}"
+
+                     error_msg = (
+                         f"Cannot calculate initial forecast for item {item}{location_msg}. "
+                         f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
+                         f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
+                         f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}. "
+                         f"Original error: {str(e)}"
+                     )
+                     raise ValueError(error_msg)
+         else:
+             # Get the item identifier for the error message
+             item = current_df_inv['Item'].iloc[0]
+             location_msg = ""
+             if location and 'Location' in current_df_inv.columns:
+                 loc = current_df_inv['Location'].iloc[0]
+                 location_msg = f" at location {loc}"
+
+             error_msg = (
+                 f"Insufficient forecast data for item {item}{location_msg}. "
+                 f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
+                 f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
+                 f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}."
+             )
+             raise ValueError(error_msg)
+
+     # If validation passes, proceed with the original calculation
+     return SuggestedForecast(
+         df_LeadTimes=current_df_lead_time,
+         df_Forecast=df_fcst,
+         df_Prep=df_prep,
+         df_inv=current_df_inv,
+         column_forecast='SuggestedForecast',
+         columns_metadata=metadata,
+         frequency_='M',
+         location=location,
+         actualdate=date,
+         default_coverage_=default_coverage,
+         join_='left'
+     ).suggested_forecast()
 
-         item_dates = {}
 
-         columns = ['Item', 'ReorderFreq']
-         if self.location:
-             columns.append('Location')
+ def _calculate_daily_usage_complete(suggested_forecast_df, date, df_fcst, location):
+     """Calculate average and maximum daily usage rates."""
+     df_avg = DailyUsageFuture(
+         location=location,
+         column_forecast='SuggestedForecast',
+         date=date,
+         df_fcst=df_fcst
+     ).daily_usage(suggested_forecast_df, 'AvgDailyUsage').fillna(0)
 
-         for _, row in self.df_lead_time[columns].drop_duplicates().iterrows():
-             item = row['Item']
-             location = row['Location'] if self.location else None
-             cobertura = int(row['ReorderFreq']) if not pd.isnull(row['ReorderFreq']) and row['ReorderFreq'] != 0 else self.default_coverage
-             date = self.start_date
-             dates = []
+     df_max = DailyUsageFuture(
+         location=location,
+         column_forecast='SuggestedForecast',
+         date=date,
+         df_fcst=df_fcst
+     ).daily_usage(df_avg, 'MaxDailyUsage').fillna(0)
+
+     return df_avg, df_max
 
-             while date <= actual_date + pd.DateOffset(months=self.periods):
-                 dates.append(date.strftime('%Y%m%d'))
-                 date += timedelta(days=cobertura)
 
-             item_dates[(item, location) if self.location else item] = dates
+ def _calculate_security_stock_data_complete(df_max, current_df_lead_time, default_coverage, period_index, dates):
+     """
+     Calculate security stock related data and prepare for reorder calculations.
+
+     This function merges daily usage data with lead time information and calculates
+     the suggested forecast period based on coverage ratios. For period 0, it uses
+     days to the next period instead of reorder frequency for more accurate consumption.
+
+     The process includes:
+     1. Merging daily usage with lead time data
+     2. Determining effective reorder frequency and coverage
+     3. Calculating SuggestedForecastPeriod based on coverage ratio
+     4. Special handling for period 0 using actual days to next period
+
+     Args:
+         df_max: DataFrame with maximum daily usage
+         current_df_lead_time: Lead time data for the current item
+         default_coverage: Default coverage days
+         period_index: Current period index (0, 1, 2, ...)
+         dates: List of dates for this item
+
+     Returns:
+         pd.DataFrame: DataFrame with merged data and calculated fields including
+                       SuggestedForecastPeriod adjusted for the specific period
+     """
+     metadata = ['Item', 'Location'] if 'Location' in df_max.columns else ['Item']
+     merge_columns = ['Item', 'Location', 'AvgLeadTime', 'MaxLeadTime'] if 'Location' in df_max.columns else ['Item', 'AvgLeadTime', 'MaxLeadTime']
+     df_sstock = pd.merge(df_max, current_df_lead_time[merge_columns], on=metadata, how='inner').drop_duplicates()
+
+     # Get ReorderFreq and Coverage
+     reorder_freq = current_df_lead_time['ReorderFreq'].values[0]
+     if pd.isnull(reorder_freq) or reorder_freq == 0:
+         reorder_freq = default_coverage
+
+     coverage = default_coverage
+     if 'Coverage' in current_df_lead_time.columns:
+         coverage_val = current_df_lead_time['Coverage'].values[0]
+         if not pd.isnull(coverage_val):
+             coverage = coverage_val
+         else:
+             coverage = reorder_freq + df_sstock['AvgLeadTime'].values[0]
+     else:
+         coverage = reorder_freq + df_sstock['AvgLeadTime'].values[0]
+
+     # Calculate SuggestedForecastPeriod
+     if period_index == 0 and dates is not None and len(dates) > 1:
+         # For period 0, use days to the next period instead of the reorder frequency
+         current_date = pd.to_datetime(dates[0], format='%Y%m%d')
+         next_date = pd.to_datetime(dates[1], format='%Y%m%d')
+         days_to_next_period = (next_date - current_date).days
+
+         # Formula: SuggestedForecast × (days_to_next_period / coverage)
+         suggested_forecast_period = np.ceil(df_sstock['SuggestedForecast'] * (days_to_next_period / coverage))
+     else:
+         # For other periods, use the original calculation with the reorder frequency
+         suggested_forecast_period = np.ceil(df_sstock['SuggestedForecast'] * (reorder_freq / coverage))
+
+     df_sstock['SuggestedForecastPeriod'] = suggested_forecast_period.apply(
+         lambda x: int(round(x))  # SuggestedForecastPeriod is always an integer
+     )
+
+     return df_sstock
 
-         return item_dates
 
+ def _calculate_security_stock_complete(df, security_stock_ref, integer):
+     """Calculate security stock using the configured method. Replicates exactly the logic from future_reorder_optimized."""
+     # EXACTLY like future_reorder_optimized line 528-536
+     if security_stock_ref:
+         if 'SecurityStockDaysRef' in df.columns:
+             security_stock_value = df['SecurityStockDaysRef'].iloc[0] * df['AvgDailyUsage'].iloc[0]
+         else:
+             security_stock_value = 0
+     else:
+         security_stock_value = (df['MaxDailyUsage'].iloc[0] * df['MaxLeadTime'].iloc[0]) - (df['AvgDailyUsage'].iloc[0] * df['AvgLeadTime'].iloc[0])
+
+     # Apply formatting and return as a scalar
+     return _format_value_complete(security_stock_value, 'SecurityStock', integer)
 
-     def reorder(self):
 
-         '''Function to calculate the future reorder for inventory with dynamic coverage'''
+ def _calculate_inventory_days_complete(df, integer):
+     """Calculate inventory days using the configured method."""
+     # Calculate future stockout days with safe division
+     future_stockout_days = np.where(
+         df['AvgDailyUsage'] > 0,
+         (df['FutureInventoryTransitArrival'] - df['SecurityStock']) / df['AvgDailyUsage'],
+         0  # If no daily usage, return 0 days
+     )
+
+     # Apply formatting
+     return pd.Series(future_stockout_days).apply(lambda x: _format_value_complete(x, 'FutureStockoutDays', integer))
+
+
+ def _sum_transit_arrivals(transit_arrivals_str):
+     """Calculate the total quantity from a TransitArrival string."""
+     if transit_arrivals_str == '[]' or not transit_arrivals_str:
+         return 0.0
+
+     try:
+         arrivals = ast.literal_eval(transit_arrivals_str)
+         return sum(arrival.get('quantity', 0) for arrival in arrivals)
+     except (ValueError, SyntaxError):
+         return 0.0
+
+
+ def _prepare_transit_schedule_complete(key, transit_amount, dates, df_transit, location):
+     """Prepare the transit schedule based on df_transit or default logic."""
+     if transit_amount <= 0:
+         return []
+
+     transit_schedule = []
+
+     if df_transit is None:
+         # Default logic: the complete transit arrives in period 1
+         if len(dates) > 1:
+             arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
+             transit_schedule.append({
+                 'quantity': transit_amount,
+                 'arrival_date': arrival_date
+             })
+     else:
+         # Use the provided transit schedule
+         if location:
+             item, loc = key
+             mask = (df_transit['Item'] == item) & (df_transit['Location'] == loc)
+         else:
+             mask = df_transit['Item'] == key
+
+         transit_data = df_transit[mask].copy()
+
+         if not transit_data.empty:
+             # Validate that the totals match
+             total_scheduled = transit_data['Transit'].sum()
+             if abs(total_scheduled - transit_amount) > 0.01:  # Allow small floating point differences
+                 raise ValueError(f"Transit schedule total ({total_scheduled}) does not match inventory transit ({transit_amount}) for {key}")
+
+             # Create transit orders
+             for _, row in transit_data.iterrows():
+                 arrival_date = pd.to_datetime(row['ArrivalDate'], format='%Y-%m-%d')
+                 transit_schedule.append({
+                     'quantity': float(row['Transit']),
+                     'arrival_date': arrival_date
+                 })
+         else:
+             # If no transit data is provided for this item, use the default logic
+             if len(dates) > 1:
+                 arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
+                 transit_schedule.append({
+                     'quantity': transit_amount,
+                     'arrival_date': arrival_date
+                 })
+
+     return transit_schedule
+
+
+ def _process_current_period_complete(current_df_inv, df_sstock, key, date, transit_orders, dates, metadata, integer, security_stock_ref=False, df_transit=None):
+     """Process inventory for the current period (i=0). Replicates exactly the logic from future_reorder_optimized."""
+
+     # Get inventory data efficiently - EXACTLY like future_reorder_optimized line 410-414
+     try:
+         inventory_data = {
+             'FutureInventory': current_df_inv['Inventory'].iloc[0],
+             'FutureTransit': current_df_inv['Transit'].iloc[0],
+             'PurchaseFactor': current_df_inv['PurchaseFactor'].iloc[0] if 'PurchaseFactor' in current_df_inv.columns else 1
+         }
+     except KeyError as e:
+         # Handle missing columns gracefully
+         inventory_data = {
+             'FutureInventory': current_df_inv.get('Inventory', pd.Series([0])).iloc[0],
+             'FutureTransit': current_df_inv.get('Transit', pd.Series([0])).iloc[0],
+             'PurchaseFactor': current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
+         }
+
+     # Vectorized calculations - EXACTLY like future_reorder_optimized line 417-428
+     df = df_sstock.copy()
+     df['FutureInventory'] = _format_value_complete(inventory_data['FutureInventory'], 'FutureInventory', integer)
+     df['FutureTransit'] = _format_value_complete(inventory_data['FutureTransit'], 'FutureTransit', integer)
+     df['FutureInventoryTransit'] = _format_value_complete(
+         inventory_data['FutureInventory'] + inventory_data['FutureTransit'],
+         'FutureInventoryTransit', integer
+     )
+     df['PurchaseFactor'] = inventory_data['PurchaseFactor']
+
+     # Initialize transit orders - EXACTLY like future_reorder_optimized line 430-438
+     if key not in transit_orders:
+         transit_orders[key] = []
+
+     # Handle the transit schedule
+     transit_qty = float(inventory_data['FutureTransit'])
+     if transit_qty > 0:
+         transit_schedule = _prepare_transit_schedule_complete(key, transit_qty, dates, df_transit, 'Location' in metadata)
+         transit_orders[key].extend(transit_schedule)
+
+     # Set initial values - EXACTLY like future_reorder_optimized line 440-452
+     df['TransitArrival'] = '[]'
+     df['SecurityStock'] = _calculate_security_stock_complete(df, security_stock_ref, integer)
+     df['SuggestedForecast'] = _format_value_complete(df['SuggestedForecast'].iloc[0], 'SuggestedForecast', integer)
+     df['ReorderPoint'] = _format_value_complete(
+         max(0, df['SuggestedForecast'].iloc[0] + df['SecurityStock'].iloc[0]), 'ReorderPoint', integer
+     )
+     df['ReorderQtyBase'] = _format_value_complete(
+         max(0, df['ReorderPoint'].iloc[0] - df['FutureInventoryTransit'].iloc[0]), 'ReorderQtyBase', integer
+     )
+     df['ReorderQty'] = 0
+     df['ReorderQtyDays'] = 0
+     df['ArrivalDate'] = ''
+
+     return df
 
-         item_dates = self.future_date()
 
-         metadata = ['Item']
+ def _process_transit_orders_complete(transit_orders, key, current_date, previous_date):
+     """Process transit orders and calculate arrivals for the current period."""
+     # Get the orders for this key, return early if none
+     orders = transit_orders.get(key, [])
+     if not orders:
+         return 0, 0, []
+
+     new_transit = 0
+     remaining_orders = []
+     transit_arrivals = []
+     stock_from_arrivals = 0
+
+     for order in orders:
+         if order['arrival_date'] > previous_date and order['arrival_date'] <= current_date:
+             # Order arrives in this period
+             stock_from_arrivals += order['quantity']
+             transit_arrivals.append({
+                 'quantity': float(order['quantity']),
+                 'arrival_date': order['arrival_date'].strftime('%Y-%m-%d')
+             })
+         else:
+             # Order still in transit
+             new_transit += order['quantity']
+             remaining_orders.append(order)
+
+     transit_orders[key] = remaining_orders
+     return stock_from_arrivals, new_transit, transit_arrivals
+
+
+ def _process_future_period_complete(current_df_inv, df_sstock, df_previous, key, date, dates, i, transit_orders, metadata, integer, security_stock_ref=False):
+     """Process inventory for future periods (i>0). Replicates exactly the logic from future_reorder_optimized."""
+
+     # EXACTLY like future_reorder_optimized line 460-461
+     df = df_sstock.copy()
+     try:
+         df['PurchaseFactor'] = current_df_inv['PurchaseFactor'].iloc[0] if 'PurchaseFactor' in current_df_inv.columns else 1
+     except (KeyError, IndexError):
+         df['PurchaseFactor'] = 1
+
+     # Calculate consumption - EXACTLY like future_reorder_optimized line 463-465
+     consumption = df_previous['SuggestedForecastPeriod'].iloc[0]
+     previous_stock = df_previous['FutureInventory'].iloc[0] - consumption
+
+     # Process transit orders - EXACTLY like future_reorder_optimized line 467-473
+     current_date = pd.to_datetime(date, format='%Y%m%d')
+     previous_date = pd.to_datetime(dates[i-1], format='%Y%m%d')
+
+     stock_from_arrivals, new_transit, transit_arrivals = _process_transit_orders_complete(
+         transit_orders, key, current_date, previous_date
+     )
+
+     # Vectorized inventory updates - EXACTLY like future_reorder_optimized line 475-482
+     future_stock = max(0, previous_stock + stock_from_arrivals)
+     df['FutureInventory'] = _format_value_complete(future_stock, 'FutureInventory', integer)
+     df['FutureTransit'] = _format_value_complete(new_transit, 'FutureTransit', integer)
+     df['FutureInventoryTransit'] = _format_value_complete(
+         future_stock + new_transit, 'FutureInventoryTransit', integer
+     )
+     df['TransitArrival'] = str(transit_arrivals) if transit_arrivals else '[]'
+
+     # Vectorized reorder calculations - EXACTLY like future_reorder_optimized line 484-508
+     df['SecurityStock'] = _calculate_security_stock_complete(df, security_stock_ref, integer)
+     df['SuggestedForecast'] = _format_value_complete(df['SuggestedForecast'].iloc[0], 'SuggestedForecast', integer)
+     df['ReorderPoint'] = _format_value_complete(
+         max(0, df['SuggestedForecast'].iloc[0] + df['SecurityStock'].iloc[0]), 'ReorderPoint', integer
+     )
+     df['ReorderQtyBase'] = _format_value_complete(
+         max(0, df['ReorderPoint'].iloc[0] - df['FutureInventoryTransit'].iloc[0]), 'ReorderQtyBase', integer
+     )
+
+     # Calculate ReorderQty - EXACTLY like future_reorder_optimized line 494-500
+     reorder_qty_base = df['ReorderQtyBase'].iloc[0]
+     purchase_factor = df['PurchaseFactor'].iloc[0]
+
+     if reorder_qty_base > 0:
+         reorder_qty = np.ceil(reorder_qty_base / purchase_factor) * purchase_factor
+     else:
+         reorder_qty = 0
+
+     df['ReorderQty'] = _format_value_complete(reorder_qty, 'ReorderQty', integer)
+
+     # Calculate ReorderQtyDays - EXACTLY like future_reorder_optimized line 502-508
+     if df['ReorderQty'].iloc[0] > 0 and df['AvgDailyUsage'].iloc[0] > 0:
+         reorder_qty_days = df['ReorderQty'].iloc[0] / df['AvgDailyUsage'].iloc[0]
+     else:
+         reorder_qty_days = 0
+
+     df['ReorderQtyDays'] = _format_value_complete(reorder_qty_days, 'ReorderQtyDays', integer)
+
+     # Handle new orders - EXACTLY like future_reorder_optimized line 510-521
+     if df['ReorderQty'].iloc[0] > 0:
+         avg_lead_time = df['AvgLeadTime'].iloc[0]
+         arrival_date = current_date + timedelta(days=int(avg_lead_time))
+         transit_orders[key].append({
+             'quantity': float(df['ReorderQty'].iloc[0]),
+             'arrival_date': arrival_date
+         })
+         df['ArrivalDate'] = arrival_date.strftime('%Y-%m-%d')
+     else:
+         df['ArrivalDate'] = ''
+
+     return df
+
+
+ class FutureReorder():
+     """
+     Complete version optimized for massive processing of large datasets.
+     Includes ALL the functionality of the original class, optimized for parallelization.
+     """
+
+     def __init__(self, df_inv, df_lead_time, df_prep, df_fcst, periods, start_date,
+                  location=False, security_stock_ref=False, df_transit=None, integer=True,
+                  complete_suggested=False, start_date_zero=None, batch_size=None, n_workers=None,
+                  verbose=True):
+
+         # Original parameters - ALL the parameters of the original class
+         self.df_inv = df_inv
+         self.df_lead_time = df_lead_time
+         self.df_prep = df_prep
+         self.df_fcst = df_fcst
+         self.default_coverage = 30
+         self.periods = periods
+         self.start_date = pd.to_datetime(start_date, format='%Y-%m-%d') if start_date is not None else None
+         self.location = location
+         self.security_stock_ref = security_stock_ref
+         self.df_transit = df_transit
+         self.integer = integer
+         self.complete_suggested = complete_suggested
+         self.start_date_zero = start_date_zero
+
+         # Optimization parameters with intelligent defaults
+         total_items = len(df_inv)
+
+         # Auto-configure batch_size based on dataset size
+         if batch_size is None:
+             if total_items <= 500:
+                 self.batch_size = 50  # Small batches for small datasets
+             elif total_items <= 2000:
+                 self.batch_size = 100  # Medium batches
+             else:
+                 self.batch_size = 200  # Larger batches for big datasets
+         else:
+             self.batch_size = batch_size
+
+         # Auto-configure n_workers based on the system and dataset
+         if n_workers is None:
+             available_cores = cpu_count()
+             if total_items <= 200:
+                 self.n_workers = min(2, available_cores - 1)  # Conservative for small datasets
+             elif total_items <= 1000:
+                 self.n_workers = min(4, available_cores - 1)  # Moderate parallelization
+             else:
+                 self.n_workers = min(max(4, available_cores - 2), 8)  # Aggressive for large datasets
+         else:
+             self.n_workers = n_workers
+
+         self.verbose = verbose
+
+         # Initialize metadata columns
+         self.metadata = ['Item']
          if self.location:
-             metadata.append('Location')
-
-         df_lead_time_rf = self.df_lead_time.copy()
-         df_lead_time_rf['Coverage'] = df_lead_time_rf['ReorderFreq']
-         df_lead_time_rf['AvgLeadTime'] = df_lead_time_rf['ReorderFreq']
-         df_lead_time_rf['MaxLeadTime'] = df_lead_time_rf['ReorderFreq']
-
-         SuggestedForecast_cov = {}
-         SuggestedForecast_rf = {}
-         df_forecast = {}
-         df_avg_gen = {}
-         df_max_gen = {}
-         df_sstock = {}
-         df_inventory = {}
-         df = {}
-
-         # Initialize the DataFrame
-         columns = ['Date', 'Item'] + (['Location'] if self.location else [])
-         data_frame = pd.DataFrame(columns=columns)
-
-         # Iterate over each (Item, Location) or (Item) combination depending on use_location
-         for key, dates in item_dates.items():
+             self.metadata.append('Location')
+
+         # Pre-filter dataframes based on df_inv to improve performance
+         self._prefilter_dataframes()
+
+         self._log(f"🚀 FutureReorder Massive Complete - Initialized for {len(self.df_inv)} items")
+         self._log(f"⚙️ Configuration: batch_size={self.batch_size}, workers={self.n_workers}")
+
+     def _prefilter_dataframes(self):
+         """
+         Pre-filter all input dataframes based on df_inv to improve performance.
+         Only process data that exists in df_inv (inventory data).
+         """
+         if self.verbose:
+             original_sizes = {
+                 'df_lead_time': len(self.df_lead_time),
+                 'df_prep': len(self.df_prep),
+                 'df_fcst': len(self.df_fcst),
+                 'df_transit': len(self.df_transit) if self.df_transit is not None else 0
+             }
+             self._log("🔍 Pre-filtering dataframes based on df_inv...")
+
+         # Create the base filter from df_inv
+         if self.location:
+             base_filter = self.df_inv[['Item', 'Location']].drop_duplicates()
+         else:
+             base_filter = self.df_inv[['Item']].drop_duplicates()
+
+         # Filter df_lead_time
+         if self.location:
+             self.df_lead_time = self.df_lead_time.merge(
+                 base_filter,
+                 on=['Item', 'Location'],
+                 how='inner'
+             )
+         else:
+             self.df_lead_time = self.df_lead_time.merge(
+                 base_filter,
+                 on=['Item'],
+                 how='inner'
+             )
+
+         # Filter df_prep - handle different column naming conventions
+         if self.location:
+             # Check if df_prep uses 'item_id' and 'location' columns
+             if 'item_id' in self.df_prep.columns and 'location' in self.df_prep.columns:
+                 # Create a renamed base filter for df_prep
+                 base_filter_prep = base_filter.copy()
+                 base_filter_prep = base_filter_prep.rename(columns={'Item': 'item_id', 'Location': 'location'})
+                 self.df_prep = self.df_prep.merge(
+                     base_filter_prep,
+                     on=['item_id', 'location'],
+                     how='inner'
+                 )
+             else:
+                 # Use standard column names
+                 self.df_prep = self.df_prep.merge(
+                     base_filter,
+                     on=['Item', 'Location'],
+                     how='inner'
+                 )
+         else:
+             # Check if df_prep uses the 'item_id' column
+             if 'item_id' in self.df_prep.columns:
+                 base_filter_prep = base_filter.copy()
+                 base_filter_prep = base_filter_prep.rename(columns={'Item': 'item_id'})
+                 self.df_prep = self.df_prep.merge(
+                     base_filter_prep,
+                     on=['item_id'],
+                     how='inner'
+                 )
+             else:
+                 self.df_prep = self.df_prep.merge(
+                     base_filter,
+                     on=['Item'],
+                     how='inner'
+                 )
+
+         # Filter df_fcst
+         if self.location:
+             self.df_fcst = self.df_fcst.merge(
+                 base_filter,
+                 on=['Item', 'Location'],
+                 how='inner'
+             )
+         else:
+             self.df_fcst = self.df_fcst.merge(
+                 base_filter,
+                 on=['Item'],
+                 how='inner'
+             )
+
+         # Filter df_transit if it exists
+         if self.df_transit is not None:
              if self.location:
-                 item, location = key
+                 self.df_transit = self.df_transit.merge(
+                     base_filter,
+                     on=['Item', 'Location'],
+                     how='inner'
+                 )
              else:
-                 item = key
-                 location = None
+                 self.df_transit = self.df_transit.merge(
+                     base_filter,
+                     on=['Item'],
+                     how='inner'
+                 )
+
+         if self.verbose:
+             new_sizes = {
+                 'df_lead_time': len(self.df_lead_time),
+                 'df_prep': len(self.df_prep),
+                 'df_fcst': len(self.df_fcst),
+                 'df_transit': len(self.df_transit) if self.df_transit is not None else 0
+             }
+
+             self._log("📊 Filtering completed:")
+             for df_name, original_size in original_sizes.items():
+                 new_size = new_sizes[df_name]
+                 if original_size > 0:
+                     reduction_pct = ((original_size - new_size) / original_size) * 100
+                     self._log(f" • {df_name}: {original_size:,} → {new_size:,} (-{reduction_pct:.1f}%)")
+                 else:
+                     self._log(f" • {df_name}: {original_size:,} → {new_size:,}")
 
-             for i, date in enumerate(dates):
-                 if self.location:
-                     current_df_lead_time_cov = self.df_lead_time[(self.df_lead_time['Item'] == item) &
-                                                                  (self.df_lead_time['Location'] == location)]
+     def _log(self, message):
+         if self.verbose:
+             print(message)
+             sys.stdout.flush()
 
-                     current_df_lead_time_rf = df_lead_time_rf[(df_lead_time_rf['Item'] == item) &
-                                                               (df_lead_time_rf['Location'] == location)]
+     def future_date(self):
+         """
+         Generate future reorder dates for each item based on reorder frequency.
+         Optimized version of the original function.
+         """
+         # Determine the starting date for period 0 - EXACTLY like future_reorder_optimized line 148-155
+         if self.start_date_zero is not None:
+             # Use the custom start date for period 0
+             actual_date = pd.to_datetime(self.start_date_zero, format='%Y-%m-%d')
+         else:
+             # Use the current system date for period 0 (original behavior)
+             DOCKER_CONFIG_PATH = os.path.join('/opt/ml/processing/input', 'config.yml')
+             utils = Utils(config_file=DOCKER_CONFIG_PATH, logfile='data_io', log_path='output/logs')
+             timestamp = utils.set_timestamp()
+             actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')
+
+         # Use periods + 1 internally to calculate one extra period for transit calculations
+         # The extra period will be filtered out in the final results
+         end_date = actual_date + pd.DateOffset(months=self.periods + 1)
+
+         # Handle the start_date = None case
+         if self.start_date is None:
+             # If start_date is None, use actual_date as the base for period 1
+             base_start_date = actual_date
+         else:
+             base_start_date = self.start_date
 
-                     current_df_inv = self.df_inv[(self.df_inv['Item'] == item) &
-                                                  (self.df_inv['Location'] == location)]
+         # Get unique items with their reorder frequencies
+         columns = self.metadata + ['ReorderFreq']
+         df_unique = self.df_lead_time[columns].drop_duplicates().copy()
+
+         # Process ReorderFreq values
+         df_unique['ReorderFreq'] = df_unique['ReorderFreq'].fillna(self.default_coverage)
+         df_unique.loc[df_unique['ReorderFreq'] == 0, 'ReorderFreq'] = self.default_coverage
+         df_unique['ReorderFreq'] = df_unique['ReorderFreq'].astype(int)
+
+         # Pre-allocate the result dictionary
+         item_dates = {}
+
+         # Group by ReorderFreq for batch processing - more efficient for large datasets
+         for freq, group in df_unique.groupby('ReorderFreq'):
+             # Generate the date range for this frequency
+             date_range = []
+
+             # Always include the actual date (period 0)
+             date_range.append(actual_date)
+
+             # Include base_start_date if it is after actual_date
+             if base_start_date > actual_date:
+                 date_range.append(base_start_date)
+
+             # Generate subsequent dates using pandas date_range for efficiency
+             num_periods = int((end_date - base_start_date).days / freq) + 1
+             future_dates = pd.date_range(
+                 start=base_start_date + timedelta(days=freq),
+                 periods=num_periods,
+                 freq=f'{freq}D'
+             )
+             date_range.extend(future_dates[future_dates <= end_date])
+
+             # Convert to string format
+             date_strings = [d.strftime('%Y%m%d') for d in date_range]
+
+             # Assign to all items in this group
+             for _, row in group.iterrows():
+                 if self.location:
+                     key = (row['Item'], row['Location'])
+                 else:
+                     key = row['Item']
+                 item_dates[key] = date_strings
+
+         return item_dates
 
+     def _prepare_batch_data(self, item_dates):
+         """Prepare batch data efficiently. Replicates exactly the logic from future_reorder_optimized."""
+         batch_data = []
+
+         for key, dates in item_dates.items():
+             try:
+                 # Get dataframes for this item - EXACTLY like future_reorder_optimized _get_current_dataframes_optimized
+                 if self.location:
+                     item, location = key
                  else:
-                     current_df_lead_time_cov = self.df_lead_time[self.df_lead_time['Item'] == item]
-                     current_df_lead_time_rf = df_lead_time_rf[df_lead_time_rf['Item'] == item]
-                     current_df_inv = self.df_inv[self.df_inv['Item'] == item]
+                     item = key
+                     location = None
 
-                 if current_df_lead_time_cov.empty or current_df_lead_time_rf.empty or current_df_inv.empty:
-                     continue
-
-                 # SuggestedForecast_Coverage
-                 SuggestedForecast_cov[i] = SuggestedForecast(df_LeadTimes=current_df_lead_time_cov,
-                                                              df_Forecast=self.df_fcst,
-                                                              df_Prep=self.df_prep,
-                                                              df_inv=current_df_inv,
-                                                              column_forecast='SuggestedForecast',
-                                                              columns_metadata=metadata,
-                                                              frequency_='M',
-                                                              location=self.location,
-                                                              actualdate=date,
-                                                              default_coverage_=self.default_coverage,
-                                                              join_='left').suggested_forecast()
+                 # Create a filter mask based on the item
+                 mask_lead_time = self.df_lead_time['Item'] == item
+                 mask_inv = self.df_inv['Item'] == item
 
-                 SuggestedForecast_cov[i].rename(columns={'SuggestedForecast':'Suggested_Coverage'},inplace=True)
-
-                 # SuggestedForecast_ReorderFreq
-                 SuggestedForecast_rf[i] = SuggestedForecast(df_LeadTimes=current_df_lead_time_rf,
-                                                             df_Forecast=self.df_fcst,
-                                                             df_Prep=self.df_prep,
-                                                             df_inv=current_df_inv,
-                                                             column_forecast='SuggestedForecast',
-                                                             columns_metadata=metadata,
-                                                             frequency_='M',
-                                                             location=self.location,
-                                                             actualdate=date,
-                                                             default_coverage_=self.default_coverage,
-                                                             join_='left').suggested_forecast()
-
-                 SuggestedForecast_rf[i].rename(columns={'SuggestedForecast':'Suggested_ReorderFreq'},inplace=True)
-                 SuggestedForecast_rf[i] = SuggestedForecast_rf[i][metadata + ['Suggested_ReorderFreq']]
-
-                 # Concatenate
-                 df_forecast[i] = pd.merge(SuggestedForecast_cov[i], SuggestedForecast_rf[i], on=metadata, how='outer')
+                 # Add the location filter if needed
+                 if self.location and location is not None:
+                     mask_lead_time &= self.df_lead_time['Location'] == location
+                     mask_inv &= self.df_inv['Location'] == location
 
-                 # Calculate AvgDailyUsage and MaxDailyUsage
-                 df_avg_gen[i] = DailyUsageFuture(location=self.location,
-                                                  column_forecast='SuggestedForecast',
-                                                  date=date,
-                                                  df_fcst=self.df_fcst).daily_usage(df_forecast[i], 'AvgDailyUsage').fillna(0)
-
-                 df_max_gen[i] = DailyUsageFuture(location=self.location,
-                                                  column_forecast='SuggestedForecast',
-                                                  date=date,
-                                                  df_fcst=self.df_fcst).daily_usage(df_avg_gen[i], 'MaxDailyUsage').fillna(0)
-
-                 # Adjust AvgDailyUsage and MaxDailyUsage if they are zero.
-                 df_avg_gen[i] = df_avg_gen[i].replace(0,0.001)
-                 df_max_gen[i] = df_max_gen[i].replace(0,0.0012)
-
-                 # Calculate the security stock
-                 merge_columns = ['Item', 'Location', 'AvgLeadTime', 'MaxLeadTime'] if self.location else ['Item', 'AvgLeadTime', 'MaxLeadTime']
-                 df_sstock[i] = pd.merge(df_max_gen[i], current_df_lead_time_cov[merge_columns], on=metadata, how='inner').drop_duplicates()
-
-                 # Current Period
-                 if i == 0:
-                     inventory_columns = ['Item', 'Location', 'Inventory', 'Transit', 'PurchaseFactor'] if self.location else ['Item', 'Inventory', 'Transit', 'PurchaseFactor']
-                     df_inventory[i] = current_df_inv[inventory_columns]
-                     df_inventory[i]['InventoryTransit'] = df_inventory[i]['Inventory'] + df_inventory[i]['Transit']
-                     df_inventory[i] = df_inventory[i][metadata + ['InventoryTransit']]
-                     df[i] = pd.merge(df_inventory[i], df_sstock[i], on=metadata, how='inner')
-
-                     if self.security_stock_ref:
-                         df[i]['SecurityStock'] = df[i]['SecurityStockDaysRef'] * df[i]['AvgDailyUsage']
-                     else:
-                         df[i]['SecurityStock'] = (df[i]['MaxDailyUsage'] * df[i]['MaxLeadTime']) - (df[i]['AvgDailyUsage'] * df[i]['AvgLeadTime'])
-
-                     df[i]['ReorderPoint'] = (df[i]['Suggested_Coverage'] + df[i]['SecurityStock']).clip(lower=0)
-                     df[i]['ReorderQtyBase'] = (df[i]['ReorderPoint'] - df[i]['InventoryTransit']).clip(lower=1)
-                     df[i]['ReorderQty'] = ((df[i]['ReorderQtyBase'] / df[i]['PurchaseFactor']).apply(np.ceil)) * df[i]['PurchaseFactor']
-                     df[i]['ReorderQtyDays'] = (df[i]['ReorderQty'] / df[i]['AvgDailyUsage']).astype(int)
+                 # Apply the filters using boolean indexing
+                 current_df_lead_time = self.df_lead_time[mask_lead_time]
+                 current_df_inv = self.df_inv[mask_inv]
+
+                 if not current_df_lead_time.empty and not current_df_inv.empty:
+                     batch_data.append((key, dates, current_df_lead_time, current_df_inv))
 
-                 # Future Dates
-                 else:
-                     inventory_columns = ['Item', 'Location', 'PurchaseFactor'] if self.location else ['Item', 'PurchaseFactor']
-                     df_inventory[i] = current_df_inv[inventory_columns]
-                     df[i] = pd.merge(df_inventory[i], df_sstock[i], on=inventory_columns, how='inner')
+             except Exception as e:
+                 if self.verbose:
+                     print(f"Error preparing {key}: {e}")
+                 continue
+
+         return batch_data
 
-                     if self.security_stock_ref:
-                         df[i]['SecurityStock'] = df[i]['SecurityStockDaysRef'] * df[i]['AvgDailyUsage']
-                     else:
-                         df[i]['SecurityStock'] = (df[i]['MaxDailyUsage'] * df[i]['MaxLeadTime']) - (df[i]['AvgDailyUsage'] * df[i]['AvgLeadTime'])
+     def _prepare_final_dataframe(self, data_frame):
+         """
+         Prepare the final output dataframe with proper formatting and column selection.
+         Complete version of the original function.
+         """
+         leadtimes_columns = ['Item', 'Location', 'ReorderFreq', 'Coverage'] if self.location else ['Item', 'ReorderFreq', 'Coverage']
+         leadtimes = self.df_lead_time[leadtimes_columns]
+         df_final = pd.merge(data_frame, leadtimes, on=self.metadata, how='left').fillna(0)
+
+         # Format the date and rename it to PurchaseDate
+         df_final['PurchaseDate'] = pd.to_datetime(df_final['Date'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
+         df_final = df_final.drop('Date', axis=1)
+
+         # Ensure ArrivalDate is present (in case some records don't have it)
+         if 'ArrivalDate' not in df_final.columns:
+             df_final['ArrivalDate'] = ''
+
+         # Apply formatting to fields that are ALWAYS integers
+         always_integer_fields = ['PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime', 'ReorderQtyDays', 'ReorderFreq', 'Coverage']
+         for field in always_integer_fields:
+             if field in df_final.columns:
+                 df_final[field] = df_final[field].apply(lambda x: _format_value_complete(x, field, True))
+
+         # Apply formatting to fields that are ALWAYS decimals
+         always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
+         for field in always_decimal_fields:
+             if field in df_final.columns:
+                 df_final[field] = df_final[field].apply(lambda x: _format_value_complete(x, field, False))
+
+         # Select the final columns
+         if self.location:
+             final_cols = [
+                 'PurchaseDate', 'Item', 'ItemDescription', 'Location', 'SuggestedForecast',
+                 'SuggestedForecastPeriod', 'FutureInventoryTransit', 'FutureInventory',
+                 'FutureTransit', 'FutureInventoryTransitArrival', 'FutureStockoutDays', 'TransitArrival',
+                 'ReorderQtyBase', 'ReorderQty', 'ReorderQtyDays', 'ArrivalDate', 'PurchaseFactor',
+                 'ReorderPoint', 'SecurityStock', 'AvgDailyUsage', 'MaxDailyUsage', 'AvgLeadTime',
+                 'MaxLeadTime', 'ReorderFreq', 'Coverage'
+             ]
+         else:
+             final_cols = [
+                 'PurchaseDate', 'Item', 'ItemDescription', 'SuggestedForecast',
+                 'SuggestedForecastPeriod', 'FutureInventoryTransit', 'FutureInventory',
+                 'FutureTransit', 'FutureInventoryTransitArrival', 'FutureStockoutDays', 'TransitArrival',
+                 'ReorderQtyBase', 'ReorderQty', 'ReorderQtyDays', 'ArrivalDate', 'PurchaseFactor',
+                 'ReorderPoint', 'SecurityStock', 'AvgDailyUsage', 'MaxDailyUsage', 'AvgLeadTime',
+                 'MaxLeadTime', 'ReorderFreq', 'Coverage'
+             ]
+
+         return df_final[final_cols]
 
-                     df[i]['InventoryTransit'] = ((df[i-1]['InventoryTransit'] - df[i-1]['Suggested_ReorderFreq']) + df[i-1]['ReorderQty']).clip(lower=0)
-                     df[i]['ReorderPoint'] = (df[i]['Suggested_Coverage'] + df[i]['SecurityStock']).clip(lower=0)
-                     df[i]['ReorderQtyBase'] = (df[i]['ReorderPoint'] - df[i]['InventoryTransit']).clip(lower=1)
-                     df[i]['ReorderQty'] = ((df[i]['ReorderQtyBase'] / df[i]['PurchaseFactor']).apply(np.ceil)) * df[i]['PurchaseFactor']
-                     df[i]['ReorderQtyDays'] = (df[i]['ReorderQty'] / df[i]['AvgDailyUsage']).astype(int)
-
+     def _filter_periods(self, df):
+         """
+         Filter out period 0 and the last period from the results.
+         Period 0 is used only as the calculation base.
+         The last period is filtered because it doesn't have next-period transit data.
+
+         Special case: When start_date=None, don't filter the first period
+         because it represents the actual current period.
+         """
+         if df.empty:
+             return df
+
+         # Convert PurchaseDate to datetime for filtering
+         df['PurchaseDate_dt'] = pd.to_datetime(df['PurchaseDate'])
+
+         # Get the unique dates and sort them
+         unique_dates = sorted(df['PurchaseDate_dt'].unique())
+
+         # Determine the filtering logic based on the start_date parameter
+         if self.start_date is None:
+             # When start_date=None, only filter the last period
+             # Keep period 0 as it represents the current period
+             if len(unique_dates) <= 1:
+                 self._log("⚠️ Warning: Only 1 period available, cannot filter last period")
+                 return pd.DataFrame(columns=df.columns.drop('PurchaseDate_dt'))
+
+             last_date = unique_dates[-1]
+             filtered_df = df[df['PurchaseDate_dt'] != last_date].copy()
+
+             self._log(f"🔍 Filtered periods (start_date=None): Only removed last period ({last_date.strftime('%Y-%m-%d')})")
+
+         else:
+             # When start_date is specified, filter both the first and last periods (original logic)
+             if len(unique_dates) <= 2:
+                 self._log("⚠️ Warning: Only 2 or fewer periods available after filtering")
+                 return pd.DataFrame(columns=df.columns.drop('PurchaseDate_dt'))
+
+             first_date = unique_dates[0]
+             last_date = unique_dates[-1]
+
+             filtered_df = df[
+                 (df['PurchaseDate_dt'] != first_date) &
+                 (df['PurchaseDate_dt'] != last_date)
+             ].copy()
+
+             self._log(f"🔍 Filtered periods: Removed period 0 ({first_date.strftime('%Y-%m-%d')}) and last period ({last_date.strftime('%Y-%m-%d')})")
+
+         # Drop the temporary datetime column
+         filtered_df = filtered_df.drop('PurchaseDate_dt', axis=1)
+
+         return filtered_df
 
-                 # Insert columns
-                 df[i].insert(loc=0, column='Date', value=date)
-                 df[i]['Item'] = item
+     def reorder(self):
+         """
+         Main method to calculate future reorder recommendations at massive scale.
+
+         This high-performance method orchestrates the complete inventory reorder calculation
+         process using parallel processing and intelligent resource management. It handles
+         large datasets efficiently through batching, multiprocessing, and optimized algorithms.
+
+         Processing Pipeline:
+             1. Generate future dates based on reorder frequencies
+             2. Pre-filter and prepare data for batch processing
+             3. Split items into optimally-sized batches
+             4. Process batches in parallel using multiple CPU cores
+             5. Combine and format results with proper data types
+             6. Apply period filtering (removes period 0 and the last period)
+             7. Return comprehensive reorder recommendations
+
+         Performance Features:
+             - Auto-configures batch sizes based on dataset size
+             - Uses ProcessPoolExecutor for true parallel processing
+             - Provides real-time progress tracking and ETA calculations
+             - Implements intelligent error handling and recovery
+             - Optimizes memory usage through efficient data structures
+
+         Period Filtering Logic:
+             - When start_date=None: Only removes the last period (keeps period 0 as current)
+             - When start_date is specified: Removes both period 0 and the last period
+             - The last period is always removed due to incomplete transit data
+
+         Returns:
+             pd.DataFrame: Complete reorder recommendations with columns:
+                 - PurchaseDate: Date when the reorder should be evaluated
+                 - Item, ItemDescription, (Location): Item identification
+                 - Forecast metrics: SuggestedForecast, SuggestedForecastPeriod
+                 - Inventory levels: FutureInventoryTransit, FutureInventory, FutureTransit
+                 - FutureInventoryTransitArrival: Stock + arrivals in the period
+                 - FutureStockoutDays: Days of inventory coverage
+                 - Transit information: TransitArrival details
+                 - Reorder metrics: ReorderQtyBase, ReorderQty, ReorderQtyDays
+                 - Order information: ArrivalDate of the current period's order
+                 - Planning parameters: PurchaseFactor, ReorderPoint, SecurityStock
+                 - Usage rates: AvgDailyUsage, MaxDailyUsage
+                 - Lead times: AvgLeadTime, MaxLeadTime
+                 - Coverage parameters: ReorderFreq, Coverage
 
-                 if self.location:
-                     df[i]['Location'] = location
-
-                 data_frame = pd.concat([data_frame, df[i]], ignore_index=True)
-
-         # Final DataFrame
-         leadtimes_columns = ['Item', 'Location', 'ReorderFreq', 'Coverage'] if self.location else ['Item', 'ReorderFreq', 'Coverage']
-         leadtimes = self.df_lead_time[leadtimes_columns]
-         df_final = pd.merge(data_frame, leadtimes, on=metadata, how='left').fillna(0)
-
-         df_final['Date'] = pd.to_datetime(df_final['Date'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
-         df_final = df_final.rename(columns={'InventoryTransit': 'FutureInventory'})
-         cols_to_round = ['SecurityStock', 'FutureInventory', 'Suggested_Coverage', 'Suggested_ReorderFreq', 'ReorderPoint', 'ReorderQtyBase']
-         df_final[cols_to_round] = df_final[cols_to_round].apply(np.ceil)
-         final_cols = ['Date', 'Item', 'ItemDescription', 'Location', 'Suggested_Coverage', 'Suggested_ReorderFreq', 'FutureInventory', 'ReorderQtyBase', 'ReorderQty', 'ReorderQtyDays', 'PurchaseFactor', 'ReorderPoint', 'SecurityStock',
-                       'AvgDailyUsage', 'MaxDailyUsage', 'AvgLeadTime', 'MaxLeadTime', 'ReorderFreq', 'Coverage'] if self.location \
-                       else ['Date', 'Item', 'ItemDescription', 'Suggested_Coverage', 'Suggested_ReorderFreq', 'FutureInventory', 'ReorderQtyBase', 'ReorderQty', 'ReorderQtyDays', 'PurchaseFactor', 'ReorderPoint', 'SecurityStock',
-                             'AvgDailyUsage', 'MaxDailyUsage', 'AvgLeadTime', 'MaxLeadTime', 'ReorderFreq', 'Coverage']
-         df_final = df_final[final_cols]
-
-         return df_final
+         Example usage:
+             >>> reorder_system = FutureReorder(
+             ...     df_inv=inventory_df,
+             ...     df_lead_time=lead_time_df,
+             ...     df_prep=prep_df,
+             ...     df_fcst=forecast_df,
+             ...     periods=6,
+             ...     start_date=None,   # Use the current date
+             ...     batch_size=100,    # Optional: auto-configured if None
+             ...     n_workers=4        # Optional: auto-configured if None
+             ... )
+             >>> results = reorder_system.reorder()
+             >>> print(f"Generated {len(results)} reorder recommendations")
+         """
+         start_time = time.time()
+
+         self._log("🚀 FutureReorder Massive Complete - Processing Started")
+
+         # Generate future dates
+         self._log("📅 Generating future dates...")
+         item_dates = self.future_date()
+
+         if not item_dates:
+             columns = ['Date', 'Item'] + (['Location'] if self.location else [])
+             return pd.DataFrame(columns=columns)
+
+         # Prepare batch data first to get an accurate count of the items that will actually be processed
+         batch_data = self._prepare_batch_data(item_dates)
+
+         # Calculate accurate statistics based on the items that will actually be processed
+         total_items = len(batch_data)
+         if batch_data:
+             # Get the dates for items that will actually be processed
+             processed_item_dates = {item_data[0]: item_data[1] for item_data in batch_data}
+             avg_periods = np.mean([len(dates) for dates in processed_item_dates.values()])
+             total_calculations = sum(len(dates) for dates in processed_item_dates.values())
+         else:
+             avg_periods = 0
+             total_calculations = 0
+
+         self._log(f"📊 Dataset Info:")
+         self._log(f" • Total Items: {total_items}")
+         self._log(f" • Average Periods per Item: {avg_periods:.1f}")
+         self._log(f" • Total Calculations: {total_calculations}")
+
+         # batch_data is already prepared above for accurate counting
+         if not batch_data:
+             self._log("⚠️ No items to process after filtering")
+             columns = ['Date', 'Item'] + (['Location'] if self.location else [])
+             return pd.DataFrame(columns=columns)
+
+         self._log("🔧 Batch data prepared...")
+
+         # Split into batches for parallel processing
+         batches = []
+         for i in range(0, len(batch_data), self.batch_size):
+             batch = batch_data[i:i + self.batch_size]
+             batch_args = (
+                 batch, self.df_fcst, self.df_prep, self.metadata,
+                 self.location, self.default_coverage, self.complete_suggested,
+                 self.security_stock_ref, self.integer, self.verbose, self.df_transit
+             )
+             batches.append(batch_args)
+
+         total_batches = len(batches)
+         items_per_batch = len(batch_data) / total_batches if total_batches > 0 else 0
+
+         self._log(f"⚙️ Processing Config:")
+         self._log(f" • Batch Size: {self.batch_size}")
+         self._log(f" • Workers: {self.n_workers}")
+         self._log(f" • Total Batches: {total_batches}")
+         self._log(f" • Items per Batch: {items_per_batch:.1f}")
+
+         current_time = datetime.now().strftime('%H:%M:%S')
+         self._log(f"⏱️ Starting processing at {current_time}")
+
+         # Process batches in parallel
+         results = []
+         completed_batches = 0
+
+         with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
+             # Submit all batches
+             future_to_batch = {executor.submit(process_item_batch_complete, batch_args): i
+                                for i, batch_args in enumerate(batches)}
+
+             # Collect results as they complete
+             for future in as_completed(future_to_batch):
+                 batch_idx = future_to_batch[future]
+                 try:
+                     result = future.result()
+                     if not result.empty:
+                         results.append(result)
+
+                     completed_batches += 1
+                     progress = (completed_batches / total_batches) * 100
+
+                     elapsed_time = time.time() - start_time
+                     if completed_batches > 0:
+                         eta_seconds = (elapsed_time / completed_batches) * (total_batches - completed_batches)
+                         eta_str = f"{int(eta_seconds // 60)}m {int(eta_seconds % 60)}s"
+                     else:
+                         eta_str = "calculating..."
+
+                     self._log(f"✅ Batch {completed_batches}/{total_batches} completed ({progress:.1f}%) - ETA: {eta_str}")
+
+                 except Exception as e:
+                     self._log(f"❌ Error in batch {batch_idx}: {e}")
+                     continue
+
+         # Combine all results
+         if results:
+             self._log("🔗 Combining results...")
+             final_result = pd.concat(results, ignore_index=True)
+
+             # Prepare the final dataframe with proper formatting
+             final_result = self._prepare_final_dataframe(final_result)
+
+             # Filter out period 0 and the last period from the results
+             final_result = self._filter_periods(final_result)
+
+             total_time = time.time() - start_time
+             self._log(f"🎉 Processing completed in {total_time:.2f}s")
+             self._log(f"📈 Final result: {len(final_result)} records")
+
+             return final_result
+         else:
+             self._log("⚠️ No results generated")
+             columns = ['Date', 'Item'] + (['Location'] if self.location else [])
+             return pd.DataFrame(columns=columns)
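
Note on the new TransitArrival output column: the 1.112.0 code serializes each period's arrivals as a Python-literal list of dicts (via str(transit_arrivals) in _process_future_period_complete) and reads it back with ast.literal_eval in _sum_transit_arrivals. The standalone sketch below mirrors that helper to show the expected cell format and how a downstream consumer might total the arrivals; the sample values are illustrative, not data from the package.

    import ast

    def sum_transit_arrivals(transit_arrivals_str):
        # Mirror of _sum_transit_arrivals from the diff above
        if transit_arrivals_str == '[]' or not transit_arrivals_str:
            return 0.0
        try:
            arrivals = ast.literal_eval(transit_arrivals_str)
            return sum(arrival.get('quantity', 0) for arrival in arrivals)
        except (ValueError, SyntaxError):
            return 0.0

    cell = "[{'quantity': 30.0, 'arrival_date': '2025-03-01'}, {'quantity': 12.0, 'arrival_date': '2025-03-09'}]"
    print(sum_transit_arrivals(cell))  # 42.0
    print(sum_transit_arrivals('[]'))  # 0.0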