datupapi 1.111.0__py3-none-any.whl → 1.112.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,96 +1,732 @@
1
1
  import pandas as pd
2
+ import numpy as np
2
3
  import os
3
4
  import ast
4
- import numpy as np
5
- from datetime import timedelta
5
+ import time
6
+ import sys
7
+ from datetime import timedelta, datetime
8
+ from concurrent.futures import ProcessPoolExecutor, as_completed
9
+ from multiprocessing import cpu_count
10
+ import gc
11
+ from typing import Dict, List, Tuple, Optional, Union
6
12
  from datupapi.utils.utils import Utils
7
13
  from datupapi.inventory.src.SuggestedForecast.suggested_forecast import SuggestedForecast
8
14
  from datupapi.inventory.src.FutureInventory.daily_usage_future import DailyUsageFuture
9
15
 
10
16
 
11
- class FutureReorder():
17
+ def process_item_batch_complete(batch_args):
12
18
  """
13
- A class for calculating future inventory reorder points and quantities.
19
+ Process a batch of items in parallel with complete functionality.
14
20
 
15
- This class implements a sophisticated inventory management system that:
16
- - Calculates optimal reorder points based on forecasted demand
17
- - Manages in-transit inventory and arrival schedules
18
- - Determines safety stock levels using statistical or reference methods
19
- - Generates reorder recommendations for multiple future periods
20
- - Supports both single-location and multi-location inventory
21
+ This function executes in a separate process and handles batch processing
22
+ of inventory items for reorder calculations. It provides optimized error
23
+ handling and progress tracking for large-scale inventory processing.
21
24
 
22
- The system uses dynamic coverage strategies to optimize inventory levels
23
- while maintaining adequate safety stock to prevent stockouts.
25
+ Args:
26
+ batch_args (tuple): Contains all necessary data for batch processing:
27
+ - batch_items: List of item data tuples (key, dates, lead_time_df, inv_df)
28
+ - df_fcst: Forecast data DataFrame
29
+ - df_prep: Preparation data DataFrame
30
+ - metadata: List of metadata columns
31
+ - location: Boolean indicating if location processing is enabled
32
+ - default_coverage: Default coverage days
33
+ - complete_suggested: Boolean for complete suggested forecast mode
34
+ - security_stock_ref: Boolean for reference-based security stock calculation
35
+ - integer: Boolean for integer formatting of quantities
36
+ - verbose: Boolean for detailed logging
37
+ - df_transit: Transit schedule DataFrame (optional)
24
38
 
25
- Output Fields:
26
- - FutureInventoryTransit: Total future inventory (stock + transit)
27
- - FutureInventory: Future inventory in stock only
28
- - FutureTransit: Future inventory in transit only
29
- - FutureInventoryTransitArrival: Future inventory in stock + arrivals in the period
39
+ Returns:
40
+ pd.DataFrame: Combined results for all items in the batch, or an empty DataFrame if errors occur
30
41
  """
42
+ try:
43
+ (batch_items, df_fcst, df_prep, metadata, location, default_coverage,
44
+ complete_suggested, security_stock_ref, integer, verbose, df_transit) = batch_args
45
+
46
+ results = []
47
+ processed_count = 0
48
+ error_count = 0
49
+
50
+ for item_data in batch_items:
51
+ key, dates, current_df_lead_time, current_df_inv = item_data
52
+
53
+ try:
54
+ # Process this item using the complete logic with an implicit timeout
55
+ item_result = _process_item_complete(
56
+ key, dates, current_df_lead_time, current_df_inv,
57
+ df_fcst, df_prep, metadata, location, default_coverage,
58
+ complete_suggested, security_stock_ref, integer,
59
+ df_transit
60
+ )
61
+
62
+ if item_result is not None and not item_result.empty:
63
+ results.append(item_result)
64
+ processed_count += 1
65
+ else:
66
+ error_count += 1
67
+
68
+ except Exception as e:
69
+ error_count += 1
70
+ if verbose and error_count <= 3: # Limit error messages to avoid spam
71
+ print(f"⚠️ Error procesando {key}: {str(e)[:100]}...")
72
+ continue
73
+
74
+ # Log batch summary if there were errors
75
+ if verbose and error_count > 0:
76
+ print(f"📊 Batch summary: {processed_count} processed, {error_count} errors")
77
+
78
+ # Combine all items in this batch
79
+ if results:
80
+ return pd.concat(results, ignore_index=True)
81
+ else:
82
+ return pd.DataFrame()
83
+
84
+ except Exception as e:
85
+ print(f"❌ Error crítico en batch: {str(e)}")
86
+ return pd.DataFrame()
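
For context, a minimal sketch of how pre-built `batch_args` tuples like the ones documented above could be dispatched to `process_item_batch_complete` from the parent process. The `batches` list, `run_batches` helper, and worker count are assumptions for illustration, not part of the package.

```python
from concurrent.futures import ProcessPoolExecutor, as_completed
import pandas as pd

def run_batches(batches, n_workers=4):
    """Submit pre-built batch_args tuples and concatenate the per-batch results."""
    partial_results = []
    with ProcessPoolExecutor(max_workers=n_workers) as executor:
        futures = [executor.submit(process_item_batch_complete, args) for args in batches]
        for future in as_completed(futures):
            batch_df = future.result()  # empty DataFrame when the whole batch failed
            if not batch_df.empty:
                partial_results.append(batch_df)
    return pd.concat(partial_results, ignore_index=True) if partial_results else pd.DataFrame()
```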
31
87
 
32
- def __init__(self, df_inv, df_lead_time, df_prep, df_fcst, periods, start_date, location=False, security_stock_ref=False, df_transit=None, integer=True, complete_suggested=False, start_date_zero=None):
33
- """
34
- Initialize the FutureReorder instance.
35
-
36
- Args:
37
- df_inv (pd.DataFrame): Current inventory data with columns:
38
- - Item: Item identifier
39
- - Location: Location identifier (if location=True)
40
- - Inventory: Current on-hand stock
41
- - Transit: In-transit quantity
42
- - PurchaseFactor: Minimum order multiple
88
+
89
+ def _process_item_complete(key, dates, current_df_lead_time, current_df_inv,
90
+ df_fcst, df_prep, metadata, location, default_coverage,
91
+ complete_suggested, security_stock_ref, integer, df_transit=None):
92
+ """
93
+ Process a single item through all periods with complete functionality.
94
+
95
+ This function handles the complete inventory reorder calculation for a single item
96
+ across all time periods. It optimizes performance by pre-allocating data structures
97
+ and reducing repetitive calls to forecast and daily usage calculations.
98
+
99
+ The process includes:
100
+ 1. Calculating suggested forecasts for each period
101
+ 2. Computing daily usage rates (average and maximum)
102
+ 3. Determining security stock requirements
103
+ 4. Processing current period inventory
104
+ 5. Calculating future period reorder needs
105
+ 6. Managing transit order schedules
106
+ 7. Computing final inventory metrics
107
+
108
+ Args:
109
+ key: Item identifier (str) or (item, location) tuple
110
+ dates: List of calculation dates in 'YYYYMMDD' format
111
+ current_df_lead_time: Lead time data for this item
112
+ current_df_inv: Current inventory data for this item
113
+ df_fcst: Forecast data DataFrame
114
+ df_prep: Preparation data DataFrame
115
+ metadata: List of metadata columns
116
+ location: Boolean indicating location-based processing
117
+ default_coverage: Default coverage days
118
+ complete_suggested: Boolean for complete suggested forecast mode
119
+ security_stock_ref: Boolean for reference-based security stock
120
+ integer: Boolean for integer formatting
121
+ df_transit: Transit schedule DataFrame (optional)
122
+
123
+ Returns:
124
+ pd.DataFrame: Complete reorder calculations for all periods of this item
125
+ """
126
+ try:
127
+ # Pre-allocate dictionaries for intermediate results
128
+ suggested_forecasts = {}
129
+ df_avgs = {}
130
+ df_maxs = {}
131
+ df_sstocks = {}
132
+ period_results = {}
133
+
134
+ # Initialize transit orders for this item
135
+ transit_orders = {key: []}
136
+
137
+ # Track last suggested forecast value for complete_suggested feature
138
+ last_suggested_value = None
139
+
140
+ # Pre-calculate common values to avoid repeated calculations
141
+ coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
142
+ if pd.isna(coverage):
143
+ coverage = default_coverage
144
+
145
+ reorder_freq = current_df_lead_time['ReorderFreq'].iloc[0]
146
+ if pd.isna(reorder_freq) or reorder_freq == 0:
147
+ reorder_freq = default_coverage
148
+
149
+ # Process each period with optimized error handling
150
+ for i, date in enumerate(dates):
151
+ try:
152
+ # Calculate suggested forecast with better error handling
153
+ suggested_forecasts[i] = _calculate_suggested_forecast_complete(
154
+ current_df_lead_time, current_df_inv, date, last_suggested_value,
155
+ df_fcst, df_prep, metadata, location, default_coverage, complete_suggested
156
+ )
157
+
158
+ # Update last_suggested_value for next iteration
159
+ if 'SuggestedForecast' in suggested_forecasts[i].columns:
160
+ last_suggested_value = suggested_forecasts[i]['SuggestedForecast'].iloc[0]
161
+
162
+ # Calculate daily usage with optimized calls
163
+ df_avgs[i], df_maxs[i] = _calculate_daily_usage_complete(
164
+ suggested_forecasts[i], date, df_fcst, location
165
+ )
166
+
167
+ # Calculate security stock data with pre-calculated values
168
+ df_sstocks[i] = _calculate_security_stock_data_complete(
169
+ df_maxs[i], current_df_lead_time, default_coverage, i, dates
170
+ )
171
+
172
+ # Process period based on whether it's current or future
173
+ if i == 0:
174
+ period_results[i] = _process_current_period_complete(
175
+ current_df_inv, df_sstocks[i], key, date, transit_orders, dates,
176
+ metadata, integer, security_stock_ref, df_transit
177
+ )
178
+ else:
179
+ period_results[i] = _process_future_period_complete(
180
+ current_df_inv, df_sstocks[i], period_results[i-1],
181
+ key, date, dates, i, transit_orders, metadata, integer, security_stock_ref
182
+ )
183
+
184
+ # Add metadata columns efficiently
185
+ period_results[i]['Date'] = date
186
+ if location:
187
+ item, loc = key
188
+ period_results[i]['Item'] = item
189
+ period_results[i]['Location'] = loc
190
+ else:
191
+ period_results[i]['Item'] = key
192
+
193
+ except Exception as e:
194
+ # Log error but continue with next period
195
+ print(f"Warning: Error processing period {i} for item {key}: {str(e)}")
196
+ continue
197
+
198
+ # After processing all periods, update FutureInventoryTransitArrival
199
+ for i in range(len(dates)):
200
+ if i < len(dates) - 1: # If there's a next period
201
+ # Get next period's TransitArrival
202
+ next_transit_arrival = period_results[i + 1]['TransitArrival'].iloc[0]
203
+ transit_arrival_sum = _sum_transit_arrivals(next_transit_arrival)
204
+ else: # Last period - no next period
205
+ transit_arrival_sum = 0
43
206
 
44
- df_lead_time (pd.DataFrame): Lead time and reorder parameters:
45
- - Item: Item identifier
46
- - Location: Location identifier (if location=True)
47
- - ReorderFreq: Days between reorders (default: 30)
48
- - AvgLeadTime: Average lead time in days
49
- - MaxLeadTime: Maximum lead time in days
50
- - Coverage: Total coverage days (optional)
51
- - SecurityStockDaysRef: Reference days for safety stock (optional)
207
+ # Update FutureInventoryTransitArrival
208
+ period_results[i]['FutureInventoryTransitArrival'] = _format_value_complete(
209
+ period_results[i]['FutureInventory'].iloc[0] + transit_arrival_sum,
210
+ 'FutureInventoryTransitArrival', integer
211
+ )
52
212
 
53
- df_prep (pd.DataFrame): Preparation data for forecast calculations
213
+ # Recalculate FutureStockoutDays with the updated FutureInventoryTransitArrival
214
+ period_results[i]['FutureStockoutDays'] = _calculate_inventory_days_complete(
215
+ period_results[i], integer
216
+ )
217
+
218
+ # Combine all periods for this item
219
+ if period_results:
220
+ # Stack all period results at once
221
+ item_df = pd.concat(period_results.values(), ignore_index=True)
54
222
 
55
- df_fcst (pd.DataFrame): Forecast data containing demand predictions
223
+ # Reorder columns for consistency
224
+ cols = ['Date', 'Item']
225
+ if location:
226
+ cols.append('Location')
227
+ other_cols = [col for col in item_df.columns if col not in cols]
228
+ item_df = item_df[cols + other_cols]
56
229
 
57
- periods (int): Number of future periods to calculate
230
+ return item_df
231
+
232
+ return None
233
+
234
+ except Exception as e:
235
+ # Handle any unexpected errors at the item level
236
+ print(f"Error processing item {key}: {str(e)}")
237
+ return None
238
+
239
+
240
+ def _format_value_complete(value, field_name, integer):
241
+ """Apply appropriate formatting based on field type and integer setting."""
242
+ # Handle pandas Series - extract scalar value
243
+ if isinstance(value, pd.Series):
244
+ if len(value) == 1:
245
+ value = value.iloc[0]
246
+ else:
247
+ raise ValueError(f"Expected scalar value for {field_name}, got Series with {len(value)} elements")
248
+
249
+ # Handle NaN, None, and infinite values
250
+ if pd.isna(value) or value is None:
251
+ return 0
252
+ if np.isinf(value):
253
+ return 0
254
+
255
+ # Fields that are ALWAYS integers
256
+ always_integer_fields = [
257
+ 'PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime',
258
+ 'ReorderQtyDays', 'ReorderFreq', 'Coverage', 'FutureStockoutDays'
259
+ ]
260
+
261
+ # Fields that are ALWAYS decimals (2 decimal places)
262
+ always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
263
+
264
+ # Fields that change based on integer setting
265
+ quantity_fields = [
266
+ 'FutureInventoryTransit', 'FutureInventory', 'FutureTransit',
267
+ 'FutureInventoryTransitArrival', 'SuggestedForecast', 'SuggestedForecastPeriod',
268
+ 'ReorderPoint', 'ReorderQtyBase', 'ReorderQty', 'SecurityStock', 'Inventory', 'Transit'
269
+ ]
270
+
271
+ if field_name in always_integer_fields:
272
+ return int(round(value))
273
+ elif field_name in always_decimal_fields:
274
+ return round(value, 2)
275
+ elif field_name in quantity_fields:
276
+ if integer:
277
+ return int(round(value))
278
+ else:
279
+ return round(value, 2)
280
+ else:
281
+ # Default: return as is
282
+ return value
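
A few illustrative calls (values assumed, and assuming the module's helpers are importable) showing the rules above: lead-time fields always round to integers, daily-usage fields keep two decimals, quantity fields follow the `integer` flag, and NaN is guarded to 0.

```python
assert _format_value_complete(4.6, 'AvgLeadTime', integer=False) == 5          # always integer
assert _format_value_complete(3.14159, 'AvgDailyUsage', integer=True) == 3.14  # always 2 decimals
assert _format_value_complete(10.4, 'ReorderQty', integer=True) == 10          # quantity, integer mode
assert _format_value_complete(10.4, 'ReorderQty', integer=False) == 10.4       # quantity, decimal mode
assert _format_value_complete(float('nan'), 'SecurityStock', integer=True) == 0
```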
283
+
284
+
285
+ def _calculate_suggested_forecast_complete(current_df_lead_time, current_df_inv, date, last_suggested_value,
286
+ df_fcst, df_prep, metadata, location, default_coverage, complete_suggested):
287
+ """Calculate suggested forecast for the given date using the SuggestedForecast class."""
288
+ # Convert current date to datetime
289
+ current_date = pd.to_datetime(date, format='%Y%m%d')
290
+
291
+ # Get the maximum forecast date available
292
+ max_forecast_date = df_fcst['Date'].max()
293
+
294
+ # Get coverage value for this item
295
+ coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
296
+ if pd.isna(coverage):
297
+ coverage = default_coverage
298
+
299
+ # Calculate the required forecast end date
300
+ required_forecast_end_date = current_date + timedelta(days=int(coverage))
301
+
302
+ # Check if we have sufficient forecast data
303
+ if max_forecast_date < required_forecast_end_date:
304
+ if complete_suggested:
305
+ if last_suggested_value is not None:
306
+ # Use the last calculated SuggestedForecast value
307
+ result_df = current_df_inv[metadata].copy()
308
+ result_df['SuggestedForecast'] = last_suggested_value
309
+
310
+ # Add PurchaseFactor and ItemDescription from inventory data using safe access
311
+ result_df['PurchaseFactor'] = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
312
+ result_df['ItemDescription'] = current_df_inv.get('ItemDescription', pd.Series([''])).iloc[0]
313
+
314
+ return result_df
315
+ else:
316
+ # For the first period when complete_suggested=True but no previous value exists
317
+ try:
318
+ return SuggestedForecast(
319
+ df_LeadTimes=current_df_lead_time,
320
+ df_Forecast=df_fcst,
321
+ df_Prep=df_prep,
322
+ df_inv=current_df_inv,
323
+ column_forecast='SuggestedForecast',
324
+ columns_metadata=metadata,
325
+ frequency_='M',
326
+ location=location,
327
+ actualdate=date,
328
+ default_coverage_=default_coverage,
329
+ join_='left'
330
+ ).suggested_forecast()
331
+ except Exception as e:
332
+ # Get item identifier for error message
333
+ item = current_df_inv['Item'].iloc[0]
334
+ location_msg = ""
335
+ if location and 'Location' in current_df_inv.columns:
336
+ loc = current_df_inv['Location'].iloc[0]
337
+ location_msg = f" at location {loc}"
338
+
339
+ error_msg = (
340
+ f"Cannot calculate initial forecast for item {item}{location_msg}. "
341
+ f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
342
+ f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
343
+ f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}. "
344
+ f"Original error: {str(e)}"
345
+ )
346
+ raise ValueError(error_msg)
347
+ else:
348
+ # Get item identifier for error message
349
+ item = current_df_inv['Item'].iloc[0]
350
+ location_msg = ""
351
+ if location and 'Location' in current_df_inv.columns:
352
+ loc = current_df_inv['Location'].iloc[0]
353
+ location_msg = f" at location {loc}"
58
354
 
59
- start_date (str): Starting date for calculations (format: 'YYYY-MM-DD')
355
+ error_msg = (
356
+ f"Insufficient forecast data for item {item}{location_msg}. "
357
+ f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
358
+ f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
359
+ f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}."
360
+ )
361
+ raise ValueError(error_msg)
362
+
363
+ # If validation passes, proceed with the original calculation
364
+ return SuggestedForecast(
365
+ df_LeadTimes=current_df_lead_time,
366
+ df_Forecast=df_fcst,
367
+ df_Prep=df_prep,
368
+ df_inv=current_df_inv,
369
+ column_forecast='SuggestedForecast',
370
+ columns_metadata=metadata,
371
+ frequency_='M',
372
+ location=location,
373
+ actualdate=date,
374
+ default_coverage_=default_coverage,
375
+ join_='left'
376
+ ).suggested_forecast()
377
+
378
+
379
+ def _calculate_daily_usage_complete(suggested_forecast_df, date, df_fcst, location):
380
+ """Calculate average and maximum daily usage rates."""
381
+ df_avg = DailyUsageFuture(
382
+ location=location,
383
+ column_forecast='SuggestedForecast',
384
+ date=date,
385
+ df_fcst=df_fcst
386
+ ).daily_usage(suggested_forecast_df, 'AvgDailyUsage').fillna(0)
387
+
388
+ df_max = DailyUsageFuture(
389
+ location=location,
390
+ column_forecast='SuggestedForecast',
391
+ date=date,
392
+ df_fcst=df_fcst
393
+ ).daily_usage(df_avg, 'MaxDailyUsage').fillna(0)
394
+
395
+ return df_avg, df_max
396
+
397
+
398
+ def _calculate_security_stock_data_complete(df_max, current_df_lead_time, default_coverage, period_index, dates):
399
+ """
400
+ Calculate security stock related data and prepare for reorder calculations.
401
+
402
+ This function merges daily usage data with lead time information and calculates
403
+ the suggested forecast period based on coverage ratios. For period 0, it uses
404
+ days to the next period instead of reorder frequency for more accurate consumption.
405
+
406
+ The process includes:
407
+ 1. Merging daily usage with lead time data
408
+ 2. Determining effective reorder frequency and coverage
409
+ 3. Calculating SuggestedForecastPeriod based on coverage ratio
410
+ 4. Special handling for period 0 using actual days to next period
411
+
412
+ Args:
413
+ df_max: DataFrame with maximum daily usage
414
+ current_df_lead_time: Lead time data for current item
415
+ default_coverage: Default coverage days
416
+ period_index: Current period index (0, 1, 2, ...)
417
+ dates: List of dates for this item
418
+
419
+ Returns:
420
+ pd.DataFrame: DataFrame with merged data and calculated fields including
421
+ SuggestedForecastPeriod adjusted for the specific period
422
+ """
423
+ metadata = ['Item', 'Location'] if 'Location' in df_max.columns else ['Item']
424
+ merge_columns = ['Item', 'Location', 'AvgLeadTime', 'MaxLeadTime'] if 'Location' in df_max.columns else ['Item', 'AvgLeadTime', 'MaxLeadTime']
425
+ df_sstock = pd.merge(df_max, current_df_lead_time[merge_columns], on=metadata, how='inner').drop_duplicates()
426
+
427
+ # Get ReorderFreq and Coverage
428
+ reorder_freq = current_df_lead_time['ReorderFreq'].values[0]
429
+ if pd.isnull(reorder_freq) or reorder_freq == 0:
430
+ reorder_freq = default_coverage
431
+
432
+ coverage = default_coverage
433
+ if 'Coverage' in current_df_lead_time.columns:
434
+ coverage_val = current_df_lead_time['Coverage'].values[0]
435
+ if not pd.isnull(coverage_val):
436
+ coverage = coverage_val
437
+ else:
438
+ coverage = reorder_freq + df_sstock['AvgLeadTime'].values[0]
439
+ else:
440
+ coverage = reorder_freq + df_sstock['AvgLeadTime'].values[0]
441
+
442
+ # Calculate SuggestedForecastPeriod
443
+ if period_index == 0 and dates is not None and len(dates) > 1:
444
+ # For period 0, use days to next period instead of reorder frequency
445
+ current_date = pd.to_datetime(dates[0], format='%Y%m%d')
446
+ next_date = pd.to_datetime(dates[1], format='%Y%m%d')
447
+ days_to_next_period = (next_date - current_date).days
448
+
449
+ # Formula: SuggestedForecast × (days_to_next_period / coverage)
450
+ suggested_forecast_period = np.ceil(df_sstock['SuggestedForecast'] * (days_to_next_period / coverage))
451
+ else:
452
+ # For other periods, use the original calculation with reorder frequency
453
+ suggested_forecast_period = np.ceil(df_sstock['SuggestedForecast'] * (reorder_freq / coverage))
454
+
455
+ df_sstock['SuggestedForecastPeriod'] = suggested_forecast_period.apply(
456
+ lambda x: int(round(x)) # SuggestedForecastPeriod is always integer
457
+ )
458
+
459
+ return df_sstock
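
A worked example of the coverage-ratio formula above with assumed numbers: period 0 scales the forecast by the gap to the next date, while later periods scale it by the reorder frequency.

```python
import numpy as np

suggested_forecast = 300   # units forecast over the coverage window (assumed)
coverage = 45              # days (assumed)
reorder_freq = 30          # days (assumed)
days_to_next_period = 15   # gap between dates[0] and dates[1], period 0 only (assumed)

period_0 = int(np.ceil(suggested_forecast * (days_to_next_period / coverage)))  # 100
later_periods = int(np.ceil(suggested_forecast * (reorder_freq / coverage)))    # 200
```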
460
+
461
+
462
+ def _calculate_security_stock_complete(df, security_stock_ref, integer):
463
+ """Calculate security stock using configured method. Replicates exactly the logic from future_reorder_optimized."""
464
+ # EXACTLY like future_reorder_optimized line 528-536
465
+ if security_stock_ref:
466
+ if 'SecurityStockDaysRef' in df.columns:
467
+ security_stock_value = df['SecurityStockDaysRef'].iloc[0] * df['AvgDailyUsage'].iloc[0]
468
+ else:
469
+ security_stock_value = 0
470
+ else:
471
+ security_stock_value = (df['MaxDailyUsage'].iloc[0] * df['MaxLeadTime'].iloc[0]) - (df['AvgDailyUsage'].iloc[0] * df['AvgLeadTime'].iloc[0])
472
+
473
+ # Apply formatting and return as scalar
474
+ return _format_value_complete(security_stock_value, 'SecurityStock', integer)
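
With assumed inputs, the two methods above work out as follows: the statistical method takes the gap between the worst-case and average scenarios, while the reference method covers a fixed number of days of average demand.

```python
max_daily_usage, avg_daily_usage = 12.0, 10.0   # units/day (assumed)
max_lead_time, avg_lead_time = 20, 15           # days (assumed)
security_stock_days_ref = 7                     # days (assumed)

statistical = max_daily_usage * max_lead_time - avg_daily_usage * avg_lead_time  # 240 - 150 = 90
reference = security_stock_days_ref * avg_daily_usage                            # 7 * 10 = 70
```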
475
+
476
+
477
+ def _calculate_inventory_days_complete(df, integer):
478
+ """Calculate inventory days using configured method."""
479
+ # Calculate future stockout days with safe division
480
+ future_stockout_days = np.where(
481
+ df['AvgDailyUsage'] > 0,
482
+ (df['FutureInventoryTransitArrival'] - df['SecurityStock']) / df['AvgDailyUsage'],
483
+ 0 # If no daily usage, return 0 days
484
+ )
485
+
486
+ # Apply formatting
487
+ return pd.Series(future_stockout_days).apply(lambda x: _format_value_complete(x, 'FutureStockoutDays', integer))
488
+
489
+
490
+ def _sum_transit_arrivals(transit_arrivals_str):
491
+ """Calculate the total quantity from TransitArrival string."""
492
+ if transit_arrivals_str == '[]' or not transit_arrivals_str:
493
+ return 0.0
494
+
495
+ try:
496
+ arrivals = ast.literal_eval(transit_arrivals_str)
497
+ return sum(arrival.get('quantity', 0) for arrival in arrivals)
498
+ except:
499
+ return 0.0
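
The TransitArrival strings parsed here are Python-literal lists of dicts, as produced later by `_process_future_period_complete`. An illustrative call with assumed values, assuming the helper is importable:

```python
transit_str = "[{'quantity': 100.0, 'arrival_date': '2024-02-15'}, {'quantity': 50.0, 'arrival_date': '2024-02-28'}]"
assert _sum_transit_arrivals(transit_str) == 150.0
assert _sum_transit_arrivals('[]') == 0.0
```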
500
+
501
+
502
+ def _prepare_transit_schedule_complete(key, transit_amount, dates, df_transit, location):
503
+ """Prepare transit schedule based on df_transit or default logic."""
504
+ if transit_amount <= 0:
505
+ return []
506
+
507
+ transit_schedule = []
508
+
509
+ if df_transit is None:
510
+ # Default logic: complete transit arrives in period 1
511
+ if len(dates) > 1:
512
+ arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
513
+ transit_schedule.append({
514
+ 'quantity': transit_amount,
515
+ 'arrival_date': arrival_date
516
+ })
517
+ else:
518
+ # Use provided transit schedule
519
+ if location:
520
+ item, loc = key
521
+ mask = (df_transit['Item'] == item) & (df_transit['Location'] == loc)
522
+ else:
523
+ mask = df_transit['Item'] == key
60
524
 
61
- location (bool, optional): Whether to process by location. Defaults to False.
525
+ transit_data = df_transit[mask].copy()
526
+
527
+ if not transit_data.empty:
528
+ # Validate total matches
529
+ total_scheduled = transit_data['Transit'].sum()
530
+ if abs(total_scheduled - transit_amount) > 0.01: # Allow small floating point differences
531
+ raise ValueError(f"Transit schedule total ({total_scheduled}) does not match inventory transit ({transit_amount}) for {key}")
62
532
 
63
- security_stock_ref (bool, optional): Use reference days method for safety stock
64
- calculation instead of statistical method. Defaults to False.
65
-
66
- df_transit (pd.DataFrame, optional): Transit arrival schedule with columns:
67
- - Item: Item identifier
68
- - Location: Location identifier (if location=True)
69
- - Transit: Partial transit quantity
70
- - ArrivalDate: Arrival date (format: 'YYYY-MM-DD')
71
- If None, complete transit arrives in period 1. Defaults to None.
72
-
73
- integer (bool, optional): Controls numeric formatting of quantity fields.
74
- When True, quantity fields are displayed as integers.
75
- When False, quantity fields are displayed with decimals.
76
- Defaults to True.
77
-
78
- complete_suggested (bool, optional): When True, uses the last calculated
79
- SuggestedForecast value for periods without forecast data instead of
80
- raising an error. Defaults to False.
533
+ # Create transit orders
534
+ for _, row in transit_data.iterrows():
535
+ arrival_date = pd.to_datetime(row['ArrivalDate'], format='%Y-%m-%d')
536
+ transit_schedule.append({
537
+ 'quantity': float(row['Transit']),
538
+ 'arrival_date': arrival_date
539
+ })
540
+ else:
541
+ # If no transit data provided for this item, use default logic
542
+ if len(dates) > 1:
543
+ arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
544
+ transit_schedule.append({
545
+ 'quantity': transit_amount,
546
+ 'arrival_date': arrival_date
547
+ })
81
548
 
82
- start_date_zero (str, optional): Custom start date for period 0 (format: 'YYYY-MM-DD').
83
- When None (default), uses the current system date for period 0.
84
- When specified, uses this date as the starting point for period 0 instead
85
- of the current system date. Defaults to None.
86
- """
549
+ return transit_schedule
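
A sketch of the df_transit layout this helper expects (all values assumed); the per-item Transit sum must match the inventory transit amount to within 0.01, otherwise a ValueError is raised.

```python
import pandas as pd

df_transit = pd.DataFrame({
    'Item': ['SKU1', 'SKU1'],
    'Transit': [40.0, 60.0],
    'ArrivalDate': ['2024-02-10', '2024-03-20'],
})

schedule = _prepare_transit_schedule_complete(
    key='SKU1', transit_amount=100.0, dates=['20240101', '20240131'],
    df_transit=df_transit, location=False,
)
# -> two orders of 40.0 and 60.0 units with their parsed arrival dates
```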
550
+
551
+
552
+ def _process_current_period_complete(current_df_inv, df_sstock, key, date, transit_orders, dates, metadata, integer, security_stock_ref=False, df_transit=None):
553
+ """Process inventory for the current period (i=0). Replicates exactly the logic from future_reorder_optimized."""
554
+
555
+ # Get inventory data efficiently - EXACTLY like future_reorder_optimized line 410-414
556
+ try:
557
+ inventory_data = {
558
+ 'FutureInventory': current_df_inv['Inventory'].iloc[0],
559
+ 'FutureTransit': current_df_inv['Transit'].iloc[0],
560
+ 'PurchaseFactor': current_df_inv['PurchaseFactor'].iloc[0] if 'PurchaseFactor' in current_df_inv.columns else 1
561
+ }
562
+ except KeyError as e:
563
+ # Handle missing columns gracefully
564
+ inventory_data = {
565
+ 'FutureInventory': current_df_inv.get('Inventory', pd.Series([0])).iloc[0],
566
+ 'FutureTransit': current_df_inv.get('Transit', pd.Series([0])).iloc[0],
567
+ 'PurchaseFactor': current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
568
+ }
569
+
570
+ # Vectorized calculations - EXACTLY like future_reorder_optimized line 417-428
571
+ df = df_sstock.copy()
572
+ df['FutureInventory'] = _format_value_complete(inventory_data['FutureInventory'], 'FutureInventory', integer)
573
+ df['FutureTransit'] = _format_value_complete(inventory_data['FutureTransit'], 'FutureTransit', integer)
574
+ df['FutureInventoryTransit'] = _format_value_complete(
575
+ inventory_data['FutureInventory'] + inventory_data['FutureTransit'],
576
+ 'FutureInventoryTransit', integer
577
+ )
578
+ df['PurchaseFactor'] = inventory_data['PurchaseFactor']
579
+
580
+ # Initialize transit orders - EXACTLY like future_reorder_optimized line 430-438
581
+ if key not in transit_orders:
582
+ transit_orders[key] = []
583
+
584
+ # Handle transit schedule
585
+ transit_qty = float(inventory_data['FutureTransit'])
586
+ if transit_qty > 0:
587
+ transit_schedule = _prepare_transit_schedule_complete(key, transit_qty, dates, df_transit, 'Location' in metadata)
588
+ transit_orders[key].extend(transit_schedule)
589
+
590
+ # Set initial values - EXACTLY like future_reorder_optimized line 440-452
591
+ df['TransitArrival'] = '[]'
592
+ df['SecurityStock'] = _calculate_security_stock_complete(df, security_stock_ref, integer)
593
+ df['SuggestedForecast'] = _format_value_complete(df['SuggestedForecast'].iloc[0], 'SuggestedForecast', integer)
594
+ df['ReorderPoint'] = _format_value_complete(
595
+ max(0, df['SuggestedForecast'].iloc[0] + df['SecurityStock'].iloc[0]), 'ReorderPoint', integer
596
+ )
597
+ df['ReorderQtyBase'] = _format_value_complete(
598
+ max(0, df['ReorderPoint'].iloc[0] - df['FutureInventoryTransit'].iloc[0]), 'ReorderQtyBase', integer
599
+ )
600
+ df['ReorderQty'] = 0
601
+ df['ReorderQtyDays'] = 0
602
+ df['ArrivalDate'] = ''
603
+
604
+ return df
605
+
606
+
607
+ def _process_transit_orders_complete(transit_orders, key, current_date, previous_date):
608
+ """Process transit orders and calculate arrivals for the current period."""
609
+ # Get orders for this key, return early if none
610
+ orders = transit_orders.get(key, [])
611
+ if not orders:
612
+ return 0, 0, []
613
+
614
+ new_transit = 0
615
+ remaining_orders = []
616
+ transit_arrivals = []
617
+ stock_from_arrivals = 0
618
+
619
+ for order in orders:
620
+ if order['arrival_date'] > previous_date and order['arrival_date'] <= current_date:
621
+ # Order arrives in this period
622
+ stock_from_arrivals += order['quantity']
623
+ transit_arrivals.append({
624
+ 'quantity': float(order['quantity']),
625
+ 'arrival_date': order['arrival_date'].strftime('%Y-%m-%d')
626
+ })
627
+ else:
628
+ # Order still in transit
629
+ new_transit += order['quantity']
630
+ remaining_orders.append(order)
631
+
632
+ transit_orders[key] = remaining_orders
633
+ return stock_from_arrivals, new_transit, transit_arrivals
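
A minimal sketch (assumed data, assuming the helper is importable) of how orders are split between arrivals inside the period window and stock that remains in transit:

```python
import pandas as pd

transit_orders = {'SKU1': [
    {'quantity': 40.0, 'arrival_date': pd.Timestamp('2024-02-10')},
    {'quantity': 60.0, 'arrival_date': pd.Timestamp('2024-03-20')},
]}

arrived, still_in_transit, arrivals = _process_transit_orders_complete(
    transit_orders, 'SKU1',
    current_date=pd.Timestamp('2024-02-28'),
    previous_date=pd.Timestamp('2024-01-31'),
)
# arrived == 40.0, still_in_transit == 60.0; transit_orders['SKU1'] now holds only the March order
```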
634
+
635
+
636
+ def _process_future_period_complete(current_df_inv, df_sstock, df_previous, key, date, dates, i, transit_orders, metadata, integer, security_stock_ref=False):
637
+ """Process inventory for future periods (i>0). Replicates exactly the logic from future_reorder_optimized."""
638
+
639
+ # EXACTLY like future_reorder_optimized line 460-461
640
+ df = df_sstock.copy()
641
+ try:
642
+ df['PurchaseFactor'] = current_df_inv['PurchaseFactor'].iloc[0] if 'PurchaseFactor' in current_df_inv.columns else 1
643
+ except (KeyError, IndexError):
644
+ df['PurchaseFactor'] = 1
645
+
646
+ # Calculate consumption - EXACTLY like future_reorder_optimized line 463-465
647
+ consumption = df_previous['SuggestedForecastPeriod'].iloc[0]
648
+ previous_stock = df_previous['FutureInventory'].iloc[0] - consumption
649
+
650
+ # Process transit orders - EXACTLY like future_reorder_optimized line 467-473
651
+ current_date = pd.to_datetime(date, format='%Y%m%d')
652
+ previous_date = pd.to_datetime(dates[i-1], format='%Y%m%d')
653
+
654
+ stock_from_arrivals, new_transit, transit_arrivals = _process_transit_orders_complete(
655
+ transit_orders, key, current_date, previous_date
656
+ )
657
+
658
+ # Vectorized inventory updates - EXACTLY like future_reorder_optimized line 475-482
659
+ future_stock = max(0, previous_stock + stock_from_arrivals)
660
+ df['FutureInventory'] = _format_value_complete(future_stock, 'FutureInventory', integer)
661
+ df['FutureTransit'] = _format_value_complete(new_transit, 'FutureTransit', integer)
662
+ df['FutureInventoryTransit'] = _format_value_complete(
663
+ future_stock + new_transit, 'FutureInventoryTransit', integer
664
+ )
665
+ df['TransitArrival'] = str(transit_arrivals) if transit_arrivals else '[]'
666
+
667
+ # Vectorized reorder calculations - EXACTLY like future_reorder_optimized line 484-508
668
+ df['SecurityStock'] = _calculate_security_stock_complete(df, security_stock_ref, integer)
669
+ df['SuggestedForecast'] = _format_value_complete(df['SuggestedForecast'].iloc[0], 'SuggestedForecast', integer)
670
+ df['ReorderPoint'] = _format_value_complete(
671
+ max(0, df['SuggestedForecast'].iloc[0] + df['SecurityStock'].iloc[0]), 'ReorderPoint', integer
672
+ )
673
+ df['ReorderQtyBase'] = _format_value_complete(
674
+ max(0, df['ReorderPoint'].iloc[0] - df['FutureInventoryTransit'].iloc[0]), 'ReorderQtyBase', integer
675
+ )
676
+
677
+ # Calculate ReorderQty - EXACTLY like future_reorder_optimized line 494-500
678
+ reorder_qty_base = df['ReorderQtyBase'].iloc[0]
679
+ purchase_factor = df['PurchaseFactor'].iloc[0]
680
+
681
+ if reorder_qty_base > 0:
682
+ reorder_qty = np.ceil(reorder_qty_base / purchase_factor) * purchase_factor
683
+ else:
684
+ reorder_qty = 0
685
+
686
+ df['ReorderQty'] = _format_value_complete(reorder_qty, 'ReorderQty', integer)
687
+
688
+ # Calculate ReorderQtyDays - EXACTLY like future_reorder_optimized line 502-508
689
+ if df['ReorderQty'].iloc[0] > 0 and df['AvgDailyUsage'].iloc[0] > 0:
690
+ reorder_qty_days = df['ReorderQty'].iloc[0] / df['AvgDailyUsage'].iloc[0]
691
+ else:
692
+ reorder_qty_days = 0
693
+
694
+ df['ReorderQtyDays'] = _format_value_complete(reorder_qty_days, 'ReorderQtyDays', integer)
695
+
696
+ # Handle new orders - EXACTLY like future_reorder_optimized line 510-521
697
+ if df['ReorderQty'].iloc[0] > 0:
698
+ avg_lead_time = df['AvgLeadTime'].iloc[0]
699
+ arrival_date = current_date + timedelta(days=int(avg_lead_time))
700
+ transit_orders[key].append({
701
+ 'quantity': float(df['ReorderQty'].iloc[0]),
702
+ 'arrival_date': arrival_date
703
+ })
704
+ df['ArrivalDate'] = arrival_date.strftime('%Y-%m-%d')
705
+ else:
706
+ df['ArrivalDate'] = ''
707
+
708
+ return df
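
A worked pass through the reorder arithmetic above with assumed numbers, showing how the purchase factor rounds the base quantity up to an order multiple:

```python
import numpy as np

suggested_forecast, security_stock = 180, 40    # assumed
future_inventory_transit = 150                  # assumed
purchase_factor = 25                            # assumed order multiple

reorder_point = max(0, suggested_forecast + security_stock)                   # 220
reorder_qty_base = max(0, reorder_point - future_inventory_transit)           # 70
reorder_qty = np.ceil(reorder_qty_base / purchase_factor) * purchase_factor   # 75.0
```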
709
+
710
+
711
+ class FutureReorder():
712
+ """
713
+ Complete version optimized for massive processing of large datasets.
714
+ Includes ALL the functionality of the original class, optimized for parallelization.
715
+ """
716
+
717
+ def __init__(self, df_inv, df_lead_time, df_prep, df_fcst, periods, start_date,
718
+ location=False, security_stock_ref=False, df_transit=None, integer=True,
719
+ complete_suggested=False, start_date_zero=None, batch_size=None, n_workers=None,
720
+ verbose=True):
721
+
722
+ # Original parameters - all parameters from the original class
87
723
  self.df_inv = df_inv
88
724
  self.df_lead_time = df_lead_time
89
725
  self.df_prep = df_prep
90
726
  self.df_fcst = df_fcst
91
727
  self.default_coverage = 30
92
728
  self.periods = periods
93
- self.start_date = pd.to_datetime(start_date, format='%Y-%m-%d')
729
+ self.start_date = pd.to_datetime(start_date, format='%Y-%m-%d') if start_date is not None else None
94
730
  self.location = location
95
731
  self.security_stock_ref = security_stock_ref
96
732
  self.df_transit = df_transit
@@ -98,90 +734,172 @@ class FutureReorder():
98
734
  self.complete_suggested = complete_suggested
99
735
  self.start_date_zero = start_date_zero
100
736
 
101
- # Initialize metadata columns based on location usage
737
+ # Optimization parameters with intelligent defaults
738
+ total_items = len(df_inv)
739
+
740
+ # Auto-configure batch_size based on dataset size
741
+ if batch_size is None:
742
+ if total_items <= 500:
743
+ self.batch_size = 50 # Small batches for small datasets
744
+ elif total_items <= 2000:
745
+ self.batch_size = 100 # Medium batches
746
+ else:
747
+ self.batch_size = 200 # Larger batches for big datasets
748
+ else:
749
+ self.batch_size = batch_size
750
+
751
+ # Auto-configure n_workers based on system and dataset
752
+ if n_workers is None:
753
+ available_cores = cpu_count()
754
+ if total_items <= 200:
755
+ self.n_workers = min(2, available_cores - 1) # Conservative for small datasets
756
+ elif total_items <= 1000:
757
+ self.n_workers = min(4, available_cores - 1) # Moderate parallelization
758
+ else:
759
+ self.n_workers = min(max(4, available_cores - 2), 8) # Aggressive for large datasets
760
+ else:
761
+ self.n_workers = n_workers
762
+
763
+ self.verbose = verbose
764
+
765
+ # Initialize metadata columns
102
766
  self.metadata = ['Item']
103
767
  if self.location:
104
768
  self.metadata.append('Location')
769
+
770
+ # Pre-filter dataframes based on df_inv to improve performance
771
+ self._prefilter_dataframes()
772
+
773
+ self._log(f"🚀 FutureReorder Massive Complete - Inicializado para {len(self.df_inv)} ítems")
774
+ self._log(f"⚙️ Configuración: batch_size={batch_size}, workers={self.n_workers}")
105
775
 
106
-
107
- def _format_value(self, value, field_name):
776
+ def _prefilter_dataframes(self):
108
777
  """
109
- Apply appropriate formatting based on field type and integer setting.
110
-
111
- Args:
112
- value: The numeric value to format (scalar or Series)
113
- field_name: The name of the field to determine formatting rules
114
-
115
- Returns:
116
- Formatted value (int or float with 2 decimals)
778
+ Pre-filter all input dataframes based on df_inv to improve performance.
779
+ Only process data that exists in df_inv (inventory data).
117
780
  """
118
- # Handle pandas Series - extract scalar value
119
- if isinstance(value, pd.Series):
120
- if len(value) == 1:
121
- value = value.iloc[0]
122
- else:
123
- raise ValueError(f"Expected scalar value for {field_name}, got Series with {len(value)} elements")
124
-
125
- # Handle NaN, None, and infinite values
126
- if pd.isna(value) or value is None:
127
- return 0
128
- if np.isinf(value):
129
- return 0
130
-
131
- # Fields that are ALWAYS integers
132
- always_integer_fields = [
133
- 'PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime',
134
- 'ReorderQtyDays', 'ReorderFreq', 'Coverage', 'FutureStockoutDays'
135
- ]
781
+ if self.verbose:
782
+ original_sizes = {
783
+ 'df_lead_time': len(self.df_lead_time),
784
+ 'df_prep': len(self.df_prep),
785
+ 'df_fcst': len(self.df_fcst),
786
+ 'df_transit': len(self.df_transit) if self.df_transit is not None else 0
787
+ }
788
+ self._log("🔍 Pre-filtering dataframes based on df_inv...")
136
789
 
137
- # Fields that are ALWAYS decimals (2 decimal places)
138
- always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
790
+ # Create base filter from df_inv
791
+ if self.location:
792
+ base_filter = self.df_inv[['Item', 'Location']].drop_duplicates()
793
+ else:
794
+ base_filter = self.df_inv[['Item']].drop_duplicates()
139
795
 
140
- # Fields that change based on self.integer setting
141
- quantity_fields = [
142
- 'FutureInventoryTransit', 'FutureInventory', 'FutureTransit',
143
- 'FutureInventoryTransitArrival', 'SuggestedForecast', 'SuggestedForecastPeriod',
144
- 'ReorderPoint', 'ReorderQtyBase', 'ReorderQty', 'SecurityStock', 'Inventory', 'Transit'
145
- ]
796
+ # Filter df_lead_time
797
+ if self.location:
798
+ self.df_lead_time = self.df_lead_time.merge(
799
+ base_filter,
800
+ on=['Item', 'Location'],
801
+ how='inner'
802
+ )
803
+ else:
804
+ self.df_lead_time = self.df_lead_time.merge(
805
+ base_filter,
806
+ on=['Item'],
807
+ how='inner'
808
+ )
146
809
 
147
- if field_name in always_integer_fields:
148
- return int(round(value))
149
- elif field_name in always_decimal_fields:
150
- return round(value, 2)
151
- elif field_name in quantity_fields:
152
- if self.integer:
153
- return int(round(value))
810
+ # Filter df_prep - handle different column naming conventions
811
+ if self.location:
812
+ # Check if df_prep uses 'item_id' and 'location' columns
813
+ if 'item_id' in self.df_prep.columns and 'location' in self.df_prep.columns:
814
+ # Create renamed base filter for df_prep
815
+ base_filter_prep = base_filter.copy()
816
+ base_filter_prep = base_filter_prep.rename(columns={'Item': 'item_id', 'Location': 'location'})
817
+ self.df_prep = self.df_prep.merge(
818
+ base_filter_prep,
819
+ on=['item_id', 'location'],
820
+ how='inner'
821
+ )
154
822
  else:
155
- return round(value, 2)
823
+ # Use standard column names
824
+ self.df_prep = self.df_prep.merge(
825
+ base_filter,
826
+ on=['Item', 'Location'],
827
+ how='inner'
828
+ )
156
829
  else:
157
- # Default: return as is
158
- return value
830
+ # Check if df_prep uses 'item_id' column
831
+ if 'item_id' in self.df_prep.columns:
832
+ base_filter_prep = base_filter.copy()
833
+ base_filter_prep = base_filter_prep.rename(columns={'Item': 'item_id'})
834
+ self.df_prep = self.df_prep.merge(
835
+ base_filter_prep,
836
+ on=['item_id'],
837
+ how='inner'
838
+ )
839
+ else:
840
+ self.df_prep = self.df_prep.merge(
841
+ base_filter,
842
+ on=['Item'],
843
+ how='inner'
844
+ )
845
+
846
+ # Filter df_fcst
847
+ if self.location:
848
+ self.df_fcst = self.df_fcst.merge(
849
+ base_filter,
850
+ on=['Item', 'Location'],
851
+ how='inner'
852
+ )
853
+ else:
854
+ self.df_fcst = self.df_fcst.merge(
855
+ base_filter,
856
+ on=['Item'],
857
+ how='inner'
858
+ )
859
+
860
+ # Filter df_transit if it exists
861
+ if self.df_transit is not None:
862
+ if self.location:
863
+ self.df_transit = self.df_transit.merge(
864
+ base_filter,
865
+ on=['Item', 'Location'],
866
+ how='inner'
867
+ )
868
+ else:
869
+ self.df_transit = self.df_transit.merge(
870
+ base_filter,
871
+ on=['Item'],
872
+ how='inner'
873
+ )
874
+
875
+ if self.verbose:
876
+ new_sizes = {
877
+ 'df_lead_time': len(self.df_lead_time),
878
+ 'df_prep': len(self.df_prep),
879
+ 'df_fcst': len(self.df_fcst),
880
+ 'df_transit': len(self.df_transit) if self.df_transit is not None else 0
881
+ }
882
+
883
+ self._log("📊 Filtrado completado:")
884
+ for df_name, original_size in original_sizes.items():
885
+ new_size = new_sizes[df_name]
886
+ if original_size > 0:
887
+ reduction_pct = ((original_size - new_size) / original_size) * 100
888
+ self._log(f" • {df_name}: {original_size:,} → {new_size:,} (-{reduction_pct:.1f}%)")
889
+ else:
890
+ self._log(f" • {df_name}: {original_size:,} → {new_size:,}")
159
891
 
892
+ def _log(self, message):
893
+ if self.verbose:
894
+ print(message)
895
+ sys.stdout.flush()
160
896
 
161
897
  def future_date(self):
162
898
  """
163
899
  Generate future reorder dates for each item based on reorder frequency.
164
-
165
- This method creates a schedule of dates when reorders should be evaluated
166
- for each item (or item-location combination). The schedule includes:
167
- 1. Current date (always first)
168
- 2. Start date (if after current date)
169
- 3. Subsequent dates at reorder frequency intervals
170
-
171
- This optimized version groups items by reorder frequency for better performance
172
- with large datasets.
173
-
174
- Returns:
175
- dict: Dictionary mapping item (or (item, location) tuple) to list of
176
- reorder dates in 'YYYYMMDD' format.
177
-
178
- Example:
179
- {
180
- 'ITEM001': ['20240101', '20240115', '20240214', ...],
181
- ('ITEM002', 'LOC1'): ['20240101', '20240120', '20240219', ...]
182
- }
900
+ Optimized version of the original function.
183
901
  """
184
- # Determine the starting date for period 0
902
+ # Determine the starting date for period 0 - EXACTLY like future_reorder_optimized line 148-155
185
903
  if self.start_date_zero is not None:
186
904
  # Use custom start date for period 0
187
905
  actual_date = pd.to_datetime(self.start_date_zero, format='%Y-%m-%d')
@@ -192,7 +910,16 @@ class FutureReorder():
192
910
  timestamp = utils.set_timestamp()
193
911
  actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')
194
912
 
195
- end_date = actual_date + pd.DateOffset(months=self.periods)
913
+ # Use periods + 1 internally to calculate one extra period for transit calculations
914
+ # The extra period will be filtered out in the final results
915
+ end_date = actual_date + pd.DateOffset(months=self.periods + 1)
916
+
917
+ # Handle start_date = None case
918
+ if self.start_date is None:
919
+ # If start_date is None, use actual_date as the base for period 1
920
+ base_start_date = actual_date
921
+ else:
922
+ base_start_date = self.start_date
196
923
 
197
924
  # Get unique items with their reorder frequencies
198
925
  columns = self.metadata + ['ReorderFreq']
@@ -211,17 +938,17 @@ class FutureReorder():
211
938
  # Generate date range for this frequency
212
939
  date_range = []
213
940
 
214
- # Always include actual date
941
+ # Always include actual date (period 0)
215
942
  date_range.append(actual_date)
216
943
 
217
- # Include start_date if after actual_date
218
- if self.start_date > actual_date:
219
- date_range.append(self.start_date)
944
+ # Include base_start_date if after actual_date
945
+ if base_start_date > actual_date:
946
+ date_range.append(base_start_date)
220
947
 
221
948
  # Generate subsequent dates using pandas date_range for efficiency
222
- num_periods = int((end_date - self.start_date).days / freq) + 1
949
+ num_periods = int((end_date - base_start_date).days / freq) + 1
223
950
  future_dates = pd.date_range(
224
- start=self.start_date + timedelta(days=freq),
951
+ start=base_start_date + timedelta(days=freq),
225
952
  periods=num_periods,
226
953
  freq=f'{freq}D'
227
954
  )
@@ -240,695 +967,46 @@ class FutureReorder():
240
967
 
241
968
  return item_dates
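
A simplified sketch of the per-frequency schedule this method builds, using assumed dates and an explicit end date instead of the computed period count: period 0 is the actual date, then the start date, then steps of ReorderFreq days.

```python
import pandas as pd
from datetime import timedelta

actual_date = pd.Timestamp('2024-01-01')          # period 0 (today or start_date_zero)
start_date = pd.Timestamp('2024-01-15')           # assumed
freq = 30                                         # ReorderFreq in days (assumed)
end_date = actual_date + pd.DateOffset(months=4)  # periods + 1 extra month for transit

dates = [actual_date, start_date] + list(pd.date_range(
    start=start_date + timedelta(days=freq), end=end_date, freq=f'{freq}D'))
date_strings = [d.strftime('%Y%m%d') for d in dates]
# ['20240101', '20240115', '20240214', '20240315', '20240414']
```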
242
969
 
243
-
244
- def _get_current_dataframes(self, item, location=None):
245
- """
246
- Get filtered dataframes for current item/location combination.
247
-
248
- Args:
249
- item (str): Item identifier to filter for
250
- location (str, optional): Location identifier if using multi-location mode
251
-
252
- Returns:
253
- tuple: (current_df_lead_time, current_df_inv)
254
- - current_df_lead_time: Lead time data filtered for item/location
255
- - current_df_inv: Inventory data filtered for item/location
256
- """
257
- # Create filter mask based on item
258
- mask_lead_time = self.df_lead_time['Item'] == item
259
- mask_inv = self.df_inv['Item'] == item
260
-
261
- # Add location filter if needed
262
- if self.location and location is not None:
263
- mask_lead_time &= self.df_lead_time['Location'] == location
264
- mask_inv &= self.df_inv['Location'] == location
265
-
266
- # Apply filters using boolean indexing
267
- current_df_lead_time = self.df_lead_time[mask_lead_time]
268
- current_df_inv = self.df_inv[mask_inv]
269
-
270
- return current_df_lead_time, current_df_inv
271
-
272
-
273
- def _calculate_suggested_forecast(self, current_df_lead_time, current_df_inv, date, last_suggested_value=None):
274
- """
275
- Calculate suggested forecast for the given date using the SuggestedForecast class.
276
-
277
- This method now validates that sufficient forecast data exists to cover the
278
- required coverage period. If forecast data doesn't extend far enough into
279
- the future, it either raises an error or uses the last calculated value
280
- based on the complete_suggested parameter.
281
-
282
- Args:
283
- current_df_lead_time (pd.DataFrame): Lead time data for current item
284
- current_df_inv (pd.DataFrame): Inventory data for current item
285
- date (str): Date for forecast calculation in 'YYYYMMDD' format
286
- last_suggested_value (float, optional): Last calculated SuggestedForecast value
287
- to use when complete_suggested is True and forecast data is insufficient
288
-
289
- Returns:
290
- pd.DataFrame: DataFrame containing suggested forecast values
291
-
292
- Raises:
293
- ValueError: If forecast data doesn't extend far enough to cover the required period
294
- and complete_suggested is False or no previous value is available
295
- """
296
- # Convert current date to datetime
297
- current_date = pd.to_datetime(date, format='%Y%m%d')
298
-
299
- # Get the maximum forecast date available
300
- max_forecast_date = self.df_fcst['Date'].max()
970
+ def _prepare_batch_data(self, item_dates):
971
+ """Prepara datos por lotes de manera eficiente. Replicates exactly the logic from future_reorder_optimized."""
972
+ batch_data = []
301
973
 
302
- # Get coverage value for this item
303
- coverage = current_df_lead_time['Coverage'].iloc[0]
304
-
305
- # Calculate the required forecast end date
306
- required_forecast_end_date = current_date + timedelta(days=int(coverage))
307
-
308
- # Check if we have sufficient forecast data
309
- if max_forecast_date < required_forecast_end_date:
310
- # Get item identifier for error message
311
- item = current_df_inv['Item'].iloc[0]
312
- location_msg = ""
313
- if self.location and 'Location' in current_df_inv.columns:
314
- location = current_df_inv['Location'].iloc[0]
315
- location_msg = f" at location {location}"
316
-
317
- if self.complete_suggested:
318
- if last_suggested_value is not None:
319
- # Use the last calculated SuggestedForecast value
320
- # Create a DataFrame with the same structure as the normal output
321
- result_df = current_df_inv[self.metadata].copy()
322
- result_df['SuggestedForecast'] = last_suggested_value
323
-
324
- # Add PurchaseFactor and ItemDescription from inventory data
325
- if 'PurchaseFactor' in current_df_inv.columns:
326
- result_df['PurchaseFactor'] = current_df_inv['PurchaseFactor'].iloc[0]
327
- else:
328
- result_df['PurchaseFactor'] = 1 # Default value if not present
329
-
330
- if 'ItemDescription' in current_df_inv.columns:
331
- result_df['ItemDescription'] = current_df_inv['ItemDescription'].iloc[0]
332
- else:
333
- result_df['ItemDescription'] = '' # Default value if not present
334
-
335
- return result_df
974
+ for key, dates in item_dates.items():
975
+ try:
976
+ # Get dataframes for this item - EXACTLY like future_reorder_optimized _get_current_dataframes_optimized
977
+ if self.location:
978
+ item, location = key
336
979
  else:
337
- # For the first period when complete_suggested=True but no previous value exists,
338
- # try to calculate with available data up to max_forecast_date
339
- # This allows at least the first period to be calculated
340
- try:
341
- return SuggestedForecast(
342
- df_LeadTimes=current_df_lead_time,
343
- df_Forecast=self.df_fcst,
344
- df_Prep=self.df_prep,
345
- df_inv=current_df_inv,
346
- column_forecast='SuggestedForecast',
347
- columns_metadata=self.metadata,
348
- frequency_='M',
349
- location=self.location,
350
- actualdate=date,
351
- default_coverage_=self.default_coverage,
352
- join_='left'
353
- ).suggested_forecast()
354
- except Exception as e:
355
- # If even the basic calculation fails, raise a more informative error
356
- error_msg = (
357
- f"Cannot calculate initial forecast for item {item}{location_msg}. "
358
- f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
359
- f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
360
- f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}. "
361
- f"Original error: {str(e)}"
362
- )
363
- raise ValueError(error_msg)
364
- else:
365
- error_msg = (
366
- f"Insufficient forecast data for item {item}{location_msg}. "
367
- f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
368
- f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
369
- f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}."
370
- )
371
- raise ValueError(error_msg)
372
-
373
- # If validation passes, proceed with the original calculation
374
- return SuggestedForecast(
375
- df_LeadTimes=current_df_lead_time,
376
- df_Forecast=self.df_fcst,
377
- df_Prep=self.df_prep,
378
- df_inv=current_df_inv,
379
- column_forecast='SuggestedForecast',
380
- columns_metadata=self.metadata,
381
- frequency_='M',
382
- location=self.location,
383
- actualdate=date,
384
- default_coverage_=self.default_coverage,
385
- join_='left'
386
- ).suggested_forecast()
387
-
388
-
389
- def _calculate_daily_usage(self, suggested_forecast_df, date):
390
- """
391
- Calculate average and maximum daily usage rates.
392
-
393
- This method computes both average and maximum daily consumption rates
394
- which are used for inventory planning and safety stock calculations.
395
-
396
- Args:
397
- suggested_forecast_df (pd.DataFrame): DataFrame with forecast data
398
- date (str): Current calculation date in 'YYYYMMDD' format
399
-
400
- Returns:
401
- tuple: (df_avg, df_max)
402
- - df_avg: DataFrame with average daily usage
403
- - df_max: DataFrame with maximum daily usage
404
- """
405
- df_avg = DailyUsageFuture(
406
- location=self.location,
407
- column_forecast='SuggestedForecast',
408
- date=date,
409
- df_fcst=self.df_fcst
410
- ).daily_usage(suggested_forecast_df, 'AvgDailyUsage').fillna(0)
411
-
412
- df_max = DailyUsageFuture(
413
- location=self.location,
414
- column_forecast='SuggestedForecast',
415
- date=date,
416
- df_fcst=self.df_fcst
417
- ).daily_usage(df_avg, 'MaxDailyUsage').fillna(0)
418
-
419
- return df_avg, df_max
420
-
421
-
422
- def _calculate_security_stock_data(self, df_max, current_df_lead_time, period_index=None, dates=None):
423
- """
424
- Calculate security stock related data and prepare for reorder calculations.
425
-
426
- This method:
427
- 1. Merges daily usage with lead time data
428
- 2. Determines effective reorder frequency and coverage
429
- 3. Calculates SuggestedForecastPeriod based on coverage ratio
430
- 4. For period 0, uses days to next period instead of reorder frequency
431
-
432
- Args:
433
- df_max (pd.DataFrame): DataFrame with maximum daily usage
434
- current_df_lead_time (pd.DataFrame): Lead time data for current item
435
- period_index (int, optional): Current period index (0, 1, 2, ...)
436
- dates (list, optional): List of dates for this item
437
-
438
- Returns:
439
- pd.DataFrame: DataFrame with merged data and calculated fields:
440
- - All fields from df_max
441
- - AvgLeadTime, MaxLeadTime from lead time data
442
- - SuggestedForecastPeriod: Adjusted forecast for the period
443
- """
444
- merge_columns = ['Item', 'Location', 'AvgLeadTime', 'MaxLeadTime'] if self.location else ['Item', 'AvgLeadTime', 'MaxLeadTime']
445
- df_sstock = pd.merge(df_max, current_df_lead_time[merge_columns], on=self.metadata, how='inner').drop_duplicates()
446
-
447
- # Get ReorderFreq and Coverage
448
- reorder_freq = current_df_lead_time['ReorderFreq'].values[0]
449
- if pd.isnull(reorder_freq) or reorder_freq == 0:
450
- reorder_freq = self.default_coverage
451
-
452
- coverage = self.default_coverage
453
- if 'Coverage' in current_df_lead_time.columns:
454
- coverage_val = current_df_lead_time['Coverage'].values[0]
455
- if not pd.isnull(coverage_val):
456
- coverage = coverage_val
457
- else:
458
- coverage = reorder_freq + df_sstock['AvgLeadTime'].values[0]
459
- else:
460
- coverage = reorder_freq + df_sstock['AvgLeadTime'].values[0]
461
-
462
- # Calculate SuggestedForecastPeriod
463
- if period_index == 0 and dates is not None and len(dates) > 1:
464
- # For period 0, use days to next period instead of reorder frequency
465
- # This allows uniform consumption calculation in all future periods
466
- current_date = pd.to_datetime(dates[0], format='%Y%m%d')
467
- next_date = pd.to_datetime(dates[1], format='%Y%m%d')
468
- days_to_next_period = (next_date - current_date).days
469
-
470
- # Formula: SuggestedForecast × (days_to_next_period / coverage)
471
- # This represents the forecasted consumption from period 0 to period 1
472
- suggested_forecast_period = np.ceil(df_sstock['SuggestedForecast'] * (days_to_next_period / coverage))
473
- else:
474
- # For other periods, use the original calculation with reorder frequency
475
- # Formula: SuggestedForecast × (reorder_freq / coverage)
476
- suggested_forecast_period = np.ceil(df_sstock['SuggestedForecast'] * (reorder_freq / coverage))
477
-
478
- df_sstock['SuggestedForecastPeriod'] = df_sstock.apply(
479
- lambda row: self._format_value(suggested_forecast_period.iloc[row.name], 'SuggestedForecastPeriod'),
480
- axis=1
481
- )
482
-
483
- return df_sstock
484
-
485
-
486
- def _calculate_security_stock(self, df):
487
- """
488
- Calculate security stock using configured method.
489
-
490
- Two methods are available:
491
- 1. Statistical method (default):
492
- SecurityStock = (MaxDailyUsage × MaxLeadTime) - (AvgDailyUsage × AvgLeadTime)
493
- This represents the difference between worst-case and average scenarios.
494
-
495
- 2. Reference days method (if security_stock_ref=True):
496
- SecurityStock = SecurityStockDaysRef × AvgDailyUsage
497
- Uses a predefined number of days of coverage.
498
-
499
- Args:
500
- df (pd.DataFrame): DataFrame containing required calculation fields
501
-
502
- Returns:
503
- pd.Series: Calculated security stock values
504
- """
505
- if self.security_stock_ref:
506
- security_stock = df['SecurityStockDaysRef'] * df['AvgDailyUsage']
507
- else:
508
- security_stock = (df['MaxDailyUsage'] * df['MaxLeadTime']) - (df['AvgDailyUsage'] * df['AvgLeadTime'])
509
-
510
- # Apply formatting
511
- return security_stock.apply(lambda x: self._format_value(x, 'SecurityStock'))
512
-
513
-
514
- def _calculate_inventory_days(self, df):
515
- """
516
- Calculate inventory days using configured method.
517
-
518
- FutureStockoutDays = (FutureInventoryTransitArrival - SecurityStock) / AvgDailyUsage
519
-
520
- Args:
521
- df (pd.DataFrame): DataFrame containing required calculation fields
522
-
523
- Returns:
524
- pd.Series: Calculated future stockout days
525
- """
526
- # Calculate future stockout days with safe division
527
- # Avoid division by zero by checking AvgDailyUsage
528
- future_stockout_days = np.where(
529
- df['AvgDailyUsage'] > 0,
530
- (df['FutureInventoryTransitArrival'] - df['SecurityStock']) / df['AvgDailyUsage'],
531
- 0 # If no daily usage, return 0 days
532
- )
533
-
534
- # Apply formatting
535
- return pd.Series(future_stockout_days).apply(lambda x: self._format_value(x, 'FutureStockoutDays'))
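A self-contained sketch of the safe-division pattern used here; the row values are made up, and the second row exercises the zero-usage branch:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'FutureInventoryTransitArrival': [500.0, 300.0],
    'SecurityStock': [120.0, 50.0],
    'AvgDailyUsage': [10.0, 0.0],     # second row would otherwise divide by zero
})
days = np.where(
    df['AvgDailyUsage'] > 0,
    (df['FutureInventoryTransitArrival'] - df['SecurityStock']) / df['AvgDailyUsage'],
    0,
)
# days -> array([38., 0.])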
536
-
537
-
538
- def _sum_transit_arrivals(self, transit_arrivals_str):
539
- """
540
- Calculate the total quantity from TransitArrival string.
541
-
542
- Args:
543
- transit_arrivals_str (str): String representation of transit arrivals list
544
- e.g., '[{"quantity": 100.0, "arrival_date": "2024-01-15"}]'
545
-
546
- Returns:
547
- float: Total quantity of all arrivals in the period
548
- """
549
-
550
- if transit_arrivals_str == '[]' or not transit_arrivals_str:
551
- return 0.0
552
-
553
- try:
554
- arrivals = ast.literal_eval(transit_arrivals_str)
555
- return sum(arrival.get('quantity', 0) for arrival in arrivals)
556
- except:
557
- return 0.0
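A minimal usage sketch of this parsing step, with an illustrative payload in the same string format:

import ast

transit_arrivals_str = '[{"quantity": 100.0, "arrival_date": "2024-01-15"}, {"quantity": 40.0, "arrival_date": "2024-02-01"}]'
arrivals = ast.literal_eval(transit_arrivals_str)                  # safely parses the list-of-dicts string
total = sum(arrival.get('quantity', 0) for arrival in arrivals)    # 140.0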
558
-
559
-
560
- def _prepare_transit_schedule(self, key, transit_amount, dates):
561
- """
562
- Prepare transit schedule based on df_transit or default logic.
563
-
564
- Args:
565
- key (tuple or str): Item identifier (item) or (item, location)
566
- transit_amount (float): Total transit amount from df_inv
567
- dates (list): List of dates for this item
568
-
569
- Returns:
570
- list: List of transit orders with 'quantity' and 'arrival_date'
571
- """
572
- if transit_amount <= 0:
573
- return []
574
-
575
- transit_schedule = []
576
-
577
- if self.df_transit is None:
578
- # Default logic: the entire transit quantity arrives in period 1
579
- if len(dates) > 1:
580
- arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
581
- transit_schedule.append({
582
- 'quantity': transit_amount,
583
- 'arrival_date': arrival_date
584
- })
585
- else:
586
- # Use provided transit schedule
587
- if self.location:
588
- item, location = key
589
- mask = (self.df_transit['Item'] == item) & (self.df_transit['Location'] == location)
590
- else:
591
- mask = self.df_transit['Item'] == key
980
+ item = key
981
+ location = None
982
+
983
+ # Create filter mask based on item
984
+ mask_lead_time = self.df_lead_time['Item'] == item
985
+ mask_inv = self.df_inv['Item'] == item
592
986
 
593
- transit_data = self.df_transit[mask].copy()
594
-
595
- if not transit_data.empty:
596
- # Validate total matches
597
- total_scheduled = transit_data['Transit'].sum()
598
- if abs(total_scheduled - transit_amount) > 0.01: # Allow small floating point differences
599
- raise ValueError(f"Transit schedule total ({total_scheduled}) does not match inventory transit ({transit_amount}) for {key}")
987
+ # Add location filter if needed
988
+ if self.location and location is not None:
989
+ mask_lead_time &= self.df_lead_time['Location'] == location
990
+ mask_inv &= self.df_inv['Location'] == location
600
991
 
601
- # Create transit orders
602
- for _, row in transit_data.iterrows():
603
- arrival_date = pd.to_datetime(row['ArrivalDate'], format='%Y-%m-%d')
604
- transit_schedule.append({
605
- 'quantity': float(row['Transit']),
606
- 'arrival_date': arrival_date
607
- })
608
- else:
609
- # If no transit data provided for this item, use default logic
610
- if len(dates) > 1:
611
- arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
612
- transit_schedule.append({
613
- 'quantity': transit_amount,
614
- 'arrival_date': arrival_date
615
- })
616
-
617
- return transit_schedule
618
-
619
-
620
- def _process_current_period(self, current_df_inv, df_sstock, key, date, transit_orders, dates):
621
- """
622
- Process inventory for the current period (i=0).
623
-
624
- This optimized version uses vectorized operations where possible and
625
- minimizes redundant calculations.
626
-
627
- Args:
628
- current_df_inv (pd.DataFrame): Current inventory data
629
- df_sstock (pd.DataFrame): Security stock calculation data
630
- key (tuple or str): Item identifier (item) or (item, location)
631
- date (str): Current date in 'YYYYMMDD' format
632
- transit_orders (dict): Dictionary tracking in-transit orders
633
- dates (list): List of all dates for this item
634
-
635
- Returns:
636
- pd.DataFrame: Processed inventory data for the current period
637
- """
638
- inventory_columns = ['Item', 'Location', 'Inventory', 'Transit', 'PurchaseFactor'] if self.location else ['Item', 'Inventory', 'Transit', 'PurchaseFactor']
639
- df_inventory = current_df_inv[inventory_columns].copy()
640
-
641
- # Vectorized initialization of inventory values with formatting
642
- df_inventory['FutureInventory'] = df_inventory['Inventory'].apply(
643
- lambda x: self._format_value(x, 'FutureInventory')
644
- )
645
- df_inventory['FutureTransit'] = df_inventory['Transit'].apply(
646
- lambda x: self._format_value(x, 'FutureTransit')
647
- )
648
- df_inventory['FutureInventoryTransit'] = df_inventory.apply(
649
- lambda row: self._format_value(row['Inventory'] + row['Transit'], 'FutureInventoryTransit'),
650
- axis=1
651
- )
652
-
653
- # Initialize transit orders for this item
654
- if key not in transit_orders:
655
- transit_orders[key] = []
656
-
657
- # Handle initial transit
658
- transit_qty = float(df_inventory['Transit'].iloc[0])
659
-
660
- # Prepare transit schedule
661
- transit_schedule = self._prepare_transit_schedule(key, transit_qty, dates)
662
-
663
- # Add scheduled transits to transit_orders
664
- transit_orders[key].extend(transit_schedule)
665
-
666
- # For period 0, TransitArrival should always be empty list
667
- df_inventory['TransitArrival'] = '[]'
668
-
669
- # Select relevant columns
670
- df_inventory = df_inventory[self.metadata + ['FutureInventoryTransit', 'FutureInventory', 'FutureTransit', 'TransitArrival']]
671
-
672
- # Merge with stock data
673
- df = pd.merge(df_inventory, df_sstock, on=self.metadata, how='inner')
674
-
675
- # Vectorized calculations for all rows at once
676
- df['SuggestedForecastPeriod'] = df_sstock['SuggestedForecastPeriod']
677
- df['SecurityStock'] = self._calculate_security_stock(df)
678
-
679
- # Apply formatting to calculated fields
680
- df['SuggestedForecast'] = df['SuggestedForecast'].apply(
681
- lambda x: self._format_value(x, 'SuggestedForecast')
682
- )
683
- df['ReorderPoint'] = df.apply(
684
- lambda row: self._format_value(max(0, row['SuggestedForecast'] + row['SecurityStock']), 'ReorderPoint'),
685
- axis=1
686
- )
687
- df['ReorderQtyBase'] = df.apply(
688
- lambda row: self._format_value(max(0, row['ReorderPoint'] - row['FutureInventoryTransit']), 'ReorderQtyBase'),
689
- axis=1
690
- )
691
-
692
- # First period has no reorder - vectorized assignment
693
- df['ReorderQty'] = 0
694
- df['ReorderQtyDays'] = 0
695
- df['ArrivalDate'] = '' # No order in period 0
696
-
697
- # Note: FutureInventoryTransitArrival and FutureStockoutDays are calculated later
698
- # in _process_item_optimized after all periods are processed
699
-
700
- return df
701
-
702
-
703
- def _process_transit_orders(self, transit_orders, key, current_date, previous_date):
704
- """
705
- Process transit orders and calculate arrivals for the current period.
706
-
707
- This optimized method uses vectorization for better performance with large
708
- numbers of transit orders. It manages the lifecycle of transit orders:
709
- 1. Identifies orders arriving in the current period
710
- 2. Moves arrived quantities from transit to stock
711
- 3. Updates remaining transit orders
712
- 4. Maintains arrival history for reporting
713
-
714
- Args:
715
- transit_orders (dict): Dictionary of active transit orders by item/location
716
- key (tuple or str): Item identifier (item) or (item, location)
717
- current_date (pd.Timestamp): Current period date
718
- previous_date (pd.Timestamp): Previous period date
719
-
720
- Returns:
721
- tuple: (stock_from_arrivals, new_transit, transit_arrivals)
722
- - stock_from_arrivals: Total quantity arriving in this period
723
- - new_transit: Total quantity still in transit
724
- - transit_arrivals: List of arrival records for this period
725
- """
726
- # Get orders for this key, return early if none
727
- orders = transit_orders.get(key, [])
728
- if not orders:
729
- return 0, 0, []
730
-
731
- # For small numbers of orders, use the loop-based implementation
732
- # as it has less overhead
733
- if len(orders) < 10:
734
- new_transit = 0
735
- remaining_orders = []
736
- transit_arrivals = []
737
- stock_from_arrivals = 0
738
-
739
- for order in orders:
740
- if order['arrival_date'] > previous_date and order['arrival_date'] <= current_date:
741
- # Order arrives in this period
742
- stock_from_arrivals += order['quantity']
743
- transit_arrivals.append({
744
- 'quantity': float(order['quantity']),
745
- 'arrival_date': order['arrival_date'].strftime('%Y-%m-%d')
746
- })
747
- else:
748
- # Order still in transit
749
- new_transit += order['quantity']
750
- remaining_orders.append(order)
751
-
752
- transit_orders[key] = remaining_orders
753
- return stock_from_arrivals, new_transit, transit_arrivals
754
-
755
- # For larger numbers of orders, use vectorized approach
756
- # Extract data into numpy arrays for faster processing
757
- quantities = np.array([order['quantity'] for order in orders], dtype=np.float64)
758
- arrival_dates = np.array([order['arrival_date'] for order in orders])
759
-
760
- # Vectorized date comparison
761
- mask_arrived = (arrival_dates > previous_date) & (arrival_dates <= current_date)
762
-
763
- # Calculate totals using numpy operations
764
- stock_from_arrivals = float(quantities[mask_arrived].sum()) if mask_arrived.any() else 0
765
- new_transit = float(quantities[~mask_arrived].sum()) if (~mask_arrived).any() else 0
766
-
767
- # Create transit arrivals list
768
- transit_arrivals = []
769
- if mask_arrived.any():
770
- arrived_indices = np.where(mask_arrived)[0]
771
- transit_arrivals = [
772
- {
773
- 'quantity': float(quantities[i]),
774
- 'arrival_date': arrival_dates[i].strftime('%Y-%m-%d')
775
- }
776
- for i in arrived_indices
777
- ]
778
-
779
- # Update transit orders with remaining orders
780
- if (~mask_arrived).any():
781
- remaining_indices = np.where(~mask_arrived)[0]
782
- transit_orders[key] = [orders[i] for i in remaining_indices]
783
- else:
784
- transit_orders[key] = []
785
-
786
- return stock_from_arrivals, new_transit, transit_arrivals
787
-
788
-
789
- def _process_future_period(self, current_df_inv, df_sstock, df_previous, key, date, dates, i, transit_orders):
790
- """
791
- Process inventory for future periods (i>0).
792
-
793
- This method:
794
- 1. Calculates consumption using SuggestedForecastPeriod from previous period
795
- 2. Updates stock levels considering consumption and arrivals
796
- 3. Determines if reorder is needed
797
- 4. Calculates reorder quantity if needed
798
- 5. Adds new orders to transit tracking
992
+ # Apply filters using boolean indexing
993
+ current_df_lead_time = self.df_lead_time[mask_lead_time]
994
+ current_df_inv = self.df_inv[mask_inv]
799
995
 
800
- Args:
801
- current_df_inv (pd.DataFrame): Current inventory data
802
- df_sstock (pd.DataFrame): Security stock calculation data
803
- df_previous (pd.DataFrame): Previous period's results
804
- key (tuple or str): Item identifier (item) or (item, location)
805
- date (str): Current date in 'YYYYMMDD' format
806
- dates (list): List of all dates for this item
807
- i (int): Current period index
808
- transit_orders (dict): Dictionary tracking in-transit orders
809
-
810
- Returns:
811
- pd.DataFrame: Processed inventory data for the period including:
812
- - Updated inventory levels
813
- - Reorder recommendations
814
- - Transit arrival information
815
- """
816
- inventory_columns = ['Item', 'Location', 'PurchaseFactor'] if self.location else ['Item', 'PurchaseFactor']
817
- df_inventory = current_df_inv[inventory_columns].copy()
818
- df = pd.merge(df_inventory, df_sstock, on=inventory_columns, how='inner')
819
- df['SuggestedForecastPeriod'] = df_sstock['SuggestedForecastPeriod']
820
-
821
- # Calculate consumption using SuggestedForecastPeriod from previous period
822
- consumption = df_previous['SuggestedForecastPeriod'].values[0]
823
-
824
- previous_stock = df_previous['FutureInventory'].values[0] - consumption
825
-
826
- # Process transit orders
827
- current_date = pd.to_datetime(date, format='%Y%m%d')
828
- previous_date = pd.to_datetime(dates[i-1], format='%Y%m%d')
829
-
830
- stock_from_arrivals, new_transit, transit_arrivals = self._process_transit_orders(
831
- transit_orders, key, current_date, previous_date
832
- )
833
-
834
- # Update inventory values with formatting
835
- future_stock = max(0, previous_stock + stock_from_arrivals)
836
- df['FutureInventory'] = self._format_value(future_stock, 'FutureInventory')
837
- df['FutureTransit'] = self._format_value(new_transit, 'FutureTransit')
838
- df['FutureInventoryTransit'] = self._format_value(
839
- future_stock + new_transit,
840
- 'FutureInventoryTransit'
841
- )
842
- df['TransitArrival'] = str(transit_arrivals) if transit_arrivals else '[]'
843
-
844
- # Calculate security stock and reorder values
845
- df['SecurityStock'] = self._calculate_security_stock(df)
846
-
847
- # Apply formatting to calculated fields
848
- df['SuggestedForecast'] = df['SuggestedForecast'].apply(
849
- lambda x: self._format_value(x, 'SuggestedForecast')
850
- )
851
- df['ReorderPoint'] = df.apply(
852
- lambda row: self._format_value(max(0, row['SuggestedForecast'] + row['SecurityStock']), 'ReorderPoint'),
853
- axis=1
854
- )
855
- df['ReorderQtyBase'] = df.apply(
856
- lambda row: self._format_value(max(0, row['ReorderPoint'] - row['FutureInventoryTransit']), 'ReorderQtyBase'),
857
- axis=1
858
- )
859
-
860
- # Calculate ReorderQty only if ReorderQtyBase > 0
861
- reorder_qty = np.where(
862
- df['ReorderQtyBase'] > 0,
863
- ((df['ReorderQtyBase'] / df['PurchaseFactor']).apply(np.ceil)) * df['PurchaseFactor'],
864
- 0
865
- )
866
- df['ReorderQty'] = df.apply(
867
- lambda row: self._format_value(reorder_qty[row.name], 'ReorderQty'),
868
- axis=1
869
- )
870
-
871
- # Calculate ReorderQtyDays, avoiding division by zero
872
- reorder_qty_days = np.where(
873
- (df['ReorderQty'] > 0) & (df['AvgDailyUsage'] > 0),
874
- df['ReorderQty'] / df['AvgDailyUsage'],
875
- 0
876
- )
877
- df['ReorderQtyDays'] = df.apply(
878
- lambda row: self._format_value(reorder_qty_days[row.name], 'ReorderQtyDays'),
879
- axis=1
880
- )
881
-
882
- # Add new order to transit if needed
883
- if df['ReorderQty'].values[0] > 0:
884
- avg_lead_time = df['AvgLeadTime'].values[0]
885
- arrival_date = current_date + timedelta(days=int(avg_lead_time))
886
- # Store the raw value for transit calculations
887
- transit_orders[key].append({
888
- 'quantity': float(df['ReorderQty'].values[0]),
889
- 'arrival_date': arrival_date
890
- })
891
- # Store arrival date for this period's order
892
- df['ArrivalDate'] = arrival_date.strftime('%Y-%m-%d')
893
- else:
894
- # No order in this period
895
- df['ArrivalDate'] = ''
896
-
897
-
898
- # Note: FutureInventoryTransitArrival and FutureStockoutDays are calculated later
899
- # in _process_item_optimized after all periods are processed
996
+ if not current_df_lead_time.empty and not current_df_inv.empty:
997
+ batch_data.append((key, dates, current_df_lead_time, current_df_inv))
998
+
999
+ except Exception as e:
1000
+ if self.verbose:
1001
+ print(f"Error preparando {key}: {e}")
1002
+ continue
900
1003
 
901
- return df
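The reorder sizing above rounds the base requirement up to a whole multiple of PurchaseFactor before converting it to days of coverage; a brief sketch with illustrative numbers (not taken from the package):

import numpy as np

reorder_qty_base = 130.0     # units still needed after netting stock and transit
purchase_factor = 25.0       # supplier pack size
reorder_qty = np.ceil(reorder_qty_base / purchase_factor) * purchase_factor     # 150.0

avg_daily_usage = 10.0
reorder_qty_days = reorder_qty / avg_daily_usage if avg_daily_usage > 0 else 0  # 15.0 days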
902
-
1004
+ return batch_data
903
1005
 
904
1006
  def _prepare_final_dataframe(self, data_frame):
905
1007
  """
906
1008
  Prepare the final output dataframe with proper formatting and column selection.
907
-
908
- This method:
909
- 1. Merges with lead time data to add reorder parameters
910
- 2. Formats dates to YYYY-MM-DD format
911
- 3. Renames columns for clarity
912
- 4. Rounds numeric values to 2 decimal places
913
- 5. Selects and orders final columns
914
-
915
- Args:
916
- data_frame (pd.DataFrame): Raw calculation results
917
-
918
- Returns:
919
- pd.DataFrame: Formatted output with columns:
920
- - PurchaseDate, Item, ItemDescription, (Location)
921
- - Forecast metrics: SuggestedForecast, SuggestedForecastPeriod
922
- - Inventory levels: FutureInventoryTransit (total), FutureInventory (stock), FutureTransit (transit)
923
- - FutureInventoryTransitArrival: FutureInventory + arrivals in the period
924
- - FutureStockoutDays: Days of inventory coverage
925
- - Transit information: TransitArrival
926
- - Reorder metrics: ReorderQtyBase, ReorderQty, ReorderQtyDays
927
- - Order information: ArrivalDate (arrival date of current period's order)
928
- - Planning parameters: PurchaseFactor, ReorderPoint, SecurityStock
929
- - Usage rates: AvgDailyUsage, MaxDailyUsage
930
- - Lead times: AvgLeadTime, MaxLeadTime
931
- - Coverage parameters: ReorderFreq, Coverage
1009
+ Complete version of the original function.
932
1010
  """
933
1011
  leadtimes_columns = ['Item', 'Location', 'ReorderFreq', 'Coverage'] if self.location else ['Item', 'ReorderFreq', 'Coverage']
934
1012
  leadtimes = self.df_lead_time[leadtimes_columns]
@@ -946,13 +1024,13 @@ class FutureReorder():
946
1024
  always_integer_fields = ['PurchaseFactor', 'AvgLeadTime', 'MaxLeadTime', 'ReorderQtyDays', 'ReorderFreq', 'Coverage']
947
1025
  for field in always_integer_fields:
948
1026
  if field in df_final.columns:
949
- df_final[field] = df_final[field].apply(lambda x: self._format_value(x, field))
1027
+ df_final[field] = df_final[field].apply(lambda x: _format_value_complete(x, field, True))
950
1028
 
951
1029
  # Apply formatting to fields that are ALWAYS decimals
952
1030
  always_decimal_fields = ['AvgDailyUsage', 'MaxDailyUsage']
953
1031
  for field in always_decimal_fields:
954
1032
  if field in df_final.columns:
955
- df_final[field] = df_final[field].apply(lambda x: self._format_value(x, field))
1033
+ df_final[field] = df_final[field].apply(lambda x: _format_value_complete(x, field, False))
956
1034
 
957
1035
  # Select final columns
958
1036
  if self.location:
@@ -976,186 +1054,229 @@ class FutureReorder():
976
1054
 
977
1055
  return df_final[final_cols]
978
1056
 
1057
+ def _filter_periods(self, df):
1058
+ """
1059
+ Filter out period 0 and last period from results.
1060
+ Period 0 is used only as the calculation base.
1061
+ The last period is filtered because it lacks the next period's transit data.
1062
+
1063
+ Special case: When start_date=None, don't filter the first period
1064
+ because it represents the actual current period.
1065
+ """
1066
+ if df.empty:
1067
+ return df
1068
+
1069
+ # Convert PurchaseDate to datetime for filtering
1070
+ df['PurchaseDate_dt'] = pd.to_datetime(df['PurchaseDate'])
1071
+
1072
+ # Get unique dates and sort them
1073
+ unique_dates = sorted(df['PurchaseDate_dt'].unique())
1074
+
1075
+ # Determine filtering logic based on start_date parameter
1076
+ if self.start_date is None:
1077
+ # When start_date=None, only filter the last period
1078
+ # Keep period 0 as it represents the current period
1079
+ if len(unique_dates) <= 1:
1080
+ self._log("⚠️ Warning: Only 1 period available, cannot filter last period")
1081
+ return pd.DataFrame(columns=df.columns.drop('PurchaseDate_dt'))
1082
+
1083
+ last_date = unique_dates[-1]
1084
+ filtered_df = df[df['PurchaseDate_dt'] != last_date].copy()
1085
+
1086
+ self._log(f"🔍 Filtered periods (start_date=None): Only removed last period ({last_date.strftime('%Y-%m-%d')})")
1087
+
1088
+ else:
1089
+ # When start_date is specified, filter both first and last periods (original logic)
1090
+ if len(unique_dates) <= 2:
1091
+ self._log("⚠️ Warning: Only 2 or fewer periods available after filtering")
1092
+ return pd.DataFrame(columns=df.columns.drop('PurchaseDate_dt'))
1093
+
1094
+ first_date = unique_dates[0]
1095
+ last_date = unique_dates[-1]
1096
+
1097
+ filtered_df = df[
1098
+ (df['PurchaseDate_dt'] != first_date) &
1099
+ (df['PurchaseDate_dt'] != last_date)
1100
+ ].copy()
1101
+
1102
+ self._log(f"🔍 Filtered periods: Removed period 0 ({first_date.strftime('%Y-%m-%d')}) and last period ({last_date.strftime('%Y-%m-%d')})")
1103
+
1104
+ # Drop the temporary datetime column
1105
+ filtered_df = filtered_df.drop('PurchaseDate_dt', axis=1)
1106
+
1107
+ return filtered_df
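Behaviorally, the filter keeps only the interior review dates; a hedged sketch with made-up monthly dates showing both branches:

import pandas as pd

df = pd.DataFrame({
    'PurchaseDate': ['2024-01-01', '2024-02-01', '2024-03-01', '2024-04-01'],
    'Item': ['A'] * 4,
})
dates = sorted(pd.to_datetime(df['PurchaseDate']).unique())

# start_date=None: drop only the last period        -> Jan, Feb, Mar remain
keep_when_none = df[pd.to_datetime(df['PurchaseDate']) != dates[-1]]

# start_date given: drop the first and last periods -> Feb, Mar remain
keep_when_given = df[~pd.to_datetime(df['PurchaseDate']).isin([dates[0], dates[-1]])]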
979
1108
 
980
1109
  def reorder(self):
981
1110
  """
982
- Main method to calculate future reorder recommendations.
983
-
984
- This optimized version uses batch processing and vectorization to improve
985
- performance, especially for large datasets. The method:
986
- 1. Generates future dates based on reorder frequencies
987
- 2. Groups items for batch processing when possible
988
- 3. Pre-allocates data structures to minimize memory operations
989
- 4. Uses vectorized calculations where applicable
990
- 5. Formats and returns consolidated results
1111
+ Main method to calculate future reorder recommendations at massive scale.
1112
+
1113
+ This high-performance method orchestrates the complete inventory reorder calculation
1114
+ process using parallel processing and intelligent resource management. It handles
1115
+ large datasets efficiently through batching, multiprocessing, and optimized algorithms.
1116
+
1117
+ Processing Pipeline:
1118
+ 1. Generate future dates based on reorder frequencies
1119
+ 2. Pre-filter and prepare data for batch processing
1120
+ 3. Split items into optimally-sized batches
1121
+ 4. Process batches in parallel using multiple CPU cores
1122
+ 5. Combine and format results with proper data types
1123
+ 6. Apply period filtering (removes period 0 and last period)
1124
+ 7. Return comprehensive reorder recommendations
1125
+
1126
+ Performance Features:
1127
+ - Auto-configures batch sizes based on dataset size
1128
+ - Uses ProcessPoolExecutor for true parallel processing
1129
+ - Provides real-time progress tracking and ETA calculations
1130
+ - Implements intelligent error handling and recovery
1131
+ - Optimizes memory usage through efficient data structures
1132
+
1133
+ Period Filtering Logic:
1134
+ - When start_date=None: Only removes last period (keeps period 0 as current)
1135
+ - When start_date specified: Removes both period 0 and last period
1136
+ - Last period is always removed due to incomplete transit data
991
1137
 
992
1138
  Returns:
993
- pd.DataFrame: Complete reorder recommendations for all items/locations
994
- and time periods. See _prepare_final_dataframe() for
995
- detailed column descriptions.
996
-
1139
+ pd.DataFrame: Complete reorder recommendations with columns:
1140
+ - PurchaseDate: Date when reorder should be evaluated
1141
+ - Item, ItemDescription, (Location): Item identification
1142
+ - Forecast metrics: SuggestedForecast, SuggestedForecastPeriod
1143
+ - Inventory levels: FutureInventoryTransit, FutureInventory, FutureTransit
1144
+ - FutureInventoryTransitArrival: Stock + arrivals in the period
1145
+ - FutureStockoutDays: Days of inventory coverage
1146
+ - Transit information: TransitArrival details
1147
+ - Reorder metrics: ReorderQtyBase, ReorderQty, ReorderQtyDays
1148
+ - Order information: ArrivalDate of current period's order
1149
+ - Planning parameters: PurchaseFactor, ReorderPoint, SecurityStock
1150
+ - Usage rates: AvgDailyUsage, MaxDailyUsage
1151
+ - Lead times: AvgLeadTime, MaxLeadTime
1152
+ - Coverage parameters: ReorderFreq, Coverage
1153
+
997
1154
  Example usage:
998
- >>> reorder_system = FutureReorder(
1155
+ >>> reorder_system = FutureReorderMassiveComplete(
999
1156
  ... df_inv=inventory_df,
1000
1157
  ... df_lead_time=lead_time_df,
1001
1158
  ... df_prep=prep_df,
1002
1159
  ... df_fcst=forecast_df,
1003
1160
  ... periods=6,
1004
- ... start_date='2024-01-01'
1161
+ ... start_date=None, # Use current date
1162
+ ... batch_size=100, # Optional: auto-configured if None
1163
+ ... n_workers=4 # Optional: auto-configured if None
1005
1164
  ... )
1006
1165
  >>> results = reorder_system.reorder()
1007
- >>> results.head()
1008
- # Returns DataFrame with reorder recommendations
1166
+ >>> print(f"Generated {len(results)} reorder recommendations")
1009
1167
  """
1168
+ start_time = time.time()
1010
1169
 
1170
+ self._log("🚀 FutureReorder Massive Complete - Processing Started")
1171
+
1172
+ # Generate future dates
1173
+ self._log("📅 Generando fechas futuras...")
1011
1174
  item_dates = self.future_date()
1012
1175
 
1013
- # Pre-allocate list for results instead of concatenating DataFrames
1014
- all_results = []
1176
+ if not item_dates:
1177
+ columns = ['Date', 'Item'] + (['Location'] if self.location else [])
1178
+ return pd.DataFrame(columns=columns)
1179
+
1180
+ # Prepare batch data first to get accurate count of items that will actually be processed
1181
+ batch_data = self._prepare_batch_data(item_dates)
1182
+
1183
+ # Calculate accurate statistics based on items that will actually be processed
1184
+ total_items = len(batch_data)
1185
+ if batch_data:
1186
+ # Get dates for items that will actually be processed
1187
+ processed_item_dates = {item_data[0]: item_data[1] for item_data in batch_data}
1188
+ avg_periods = np.mean([len(dates) for dates in processed_item_dates.values()])
1189
+ total_calculations = sum(len(dates) for dates in processed_item_dates.values())
1190
+ else:
1191
+ avg_periods = 0
1192
+ total_calculations = 0
1015
1193
 
1016
- # Group items by number of periods for potential batch processing
1017
- items_by_period_count = {}
1018
- for key, dates in item_dates.items():
1019
- period_count = len(dates)
1020
- if period_count not in items_by_period_count:
1021
- items_by_period_count[period_count] = []
1022
- items_by_period_count[period_count].append((key, dates))
1023
-
1024
- # Process each group
1025
- for period_count, items_group in items_by_period_count.items():
1026
- # For each item in the group
1027
- for key, dates in items_group:
1028
- if self.location:
1029
- item, location = key
1030
- else:
1031
- item = key
1032
- location = None
1033
-
1034
- # Get current dataframes
1035
- current_df_lead_time, current_df_inv = self._get_current_dataframes(item, location)
1036
-
1037
- if current_df_lead_time.empty or current_df_inv.empty:
1038
- continue
1039
-
1040
- # Process this item using optimized approach
1041
- item_results = self._process_item_optimized(
1042
- key, item, location, dates, current_df_lead_time, current_df_inv
1043
- )
1044
-
1045
- if item_results is not None and not item_results.empty:
1046
- all_results.append(item_results)
1194
+ self._log(f"📊 Dataset Info:")
1195
+ self._log(f" • Total Items: {total_items}")
1196
+ self._log(f" • Average Periods per Item: {avg_periods:.1f}")
1197
+ self._log(f" • Total Calculations: {total_calculations}")
1047
1198
 
1048
- # Combine all results efficiently
1049
- if all_results:
1050
- data_frame = pd.concat(all_results, ignore_index=True)
1051
- else:
1199
+ # batch_data already prepared above for accurate counting
1200
+ if not batch_data:
1201
+ self._log("⚠️ No items to process after filtering")
1052
1202
  columns = ['Date', 'Item'] + (['Location'] if self.location else [])
1053
- data_frame = pd.DataFrame(columns=columns)
1203
+ return pd.DataFrame(columns=columns)
1204
+
1205
+ self._log("🔧 Datos preparados por lotes...")
1206
+
1207
+ # Split into batches for parallel processing
1208
+ batches = []
1209
+ for i in range(0, len(batch_data), self.batch_size):
1210
+ batch = batch_data[i:i + self.batch_size]
1211
+ batch_args = (
1212
+ batch, self.df_fcst, self.df_prep, self.metadata,
1213
+ self.location, self.default_coverage, self.complete_suggested,
1214
+ self.security_stock_ref, self.integer, self.verbose, self.df_transit
1215
+ )
1216
+ batches.append(batch_args)
1054
1217
 
1055
- # Prepare and return final dataframe
1056
- return self._prepare_final_dataframe(data_frame)
1057
-
1058
-
1059
- def _process_item_optimized(self, key, item, location, dates, current_df_lead_time, current_df_inv):
1060
- """
1061
- Process a single item through all periods using optimized approach.
1062
-
1063
- This method pre-allocates arrays and uses vectorized operations where possible
1064
- to improve performance.
1065
-
1066
- Args:
1067
- key: Item key (item or (item, location))
1068
- item: Item identifier
1069
- location: Location identifier (if applicable)
1070
- dates: List of dates to process
1071
- current_df_lead_time: Lead time data for this item
1072
- current_df_inv: Inventory data for this item
1073
-
1074
- Returns:
1075
- pd.DataFrame: Results for all periods of this item
1076
- """
1218
+ total_batches = len(batches)
1219
+ items_per_batch = len(batch_data) / total_batches if total_batches > 0 else 0
1077
1220
 
1078
- # Pre-allocate dictionaries for intermediate results
1079
- suggested_forecasts = {}
1080
- df_avgs = {}
1081
- df_maxs = {}
1082
- df_sstocks = {}
1083
- period_results = {}
1221
+ self._log(f"⚙️ Processing Config:")
1222
+ self._log(f" • Batch Size: {self.batch_size}")
1223
+ self._log(f" • Workers: {self.n_workers}")
1224
+ self._log(f" • Total Batches: {total_batches}")
1225
+ self._log(f" • Items per Batch: {items_per_batch:.1f}")
1084
1226
 
1085
- # Initialize transit orders for this item
1086
- transit_orders = {key: []}
1227
+ current_time = datetime.now().strftime('%H:%M:%S')
1228
+ self._log(f"⏱️ Starting processing at {current_time}")
1087
1229
 
1088
- # Track last suggested forecast value for complete_suggested feature
1089
- last_suggested_value = None
1090
-
1091
- # Process each period
1092
- for i, date in enumerate(dates):
1093
- # Calculate suggested forecast (cached if possible)
1094
- suggested_forecasts[i] = self._calculate_suggested_forecast(
1095
- current_df_lead_time, current_df_inv, date, last_suggested_value
1096
- )
1097
-
1098
- # Update last_suggested_value for next iteration
1099
- if 'SuggestedForecast' in suggested_forecasts[i].columns:
1100
- last_suggested_value = suggested_forecasts[i]['SuggestedForecast'].iloc[0]
1101
-
1102
- # Calculate daily usage
1103
- df_avgs[i], df_maxs[i] = self._calculate_daily_usage(
1104
- suggested_forecasts[i], date
1105
- )
1106
-
1107
- # Calculate security stock data
1108
- df_sstocks[i] = self._calculate_security_stock_data(
1109
- df_maxs[i], current_df_lead_time, period_index=i, dates=dates
1110
- )
1111
-
1112
- # Process period based on whether it's current or future
1113
- if i == 0:
1114
- period_results[i] = self._process_current_period(
1115
- current_df_inv, df_sstocks[i], key, date, transit_orders, dates
1116
- )
1117
- else:
1118
- period_results[i] = self._process_future_period(
1119
- current_df_inv, df_sstocks[i], period_results[i-1],
1120
- key, date, dates, i, transit_orders
1121
- )
1230
+ # Process batches in parallel
1231
+ results = []
1232
+ completed_batches = 0
1233
+
1234
+ with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
1235
+ # Submit all batches
1236
+ future_to_batch = {executor.submit(process_item_batch_complete, batch_args): i
1237
+ for i, batch_args in enumerate(batches)}
1122
1238
 
1123
- # Add metadata columns efficiently
1124
- period_results[i]['Date'] = date
1125
- period_results[i]['Item'] = item
1126
- if self.location:
1127
- period_results[i]['Location'] = location
1239
+ # Collect results as they complete
1240
+ for future in as_completed(future_to_batch):
1241
+ batch_idx = future_to_batch[future]
1242
+ try:
1243
+ result = future.result()
1244
+ if not result.empty:
1245
+ results.append(result)
1246
+
1247
+ completed_batches += 1
1248
+ progress = (completed_batches / total_batches) * 100
1249
+
1250
+ elapsed_time = time.time() - start_time
1251
+ if completed_batches > 0:
1252
+ eta_seconds = (elapsed_time / completed_batches) * (total_batches - completed_batches)
1253
+ eta_str = f"{int(eta_seconds // 60)}m {int(eta_seconds % 60)}s"
1254
+ else:
1255
+ eta_str = "calculating..."
1256
+
1257
+ self._log(f"✅ Batch {completed_batches}/{total_batches} completed ({progress:.1f}%) - ETA: {eta_str}")
1258
+
1259
+ except Exception as e:
1260
+ self._log(f"❌ Error in batch {batch_idx}: {e}")
1261
+ continue
1128
1262
 
1129
- # After processing all periods, update FutureInventoryTransitArrival with next period's TransitArrival
1130
- for i in range(len(dates)):
1131
- if i < len(dates) - 1: # If there's a next period
1132
- # Get next period's TransitArrival
1133
- next_transit_arrival = period_results[i + 1]['TransitArrival'].iloc[0]
1134
- transit_arrival_sum = self._sum_transit_arrivals(next_transit_arrival)
1135
- else: # Last period - no next period
1136
- transit_arrival_sum = 0
1263
+ # Combine all results
1264
+ if results:
1265
+ self._log("🔗 Combinando resultados...")
1266
+ final_result = pd.concat(results, ignore_index=True)
1137
1267
 
1138
- # Update FutureInventoryTransitArrival
1139
- period_results[i]['FutureInventoryTransitArrival'] = self._format_value(
1140
- period_results[i]['FutureInventory'].iloc[0] + transit_arrival_sum,
1141
- 'FutureInventoryTransitArrival'
1142
- )
1268
+ # Prepare final dataframe with proper formatting
1269
+ final_result = self._prepare_final_dataframe(final_result)
1143
1270
 
1144
- # Recalculate FutureStockoutDays with the updated FutureInventoryTransitArrival
1145
- period_results[i]['FutureStockoutDays'] = self._calculate_inventory_days(period_results[i])
1146
-
1147
- # Combine all periods for this item
1148
- if period_results:
1149
- # Stack all period results at once
1150
- item_df = pd.concat(period_results.values(), ignore_index=True)
1271
+ # Filter out period 0 and last period from results
1272
+ final_result = self._filter_periods(final_result)
1151
1273
 
1152
- # Reorder columns for consistency
1153
- cols = ['Date', 'Item']
1154
- if self.location:
1155
- cols.append('Location')
1156
- other_cols = [col for col in item_df.columns if col not in cols]
1157
- item_df = item_df[cols + other_cols]
1274
+ total_time = time.time() - start_time
1275
+ self._log(f"🎉 Processing completed in {total_time:.2f}s")
1276
+ self._log(f"📈 Final result: {len(final_result)} records")
1158
1277
 
1159
- return item_df
1160
-
1161
- return None
1278
+ return final_result
1279
+ else:
1280
+ self._log("⚠️ No results generated")
1281
+ columns = ['Date', 'Item'] + (['Location'] if self.location else [])
1282
+ return pd.DataFrame(columns=columns)