datupapi 1.112.0__py3-none-any.whl → 1.112.2__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
datupapi/inventory/src/Format/inventory_format.py
@@ -40,7 +40,7 @@ class InventoryFormat():
 
  colmuns_meta = ["Customer","Country","ProductType","Weight","Dimension","Color","Origen","Gama","Marca","MateriaPrima",
  "JefeProducto","JefeProductoDescription","GrupoCompra","Familia","Seccion","Categoria","SubCategoria","Linea",
- "SubLinea","Canal","InventoryUnit","Comments","DeliveryFactor","PurchaseOrderUnit","PalletFactor","MOQ"]
+ "SubLinea","Canal","InventoryUnit","Comments","DeliveryFactor","PurchaseOrderUnit","PalletFactor","MOQ","Metadata"]
 
  columns_advance = ["BackSuggestedForecast","NextSuggestedForecast","BackReorderQtyBase","BackReorderQty","NextReorderQtyBase",
  "NextReorderQty","MinOrderQty","MaxOrderQty","OtifOrder","TotalOrder","DelayDays","ShortFall"]
datupapi/inventory/src/FutureInventory/future_reorder.py
@@ -14,6 +14,101 @@ from datupapi.inventory.src.SuggestedForecast.suggested_forecast import SuggestedForecast
 from datupapi.inventory.src.FutureInventory.daily_usage_future import DailyUsageFuture
 
 
+ def _generate_item_dates_worker(key, df_lead_time, periods, period2, start_date, start_date_zero, default_coverage, location):
+ """
+ Generate dates for a single item in the worker process context.
+ This function replicates the logic from future_date() but for a single item.
+
+ Args:
+ key: Item identifier (str) or (item, location) tuple
+ df_lead_time: Lead time DataFrame (filtered for this item)
+ periods: Number of periods to generate (for ReorderFreq > 20)
+ period2: Number of periods to generate (for ReorderFreq <= 20)
+ start_date: Start date for period 1 (can be None)
+ start_date_zero: Custom start date for period 0 (can be None)
+ default_coverage: Default coverage days
+ location: Boolean indicating location-based processing
+ (Note: This parameter is kept for interface consistency but is not
+ directly used in date generation logic, as dates depend on ReorderFreq
+ which is already in the filtered df_lead_time)
+
+ Returns:
+ List[str]: List of dates in 'YYYYMMDD' format
+ """
+ try:
+ # Determine the starting date for period 0
+ if start_date_zero is not None:
+ # Use custom start date for period 0
+ actual_date = pd.to_datetime(start_date_zero, format='%Y-%m-%d')
+ else:
+ # Use current system date for period 0 (original behavior)
+ DOCKER_CONFIG_PATH = os.path.join('/opt/ml/processing/input', 'config.yml')
+ utils = Utils(config_file=DOCKER_CONFIG_PATH, logfile='data_io', log_path='output/logs')
+ timestamp = utils.set_timestamp()
+ actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')
+
+ # Determine which period count to use based on ReorderFreq
+ reorder_freq = df_lead_time['ReorderFreq'].iloc[0]
+ if pd.isna(reorder_freq) or reorder_freq == 0:
+ reorder_freq = default_coverage
+ reorder_freq = int(reorder_freq)
+
+ # Use period2 for ReorderFreq <= 20, otherwise use periods
+ if reorder_freq <= 20:
+ effective_periods = period2
+ else:
+ effective_periods = periods
+
+ # Use effective_periods + 1 internally to calculate one extra period for transit calculations
+ end_date = actual_date + pd.DateOffset(months=effective_periods + 1)
+
+ # Handle start_date = None case
+ if start_date is None:
+ # If start_date is None, use actual_date as the base for period 1
+ base_start_date = actual_date
+ else:
+ base_start_date = pd.to_datetime(start_date, format='%Y-%m-%d')
+
+ # ReorderFreq was already calculated above, no need to recalculate
+
+ # Generate date range for this item
+ date_range = []
+
+ # Always include actual date (period 0)
+ date_range.append(actual_date)
+
+ # Include base_start_date if after actual_date
+ if base_start_date > actual_date:
+ date_range.append(base_start_date)
+
+ # Generate subsequent dates using a controlled loop instead of pd.date_range
+ current_date = base_start_date + timedelta(days=reorder_freq)
+ while current_date <= end_date:
+ date_range.append(current_date)
+ current_date += timedelta(days=reorder_freq)
+
+ # Convert to string format
+ date_strings = [d.strftime('%Y%m%d') for d in date_range]
+
+ return date_strings
+
+ except Exception as e:
+ print(f"Error generating dates for item {key}: {str(e)}")
+ # Return a minimal date list with just the current date
+ try:
+ if start_date_zero is not None:
+ actual_date = pd.to_datetime(start_date_zero, format='%Y-%m-%d')
+ else:
+ DOCKER_CONFIG_PATH = os.path.join('/opt/ml/processing/input', 'config.yml')
+ utils = Utils(config_file=DOCKER_CONFIG_PATH, logfile='data_io', log_path='output/logs')
+ timestamp = utils.set_timestamp()
+ actual_date = pd.to_datetime(str(int(float(timestamp[0:8]))), format='%Y%m%d')
+ return [actual_date.strftime('%Y%m%d')]
+ except:
+ # Last resort: return today's date
+ return [datetime.now().strftime('%Y%m%d')]
+
+
 def process_item_batch_complete(batch_args):
 """
 Process a batch of items in parallel with complete functionality.
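The new worker derives each item's horizon from its ReorderFreq: values of 20 or below use period2, larger values use periods, and one extra month is added internally for transit calculations. A minimal sketch of that rule, with illustrative frequencies and a fixed start date (the real function pulls the start from config or start_date_zero):

```python
# Standalone sketch of the date-generation rule above; the defaults and
# sample frequencies are illustrative, not package defaults.
from datetime import timedelta
import pandas as pd

def sketch_dates(reorder_freq, periods=6, period2=2, start='2024-01-01'):
    actual_date = pd.to_datetime(start)
    effective = period2 if reorder_freq <= 20 else periods
    end_date = actual_date + pd.DateOffset(months=effective + 1)  # +1 for transit
    out, cur = [actual_date], actual_date + timedelta(days=reorder_freq)
    while cur <= end_date:
        out.append(cur)
        cur += timedelta(days=reorder_freq)
    return [d.strftime('%Y%m%d') for d in out]

print(len(sketch_dates(7)))   # high-frequency item: short horizon via period2
print(len(sketch_dates(30)))  # low-frequency item: full horizon via periods
```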
@@ -24,7 +119,7 @@ def process_item_batch_complete(batch_args):
 
 Args:
 batch_args (tuple): Contains all necessary data for batch processing:
- - batch_items: List of item data tuples (key, dates, lead_time_df, inv_df)
+ - batch_items: List of item data tuples (key, lead_time_df, inv_df)
 - df_fcst: Forecast data DataFrame
 - df_prep: Preparation data DataFrame
 - metadata: List of metadata columns
@@ -35,22 +130,32 @@ def process_item_batch_complete(batch_args):
 - integer: Boolean for integer formatting of quantities
 - verbose: Boolean for detailed logging
 - df_transit: Transit schedule DataFrame (optional)
+ - periods: Number of periods to generate
+ - start_date: Start date for period 1 (can be None)
+ - start_date_zero: Custom start date for period 0 (can be None)
 
 Returns:
 pd.DataFrame: Combined results for all items in the batch, or empty DataFrame if errors
 """
 try:
 (batch_items, df_fcst, df_prep, metadata, location, default_coverage,
- complete_suggested, security_stock_ref, integer, verbose, df_transit) = batch_args
+ complete_suggested, security_stock_ref, integer, verbose, df_transit,
+ periods, period2, start_date, start_date_zero) = batch_args
 
 results = []
 processed_count = 0
 error_count = 0
 
 for item_data in batch_items:
- key, dates, current_df_lead_time, current_df_inv = item_data
+ key, current_df_lead_time, current_df_inv = item_data
 
 try:
+ # Generate dates for this item locally in the worker process
+ dates = _generate_item_dates_worker(
+ key, current_df_lead_time, periods, period2, start_date,
+ start_date_zero, default_coverage, location
+ )
+
 # Process this item using the full logic with an implicit timeout
 item_result = _process_item_complete(
 key, dates, current_df_lead_time, current_df_inv,
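Since batch items no longer carry pre-computed dates, the batch_args tuple grows by four trailing fields. A toy illustration of the new unpacking order (every value here is a dummy stand-in):

```python
import pandas as pd

# Dummy stand-ins for the real frames; only the tuple shape matters here.
lead_time_df = pd.DataFrame({'Item': ['A'], 'ReorderFreq': [7]})
inv_df = pd.DataFrame({'Item': ['A']})
batch_items = [('A', lead_time_df, inv_df)]  # key, lead_time_df, inv_df - no dates

batch_args = (batch_items, None, None, [], False, 30, False, False, True, True,
              None, 6, 2, None, None)  # ends: periods, period2, start_date, start_date_zero
(batch_items, df_fcst, df_prep, metadata, location, default_coverage,
 complete_suggested, security_stock_ref, integer, verbose, df_transit,
 periods, period2, start_date, start_date_zero) = batch_args
print(periods, period2)  # 6 2
```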
@@ -157,7 +262,11 @@ def _process_item_complete(key, dates, current_df_lead_time, current_df_inv,
 
 # Update last_suggested_value for next iteration
 if 'SuggestedForecast' in suggested_forecasts[i].columns:
- last_suggested_value = suggested_forecasts[i]['SuggestedForecast'].iloc[0]
+ new_suggested_value = suggested_forecasts[i]['SuggestedForecast'].iloc[0]
+
+ # Only update if the new value is not NaN
+ if not pd.isna(new_suggested_value):
+ last_suggested_value = new_suggested_value
 
 # Calculate daily usage with optimized calls
 df_avgs[i], df_maxs[i] = _calculate_daily_usage_complete(
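The effect of the NaN guard, in isolation: a period whose SuggestedForecast comes back as NaN no longer clobbers the running value (sample values are illustrative):

```python
import pandas as pd

last_suggested_value = 10.0
for new_suggested_value in (12.0, float('nan'), 8.0):
    # Only update if the new value is not NaN (mirrors the change above)
    if not pd.isna(new_suggested_value):
        last_suggested_value = new_suggested_value
print(last_suggested_value)  # 8.0 - the NaN period was skipped, not propagated
```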
@@ -192,7 +301,27 @@ def _process_item_complete(key, dates, current_df_lead_time, current_df_inv,
 
 except Exception as e:
 # Log error but continue with next period
- print(f"Warning: Error processing period {i} for item {key}: {str(e)}")
+ import traceback
+ tb = traceback.extract_tb(e.__traceback__)
+ function_name = tb[-1].name if tb else 'unknown'
+ line_number = tb[-1].lineno if tb else 'unknown'
+
+ print(f"Warning: Error processing period {i} for item {key}:")
+ print(f" Function: {function_name} (line {line_number})")
+ print(f" Error: {str(e)}")
+ print(f" Error type: {type(e).__name__}")
+ print(f" Date value: {repr(date)} (type: {type(date)})")
+ print(f" Period index: {i}")
+ print(f" Total dates available: {len(dates)}")
+
+ # Print more context for debugging
+ if hasattr(e, '__cause__') and e.__cause__:
+ print(f" Caused by: {str(e.__cause__)}")
+
+ # Print the full traceback for error analysis
+ print(f" Full traceback:")
+ traceback.print_exc()
+
 continue
 
 # After processing all periods, update FutureInventoryTransitArrival
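The enriched logging relies on traceback.extract_tb to name the innermost frame that raised. A self-contained demonstration of that pattern:

```python
import traceback

def boom():
    return 1 / 0

try:
    boom()
except Exception as e:
    tb = traceback.extract_tb(e.__traceback__)
    # tb[-1] is the innermost frame: where the exception was actually raised
    print(f"Function: {tb[-1].name} (line {tb[-1].lineno})")
    print(f"Error type: {type(e).__name__}")
```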
@@ -233,7 +362,26 @@ def _process_item_complete(key, dates, current_df_lead_time, current_df_inv,
 
 except Exception as e:
 # Handle any unexpected errors at the item level
- print(f"Error processing item {key}: {str(e)}")
+ import traceback
+ tb = traceback.extract_tb(e.__traceback__)
+ function_name = tb[-1].name if tb else 'unknown'
+ line_number = tb[-1].lineno if tb else 'unknown'
+
+ print(f"Error processing item {key}:")
+ print(f" Function: {function_name} (line {line_number})")
+ print(f" Error: {str(e)}")
+ print(f" Error type: {type(e).__name__}")
+ print(f" Item dates: {dates[:5] if dates else 'None'}... (showing first 5)")
+ print(f" Total dates: {len(dates) if dates else 0}")
+
+ # Print more context for debugging
+ if hasattr(e, '__cause__') and e.__cause__:
+ print(f" Caused by: {str(e.__cause__)}")
+
+ # Print the full traceback for error analysis
+ print(f" Full traceback:")
+ traceback.print_exc()
+
 return None
 
 
@@ -282,11 +430,116 @@ def _format_value_complete(value, field_name, integer):
 return value
 
 
+ def _suggested_forecast_fallback(current_df_lead_time, current_df_inv, date, df_fcst, metadata, location, default_coverage):
+ """
+ Simplified SuggestedForecast fallback function for multiprocessing compatibility.
+
+ This function provides a basic forecast calculation when the main SuggestedForecast class fails
+ due to multiprocessing issues. It calculates the forecast using an average-based approach:
+
+ 1. Sum all forecasts in the coverage period
+ 2. Calculate daily average (sum / total_days_in_period)
+ 3. Multiply by coverage days
+ 4. Round up to nearest integer
+
+ Args:
+ current_df_lead_time: Lead time DataFrame for this item
+ current_df_inv: Inventory DataFrame for this item
+ date: Date string in 'YYYYMMDD' format
+ df_fcst: Forecast DataFrame
+ metadata: List of metadata columns
+ location: Boolean indicating location-based processing
+ default_coverage: Default coverage days
+
+ Returns:
+ pd.DataFrame: DataFrame with SuggestedForecast column
+ """
+ try:
+ # Parse the date
+ current_date = pd.to_datetime(date, format='%Y%m%d')
+
+ # Get coverage for this item
+ coverage = current_df_lead_time['Coverage'].iloc[0] if 'Coverage' in current_df_lead_time.columns else default_coverage
+ if pd.isna(coverage):
+ coverage = default_coverage
+ coverage = int(coverage)
+
+ # Calculate forecast end date
+ forecast_end_date = current_date + timedelta(days=coverage)
+
+ # Filter forecast data for this item and date range
+ if location:
+ item = current_df_inv['Item'].iloc[0]
+ loc = current_df_inv['Location'].iloc[0]
+ forecast_mask = (df_fcst['Item'] == item) & (df_fcst['Location'] == loc)
+ else:
+ item = current_df_inv['Item'].iloc[0]
+ forecast_mask = df_fcst['Item'] == item
+
+ # Add date range filter - get all forecast data for this item
+ forecast_mask &= (df_fcst['Date'] >= current_date) & (df_fcst['Date'] <= forecast_end_date)
+
+ item_forecast = df_fcst[forecast_mask]
+
+ # Calculate suggested forecast using average-based approach
+ if not item_forecast.empty and 'Forecast' in item_forecast.columns:
+ # Step 1: Sum all forecasts in the period
+ total_forecast = item_forecast['Forecast'].sum()
+
+ # Step 2: Calculate total days in the forecast period
+ # Simplification: assume 30 days per month for calculation
+ total_days_in_period = len(item_forecast) # Number of forecast records
+ if total_days_in_period == 0:
+ suggested_forecast = 0.0
+ else:
+ # Step 3: Calculate daily average
+ daily_average = total_forecast / total_days_in_period
+
+ # Step 4: Multiply by coverage days
+ suggested_forecast = daily_average * coverage
+
+ # Step 5: Round up to nearest integer
+ suggested_forecast = np.ceil(suggested_forecast)
+
+
+ else:
+ # Fallback: use 0 if no forecast data available
+ suggested_forecast = 0.0
+ item = current_df_inv['Item'].iloc[0]
+ location_msg = ""
+ if location and 'Location' in current_df_inv.columns:
+ loc = current_df_inv['Location'].iloc[0]
+ location_msg = f" at location {loc}"
+ print(f" ⚠️ No forecast data found for item {item}{location_msg}, using 0")
+
+ # Create result DataFrame
+ result_df = current_df_inv[metadata].copy()
+ result_df['SuggestedForecast'] = suggested_forecast
+
+ # Add required columns
+ result_df['PurchaseFactor'] = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
+ result_df['ItemDescription'] = current_df_inv.get('ItemDescription', pd.Series([''])).iloc[0]
+
+ return result_df
+
+ except Exception as e:
+ print(f" ❌ Fallback SuggestedForecast also failed: {str(e)}")
+ # Last resort: return basic structure with 0 forecast
+ result_df = current_df_inv[metadata].copy()
+ result_df['SuggestedForecast'] = 0.0
+ result_df['PurchaseFactor'] = current_df_inv.get('PurchaseFactor', pd.Series([1])).iloc[0]
+ result_df['ItemDescription'] = current_df_inv.get('ItemDescription', pd.Series([''])).iloc[0]
+ return result_df
+
+
 def _calculate_suggested_forecast_complete(current_df_lead_time, current_df_inv, date, last_suggested_value,
- df_fcst, df_prep, metadata, location, default_coverage, complete_suggested):
+ df_fcst, df_prep, metadata, location, default_coverage, complete_suggested):
 """Calculate suggested forecast for the given date using the SuggestedForecast class."""
 # Convert current date to datetime
- current_date = pd.to_datetime(date, format='%Y%m%d')
+ try:
+ current_date = pd.to_datetime(date, format='%Y%m%d')
+ except Exception as e:
+ raise ValueError(f"_calculate_suggested_forecast_complete: Invalid date '{date}' - {str(e)}")
 
 # Get the maximum forecast date available
 max_forecast_date = df_fcst['Date'].max()
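The fallback's arithmetic reduces to four steps; a worked example with made-up numbers (note the code averages over forecast records, despite its 30-days-per-month comment):

```python
import numpy as np

forecasts = [120.0, 90.0, 150.0]  # hypothetical forecast records in the window
coverage = 30                     # coverage days for the item

total_forecast = sum(forecasts)                  # step 1: 360.0
daily_average = total_forecast / len(forecasts)  # step 2: 120.0 per record
suggested = daily_average * coverage             # step 3: 3600.0
suggested = float(np.ceil(suggested))            # step 4: round up
print(suggested)  # 3600.0
```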
@@ -329,21 +582,36 @@ def _calculate_suggested_forecast_complete(current_df_lead_time, current_df_inv,
 join_='left'
 ).suggested_forecast()
 except Exception as e:
- # Get item identifier for error message
- item = current_df_inv['Item'].iloc[0]
- location_msg = ""
- if location and 'Location' in current_df_inv.columns:
- loc = current_df_inv['Location'].iloc[0]
- location_msg = f" at location {loc}"
+ print(f" ❌ Initial calculation failed: {str(e)}")
+ print(f" 🔄 Attempting fallback SuggestedForecast calculation...")
 
- error_msg = (
- f"Cannot calculate initial forecast for item {item}{location_msg}. "
- f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
- f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
- f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}. "
- f"Original error: {str(e)}"
- )
- raise ValueError(error_msg)
+ try:
+ # Use simplified fallback function
+ fallback_result = _suggested_forecast_fallback(
+ current_df_lead_time, current_df_inv, date, df_fcst,
+ metadata, location, default_coverage
+ )
+
+ return fallback_result
+
+ except Exception as fallback_error:
+ print(f" ❌ Fallback initial calculation also failed: {str(fallback_error)}")
+
+ # Get item identifier for error message
+ item = current_df_inv['Item'].iloc[0]
+ location_msg = ""
+ if location and 'Location' in current_df_inv.columns:
+ loc = current_df_inv['Location'].iloc[0]
+ location_msg = f" at location {loc}"
+
+ error_msg = (
+ f"Cannot calculate initial forecast for item {item}{location_msg}. "
+ f"Forecast data extends only to {max_forecast_date.strftime('%Y-%m-%d')}, "
+ f"but coverage of {int(coverage)} days from {current_date.strftime('%Y-%m-%d')} "
+ f"requires forecast data until {required_forecast_end_date.strftime('%Y-%m-%d')}. "
+ f"Original error: {str(e)}"
+ )
+ raise ValueError(error_msg)
 else:
 # Get item identifier for error message
 item = current_df_inv['Item'].iloc[0]
@@ -361,36 +629,73 @@ def _calculate_suggested_forecast_complete(current_df_lead_time, current_df_inv,
 raise ValueError(error_msg)
 
 # If validation passes, proceed with the original calculation
- return SuggestedForecast(
- df_LeadTimes=current_df_lead_time,
- df_Forecast=df_fcst,
- df_Prep=df_prep,
- df_inv=current_df_inv,
- column_forecast='SuggestedForecast',
- columns_metadata=metadata,
- frequency_='M',
- location=location,
- actualdate=date,
- default_coverage_=default_coverage,
- join_='left'
- ).suggested_forecast()
+ try:
+ result = SuggestedForecast(
+ df_LeadTimes=current_df_lead_time,
+ df_Forecast=df_fcst,
+ df_Prep=df_prep,
+ df_inv=current_df_inv,
+ column_forecast='SuggestedForecast',
+ columns_metadata=metadata,
+ frequency_='M',
+ location=location,
+ actualdate=date,
+ default_coverage_=default_coverage,
+ join_='left'
+ ).suggested_forecast()
+
+
+ return result
+
+ except Exception as e:
+ print(f" ❌ Normal calculation failed: {str(e)}")
+ print(f" 🔄 Attempting fallback SuggestedForecast calculation...")
+
+ try:
+ # Use simplified fallback function
+ fallback_result = _suggested_forecast_fallback(
+ current_df_lead_time, current_df_inv, date, df_fcst,
+ metadata, location, default_coverage
+ )
+
+
+ return fallback_result
+
+ except Exception as fallback_error:
+ print(f" ❌ Fallback calculation also failed: {str(fallback_error)}")
+ # Re-raise the original error
+ raise e
 
 
 def _calculate_daily_usage_complete(suggested_forecast_df, date, df_fcst, location):
 """Calculate average and maximum daily usage rates."""
- df_avg = DailyUsageFuture(
- location=location,
- column_forecast='SuggestedForecast',
- date=date,
- df_fcst=df_fcst
- ).daily_usage(suggested_forecast_df, 'AvgDailyUsage').fillna(0)
-
- df_max = DailyUsageFuture(
- location=location,
- column_forecast='SuggestedForecast',
- date=date,
- df_fcst=df_fcst
- ).daily_usage(df_avg, 'MaxDailyUsage').fillna(0)
+
+ try:
+ df_avg = DailyUsageFuture(
+ location=location,
+ column_forecast='SuggestedForecast',
+ date=date,
+ df_fcst=df_fcst
+ ).daily_usage(suggested_forecast_df, 'AvgDailyUsage').fillna(0)
+
+ df_max = DailyUsageFuture(
+ location=location,
+ column_forecast='SuggestedForecast',
+ date=date,
+ df_fcst=df_fcst
+ ).daily_usage(df_avg, 'MaxDailyUsage').fillna(0)
+
+ except Exception as e:
+ print(f" ❌ DailyUsageFuture error: {str(e)}")
+ print(f" ❌ Error type: {type(e).__name__}")
+
+ # Print more detailed error info
+ import traceback
+ print(f" ❌ Full traceback:")
+ traceback.print_exc()
+
+ # Re-raise the original error to maintain the error flow
+ raise e
 
 return df_avg, df_max
 
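Both call sites now follow the same shape: try the full SuggestedForecast, fall back to the simplified calculation, and if that also fails re-raise the original error rather than the fallback's. Distilled into a generic sketch (the function names here are placeholders, not package API):

```python
def with_fallback(primary, fallback):
    # Generic form of the pattern above: on double failure, the original
    # exception - not the fallback's - is what propagates.
    try:
        return primary()
    except Exception as e:
        try:
            return fallback()
        except Exception:
            raise e

print(with_fallback(lambda: 1 / 0, lambda: 42))  # 42
```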
@@ -442,8 +747,29 @@ def _calculate_security_stock_data_complete(df_max, current_df_lead_time, defaul
 # Calculate SuggestedForecastPeriod
 if period_index == 0 and dates is not None and len(dates) > 1:
 # For period 0, use days to next period instead of reorder frequency
- current_date = pd.to_datetime(dates[0], format='%Y%m%d')
- next_date = pd.to_datetime(dates[1], format='%Y%m%d')
+ try:
+ # Validate dates array and indices
+ if len(dates) < 2:
+ raise ValueError(f"Insufficient dates for period 0 calculation: need at least 2 dates, got {len(dates)}")
+
+ # Validate date formats before conversion
+ if not isinstance(dates[0], str) or len(dates[0]) != 8:
+ raise ValueError(f"Invalid dates[0] format: {repr(dates[0])} (expected 8-character string)")
+ if not isinstance(dates[1], str) or len(dates[1]) != 8:
+ raise ValueError(f"Invalid dates[1] format: {repr(dates[1])} (expected 8-character string)")
+
+ current_date = pd.to_datetime(dates[0], format='%Y%m%d')
+ next_date = pd.to_datetime(dates[1], format='%Y%m%d')
+
+ except Exception as e:
+ error_msg = f"_calculate_security_stock_data_complete: Date processing error - "
+ error_msg += f"dates[0]='{dates[0] if len(dates) > 0 else 'MISSING'}' "
+ error_msg += f"(type: {type(dates[0]) if len(dates) > 0 else 'N/A'}), "
+ error_msg += f"dates[1]='{dates[1] if len(dates) > 1 else 'MISSING'}' "
+ error_msg += f"(type: {type(dates[1]) if len(dates) > 1 else 'N/A'}), "
+ error_msg += f"period_index={period_index}, dates_length={len(dates)}, "
+ error_msg += f"original_error: {str(e)}"
+ raise ValueError(error_msg)
 days_to_next_period = (next_date - current_date).days
 
 # Formula: SuggestedForecast × (days_to_next_period / coverage)
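For period 0 the forecast is scaled by the actual gap to the next reorder date instead of the reorder frequency; a quick worked example of that formula with illustrative values:

```python
import pandas as pd

dates = ['20240101', '20240116']  # period 0 and period 1, 'YYYYMMDD' strings
suggested_forecast = 300.0
coverage = 30

days_to_next_period = (pd.to_datetime(dates[1], format='%Y%m%d')
                       - pd.to_datetime(dates[0], format='%Y%m%d')).days  # 15
print(suggested_forecast * (days_to_next_period / coverage))  # 150.0
```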
@@ -509,7 +835,10 @@ def _prepare_transit_schedule_complete(key, transit_amount, dates, df_transit, l
 if df_transit is None:
 # Default logic: complete transit arrives in period 1
 if len(dates) > 1:
- arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
+ try:
+ arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
+ except Exception as e:
+ raise ValueError(f"_prepare_transit_schedule_complete: Invalid date dates[1]='{dates[1]}' - {str(e)}")
 transit_schedule.append({
 'quantity': transit_amount,
 'arrival_date': arrival_date
@@ -540,7 +869,10 @@ def _prepare_transit_schedule_complete(key, transit_amount, dates, df_transit, l
 else:
 # If no transit data provided for this item, use default logic
 if len(dates) > 1:
- arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
+ try:
+ arrival_date = pd.to_datetime(dates[1], format='%Y%m%d')
+ except Exception as e:
+ raise ValueError(f"_prepare_transit_schedule_complete: Invalid fallback date dates[1]='{dates[1]}' - {str(e)}")
 transit_schedule.append({
 'quantity': transit_amount,
 'arrival_date': arrival_date
@@ -648,8 +980,30 @@ def _process_future_period_complete(current_df_inv, df_sstock, df_previous, key,
 previous_stock = df_previous['FutureInventory'].iloc[0] - consumption
 
 # Process transit orders - EXACTLY like future_reorder_optimized line 467-473
- current_date = pd.to_datetime(date, format='%Y%m%d')
- previous_date = pd.to_datetime(dates[i-1], format='%Y%m%d')
+ try:
+ # Validate indices before accessing dates array
+ if i <= 0:
+ raise ValueError(f"Invalid period index {i} for future period processing (must be > 0)")
+ if i-1 >= len(dates):
+ raise ValueError(f"Previous period index {i-1} is out of bounds for dates array of length {len(dates)}")
+
+ # Validate date values before conversion
+ if not isinstance(date, str) or len(date) != 8:
+ raise ValueError(f"Invalid current date format: {repr(date)} (expected 8-character string)")
+ if not isinstance(dates[i-1], str) or len(dates[i-1]) != 8:
+ raise ValueError(f"Invalid previous date format: {repr(dates[i-1])} (expected 8-character string)")
+
+ current_date = pd.to_datetime(date, format='%Y%m%d')
+ previous_date = pd.to_datetime(dates[i-1], format='%Y%m%d')
+
+ except Exception as e:
+ error_msg = f"_process_future_period_complete: Date processing error - "
+ error_msg += f"current='{date}' (type: {type(date)}), "
+ error_msg += f"previous='{dates[i-1] if i-1 < len(dates) else 'INDEX_OUT_OF_BOUNDS'}' "
+ error_msg += f"(type: {type(dates[i-1]) if i-1 < len(dates) else 'N/A'}), "
+ error_msg += f"period_index={i}, dates_length={len(dates)}, "
+ error_msg += f"original_error: {str(e)}"
+ raise ValueError(error_msg)
 
 stock_from_arrivals, new_transit, transit_arrivals = _process_transit_orders_complete(
 transit_orders, key, current_date, previous_date
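All of these call sites enforce the same precondition before pd.to_datetime: an 8-character 'YYYYMMDD' string. A hypothetical helper expressing the shared check (not part of the package):

```python
import pandas as pd

def parse_yyyymmdd(value):
    # Hypothetical consolidation of the validation added in this release
    if not isinstance(value, str) or len(value) != 8:
        raise ValueError(f"Invalid date format: {value!r} (expected 8-character string)")
    return pd.to_datetime(value, format='%Y%m%d')

print(parse_yyyymmdd('20240115'))           # 2024-01-15 00:00:00
# parse_yyyymmdd(20240115) -> ValueError: not an 8-character string
```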
@@ -712,12 +1066,38 @@ class FutureReorder():
 """
 Complete version optimized for massive processing of large datasets.
 Includes ALL the functionality of the original class, optimized for parallelization.
+
+ New period2 functionality:
+ - period2 controls the number of periods for items with ReorderFreq <= 20
+ - periods controls the number of periods for items with ReorderFreq > 20
+ - This reduces the number of results for items with small reorder frequencies
 """
 
 def __init__(self, df_inv, df_lead_time, df_prep, df_fcst, periods, start_date,
 location=False, security_stock_ref=False, df_transit=None, integer=True,
 complete_suggested=False, start_date_zero=None, batch_size=None, n_workers=None,
- verbose=True):
+ verbose=True, period2=2):
+ """
+ Initialize FutureReorder with enhanced period control.
+
+ Args:
+ df_inv: Inventory DataFrame
+ df_lead_time: Lead time DataFrame
+ df_prep: Preparation DataFrame
+ df_fcst: Forecast DataFrame
+ periods: Number of periods for items with ReorderFreq > 20
+ start_date: Start date for calculations
+ location: Boolean for location-based processing
+ security_stock_ref: Boolean for reference-based security stock
+ df_transit: Transit DataFrame (optional)
+ integer: Boolean for integer formatting
+ complete_suggested: Boolean for complete suggested forecast mode
+ start_date_zero: Custom start date for period 0
+ batch_size: Batch size for parallel processing (auto-configured if None)
+ n_workers: Number of workers for parallel processing (auto-configured if None)
+ verbose: Boolean for detailed logging
+ period2: Number of periods for items with ReorderFreq <= 20 (default: 2)
+ """
 
 # Original parameters - ALL parameters from the original class
 self.df_inv = df_inv
@@ -726,6 +1106,7 @@ class FutureReorder():
 self.df_fcst = df_fcst
 self.default_coverage = 30
 self.periods = periods
+ self.period2 = period2
 self.start_date = pd.to_datetime(start_date, format='%Y-%m-%d') if start_date is not None else None
 self.location = location
 self.security_stock_ref = security_stock_ref
@@ -785,7 +1166,7 @@ class FutureReorder():
 'df_fcst': len(self.df_fcst),
 'df_transit': len(self.df_transit) if self.df_transit is not None else 0
 }
- self._log("🔍 Pre-filtering dataframes based on df_inv...")
+ self._log("📊 Pre-filtering dataframes based on df_inv...")
 
 # Create base filter from df_inv
 if self.location:
@@ -967,16 +1348,26 @@ class FutureReorder():
 
 return item_dates
 
- def _prepare_batch_data(self, item_dates):
- """Prepares batch data efficiently. Replicates exactly the logic from future_reorder_optimized."""
+ def _prepare_batch_data(self):
+ """
+ Prepares batch data efficiently without pre-computing dates.
+ Dates will be generated locally in each worker process.
+ """
 batch_data = []
 
- for key, dates in item_dates.items():
+ # Get unique items from df_inv
+ if self.location:
+ unique_items = self.df_inv[['Item', 'Location']].drop_duplicates()
+ else:
+ unique_items = self.df_inv[['Item']].drop_duplicates()
+
+ for _, row in unique_items.iterrows():
 try:
- # Get dataframes for this item - EXACTLY like future_reorder_optimized _get_current_dataframes_optimized
 if self.location:
+ key = (row['Item'], row['Location'])
 item, location = key
 else:
+ key = row['Item']
 item = key
 location = None
 
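The rewritten _prepare_batch_data builds keys from distinct Item/Location pairs rather than from a precomputed dates dict; that lookup in isolation:

```python
import pandas as pd

df_inv = pd.DataFrame({'Item': ['A', 'A', 'B'], 'Location': ['X', 'X', 'Y']})
unique_items = df_inv[['Item', 'Location']].drop_duplicates()
for _, row in unique_items.iterrows():
    key = (row['Item'], row['Location'])
    print(key)  # ('A', 'X') then ('B', 'Y') - one batch entry per pair
```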
@@ -994,7 +1385,8 @@ class FutureReorder():
 current_df_inv = self.df_inv[mask_inv]
 
 if not current_df_lead_time.empty and not current_df_inv.empty:
- batch_data.append((key, dates, current_df_lead_time, current_df_inv))
+ # Only include key and dataframes, dates will be generated in worker
+ batch_data.append((key, current_df_lead_time, current_df_inv))
 
 except Exception as e:
 if self.verbose:
1083
1475
  last_date = unique_dates[-1]
1084
1476
  filtered_df = df[df['PurchaseDate_dt'] != last_date].copy()
1085
1477
 
1086
- self._log(f"🔍 Filtered periods (start_date=None): Only removed last period ({last_date.strftime('%Y-%m-%d')})")
1478
+ self._log(f"📊 Filtered periods (start_date=None): Only removed last period ({last_date.strftime('%Y-%m-%d')})")
1087
1479
 
1088
1480
  else:
1089
1481
  # When start_date is specified, filter both first and last periods (original logic)
@@ -1099,7 +1491,7 @@ class FutureReorder():
 (df['PurchaseDate_dt'] != last_date)
 ].copy()
 
- self._log(f"🔍 Filtered periods: Removed period 0 ({first_date.strftime('%Y-%m-%d')}) and last period ({last_date.strftime('%Y-%m-%d')})")
+ self._log(f"📊 Filtered periods: Removed period 0 ({first_date.strftime('%Y-%m-%d')}) and last period ({last_date.strftime('%Y-%m-%d')})")
 
 # Drop the temporary datetime column
 filtered_df = filtered_df.drop('PurchaseDate_dt', axis=1)
@@ -1115,7 +1507,7 @@ class FutureReorder():
 large datasets efficiently through batching, multiprocessing, and optimized algorithms.
 
 Processing Pipeline:
- 1. Generate future dates based on reorder frequencies
+ 1. Generate future dates based on reorder frequencies and period control
 2. Pre-filter and prepare data for batch processing
 3. Split items into optimally-sized batches
 4. Process batches in parallel using multiple CPU cores
@@ -1130,6 +1522,11 @@ class FutureReorder():
 - Implements intelligent error handling and recovery
 - Optimizes memory usage through efficient data structures
 
+ Period Control Logic:
+ - Items with ReorderFreq <= 20: Uses period2 (default: 2 periods)
+ - Items with ReorderFreq > 20: Uses periods parameter
+ - This reduces output volume for high-frequency reorder items
+
 Period Filtering Logic:
 - When start_date=None: Only removes last period (keeps period 0 as current)
 - When start_date specified: Removes both period 0 and last period
@@ -1152,13 +1549,14 @@ class FutureReorder():
 - Coverage parameters: ReorderFreq, Coverage
 
 Example usage:
- >>> reorder_system = FutureReorderMassiveComplete(
+ >>> reorder_system = FutureReorder(
 ... df_inv=inventory_df,
 ... df_lead_time=lead_time_df,
 ... df_prep=prep_df,
 ... df_fcst=forecast_df,
- ... periods=6,
+ ... periods=6, # For items with ReorderFreq > 20
 ... start_date=None, # Use current date
+ ... period2=2, # For items with ReorderFreq <= 20
 ... batch_size=100, # Optional: auto-configured if None
 ... n_workers=4 # Optional: auto-configured if None
 ... )
@@ -1169,41 +1567,24 @@ class FutureReorder():
 
 self._log("🚀 FutureReorder Massive Complete - Processing Started")
 
- # Generate future dates
- self._log("📅 Generating future dates...")
- item_dates = self.future_date()
-
- if not item_dates:
- columns = ['Date', 'Item'] + (['Location'] if self.location else [])
- return pd.DataFrame(columns=columns)
-
- # Prepare batch data first to get accurate count of items that will actually be processed
- batch_data = self._prepare_batch_data(item_dates)
+ # Prepare batch data without pre-generating dates
+ self._log("🔧 Preparing batch data...")
+ batch_data = self._prepare_batch_data()
 
- # Calculate accurate statistics based on items that will actually be processed
+ # Calculate statistics based on items that will be processed
 total_items = len(batch_data)
- if batch_data:
- # Get dates for items that will actually be processed
- processed_item_dates = {item_data[0]: item_data[1] for item_data in batch_data}
- avg_periods = np.mean([len(dates) for dates in processed_item_dates.values()])
- total_calculations = sum(len(dates) for dates in processed_item_dates.values())
- else:
- avg_periods = 0
- total_calculations = 0
 
 self._log(f"📊 Dataset Info:")
 self._log(f" • Total Items: {total_items}")
- self._log(f" • Average Periods per Item: {avg_periods:.1f}")
- self._log(f" • Total Calculations: {total_calculations}")
+ self._log(f" • Periods (ReorderFreq > 20): {self.periods}")
+ self._log(f" • Period2 (ReorderFreq <= 20): {self.period2}")
+ self._log(f" • Estimated Total Calculations: {total_items * self.periods}")
 
- # batch_data already prepared above for accurate counting
 if not batch_data:
 self._log("⚠️ No items to process after filtering")
 columns = ['Date', 'Item'] + (['Location'] if self.location else [])
 return pd.DataFrame(columns=columns)
 
- self._log("🔧 Batch data prepared...")
-
 # Split into batches for parallel processing
 batches = []
 for i in range(0, len(batch_data), self.batch_size):
@@ -1211,7 +1592,9 @@ class FutureReorder():
 batch_args = (
 batch, self.df_fcst, self.df_prep, self.metadata,
 self.location, self.default_coverage, self.complete_suggested,
- self.security_stock_ref, self.integer, self.verbose, self.df_transit
+ self.security_stock_ref, self.integer, self.verbose, self.df_transit,
+ self.periods, self.period2, self.start_date.strftime('%Y-%m-%d') if self.start_date else None,
+ self.start_date_zero
 )
 batches.append(batch_args)
 
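The batch split itself is plain list slicing; with ten items and batch_size=4 the final batch simply comes up short:

```python
batch_data = list(range(10))   # stand-ins for (key, lead_time_df, inv_df) tuples
batch_size = 4
batches = [batch_data[i:i + batch_size]
           for i in range(0, len(batch_data), batch_size)]
print(batches)  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
```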
datupapi-1.112.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datupapi
- Version: 1.112.0
+ Version: 1.112.2
 Summary: Utility library to support Datup AI MLOps processes
 Author: Datup AI
 Author-email: ramiro@datup.ai
datupapi-1.112.2.dist-info/RECORD
@@ -19,10 +19,10 @@ datupapi/inventory/conf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
 datupapi/inventory/src/DailyUsage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datupapi/inventory/src/DailyUsage/daily_usage.py,sha256=q45uF22HuwmeChhmRM3AJxvcSkzrLNpQokgGYl1izcg,10920
 datupapi/inventory/src/Format/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datupapi/inventory/src/Format/inventory_format.py,sha256=qrHkr6orSMdiraRH69nKawW1WBi_OFbqei7z2LJeSNI,7080
+ datupapi/inventory/src/Format/inventory_format.py,sha256=W8GPg5VwCnBSia7Wc9mj9CwckdxhYyOGUwP2I8av6KI,7091
 datupapi/inventory/src/FutureInventory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datupapi/inventory/src/FutureInventory/daily_usage_future.py,sha256=jsW3h-rsLo34lEX--KuRo6Qnk5svI5aPLxenPoNUlRI,3458
- datupapi/inventory/src/FutureInventory/future_reorder.py,sha256=Ymsu1dhWewcgmNBdBYmtpJ0pIiuGrtB2p2iaNMEjKPQ,57758
+ datupapi/inventory/src/FutureInventory/future_reorder.py,sha256=qlCgUDLxxsZLFtRaqexlWSr9cQeugZfrkjvIM6_rmck,75968
 datupapi/inventory/src/InventoryFunctions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datupapi/inventory/src/InventoryFunctions/functions_inventory.py,sha256=RgKlF_YTuIUs03CLGpekPqmTaRvbsvwIn-62ClWqNGg,13319
 datupapi/inventory/src/ProcessForecast/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,7 +48,7 @@ datupapi/transform/forecasting.py,sha256=OboiVyErzWXJAv6R4fCXiPNaoVp5dNAP9F53EDq
 datupapi/transform/ranking.py,sha256=XOI0XqMx9Cy52Xjc4LCzJCNUsJZNjgrPky7nrpELr-U,7943
 datupapi/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datupapi/utils/utils.py,sha256=pU3mXPupm-1gvODI-kPlIpOdMHa2F9lEXvqBn6t3ajc,4637
- datupapi-1.112.0.dist-info/METADATA,sha256=EEUvI5IMKMkM-ke6YfBevMSQv8iDlc13G1kCWoyJfFI,1516
- datupapi-1.112.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- datupapi-1.112.0.dist-info/top_level.txt,sha256=oERwtRZu8xq2u1TDGwJwuWK0iJbH4p7x9kYECAL5So0,9
- datupapi-1.112.0.dist-info/RECORD,,
+ datupapi-1.112.2.dist-info/METADATA,sha256=q_XO4eLpCV8aICr_WBnDnAHiDBs7LJjnxbTVcUNShUs,1516
+ datupapi-1.112.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ datupapi-1.112.2.dist-info/top_level.txt,sha256=oERwtRZu8xq2u1TDGwJwuWK0iJbH4p7x9kYECAL5So0,9
+ datupapi-1.112.2.dist-info/RECORD,,