sibi-dst 2025.1.11__py3-none-any.whl → 2025.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import datetime
2
4
  from typing import Union, Tuple, Callable, Dict, Optional
3
5
 
@@ -306,64 +308,62 @@ class FileAgeChecker:
306
308
 
307
309
 
308
310
  # --- Vectorized Helper Functions ---
309
- # These replace the slow, row-by-row .apply() logic. They operate
310
- # on entire DataFrame partitions for maximum efficiency.
311
-
312
- def _vectorized_busday_count(
313
- partition: pd.DataFrame,
314
- begin_col: str,
315
- end_col: str,
316
- holidays: list
317
- ) -> pd.Series:
318
- """Vectorized function to count business days on a DataFrame partition."""
319
- if partition.empty:
320
- return pd.Series([], dtype=float)
321
-
322
- # Convert entire columns to datetime at once, coercing errors to NaT
323
- start_dates = pd.to_datetime(partition[begin_col], errors='coerce').dt.date
324
- end_dates = pd.to_datetime(partition[end_col], errors='coerce').dt.date
325
-
326
- # Create a result series filled with NaN to handle rows with invalid dates
327
- result = pd.Series(np.nan, index=partition.index, dtype=float)
328
-
329
- # Create a boolean mask for valid, non-NaT date pairs
311
+
312
+ def _vectorized_busday_count(partition, begin_col, end_col, holidays):
313
+ """
314
+ Calculates the number of business days between a start and end date.
315
+ """
316
+ # Extract the raw columns
317
+ start_dates_raw = partition[begin_col]
318
+ end_dates_raw = partition[end_col]
319
+
320
+
321
+ start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
322
+ end_dates = pd.to_datetime(end_dates_raw, errors='coerce')
323
+
324
+ # Initialize the result Series with NaN, as the output is a number
325
+ result = pd.Series(np.nan, index=partition.index)
326
+
327
+ # Create a mask for rows where both start and end dates are valid
330
328
  valid_mask = pd.notna(start_dates) & pd.notna(end_dates)
331
329
 
332
- # Perform the vectorized calculation only on the valid subset of dates
330
+ # Perform the vectorized calculation only on the valid subset
331
+ # Convert to NumPy arrays of date type for the calculation
333
332
  result.loc[valid_mask] = np.busday_count(
334
- start_dates[valid_mask],
335
- end_dates[valid_mask],
333
+ start_dates[valid_mask].values.astype('datetime64[D]'),
334
+ end_dates[valid_mask].values.astype('datetime64[D]'),
336
335
  holidays=holidays
337
336
  )
338
- return result
339
337
 
338
+ return result
340
339
 
341
- def _vectorized_sla_end_date(
342
- partition: pd.DataFrame,
343
- start_col: str,
344
- n_days_col: str,
345
- holidays: list
346
- ) -> pd.Series:
347
- """Vectorized function to calculate the SLA end date on a DataFrame partition."""
348
- if partition.empty:
349
- return pd.Series([], dtype='datetime64[ns]')
350
340
 
351
- start_dates = pd.to_datetime(partition[start_col], errors='coerce').dt.date
341
+ def _vectorized_sla_end_date(partition, start_col, n_days_col, holidays):
342
+ """
343
+ Calculates the end date of an SLA, skipping weekends and holidays.
344
+ """
345
+ # Extract the relevant columns as pandas Series
346
+ start_dates_raw = partition[start_col]
352
347
  sla_days = partition[n_days_col]
353
348
 
354
- # Create a result series filled with NaT for rows with invalid start dates
349
+
350
+ start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
351
+
352
+ # Initialize the result Series with NaT (Not a Time)
355
353
  result = pd.Series(pd.NaT, index=partition.index, dtype='datetime64[ns]')
356
354
 
357
- # Create a boolean mask for valid start dates and sla_days
355
+ # Create a mask for rows that have valid start dates and SLA days
358
356
  valid_mask = pd.notna(start_dates) & pd.notna(sla_days)
359
357
 
360
358
  # Perform the vectorized calculation only on the valid subset
359
+ # Note: np.busday_offset requires a NumPy array, so we use .values
361
360
  result.loc[valid_mask] = np.busday_offset(
362
- start_dates[valid_mask],
361
+ start_dates[valid_mask].values.astype('datetime64[D]'), # Convert to numpy array of dates
363
362
  sla_days[valid_mask].astype(int), # Ensure days are integers
364
363
  roll='forward',
365
364
  holidays=holidays
366
365
  )
366
+
367
367
  return result
368
368
 
369
369
 
@@ -452,6 +452,7 @@ class BusinessDays:
452
452
  meta=(result_col, 'datetime64[ns]')
453
453
  )}
454
454
  )
455
+
455
456
  # Class enhancements
456
457
  # DateUtils.register_period('next_week', lambda: (datetime.date.today() + datetime.timedelta(days=7),
457
458
  # datetime.date.today() + datetime.timedelta(days=13)))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 2025.1.11
3
+ Version: 2025.1.12
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -39,7 +39,7 @@ sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME
39
39
  sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
40
40
  sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
41
41
  sibi_dst/utils/data_wrapper.py,sha256=9aYXorbrqDX53NVJ5oUnNQy6FbXYhs5osxzeMcdZpC4,9609
42
- sibi_dst/utils/date_utils.py,sha256=T0uXNIG2IQfgs0AyQNsF9S6-cTujtA4GDC1IalvZVSU,18040
42
+ sibi_dst/utils/date_utils.py,sha256=fV2X9HZND92CV-sRvOOGMs6Iv82gaLURQ8M78xAWfTY,17996
43
43
  sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
44
44
  sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
45
45
  sibi_dst/utils/filepath_generator.py,sha256=-HHO0U-PR8fysDDFwnWdHRlgqksh_RkmgBZLWv9hM7s,6669
@@ -71,6 +71,6 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
71
71
  sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
72
72
  sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
73
73
  sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
74
- sibi_dst-2025.1.11.dist-info/METADATA,sha256=7iwn7RFfaDF_9dfpWvnNl2Al_8NHWu7l8vGhzO9BAac,2611
75
- sibi_dst-2025.1.11.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
76
- sibi_dst-2025.1.11.dist-info/RECORD,,
74
+ sibi_dst-2025.1.12.dist-info/METADATA,sha256=dkWXdBL5nk_gpqEqwfg8UCKCtsKw1eqLKIn4v0BtDy8,2611
75
+ sibi_dst-2025.1.12.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
76
+ sibi_dst-2025.1.12.dist-info/RECORD,,