sibi-dst 2025.8.1__py3-none-any.whl → 2025.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,461 +1,778 @@
1
1
  from __future__ import annotations
2
2
 
3
- import datetime
4
- from typing import Union, Tuple, Callable, Dict, Optional
5
-
6
- import fsspec
7
- import numpy as np
8
- import pandas as pd
9
- import dask.dataframe as dd
10
- from .log_utils import Logger
import datetime as dt
import re
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

import pandas as pd
11
6
 
12
7
 
class DateUtils:
    """
    Period resolution & normalization for ETL artifacts.

    Canonical periods:
      - 'today'
      - 'current_month'
      - 'ytd'
      - 'itd'
      - 'custom' (requires 'start_on' and 'end_on')

    Extras:
      - Register named periods at runtime (register_period)
      - Register regex-based periods (register_pattern)
      - Recognize explicit windows: 'YYYY-MM-DD..YYYY-MM-DD'
      - Accept 'last_N_days' out of the box; other dynamic keys (such as
        'last_N_hours') are available once a matching pattern is registered

    All dynamic/custom outputs standardize on:
      - date windows: 'start_on' / 'end_on' (YYYY-MM-DD or date-like)
      - time windows: 'start_ts' / 'end_ts' (ISO datetimes)
    """

    # ---- Dynamic registries ----
    # Both registries are class attributes, so registrations are shared by
    # every user of the class for the lifetime of the process.
    _PERIOD_FUNCTIONS: Dict[str, Callable[[], Tuple[dt.date, dt.date]]] = {}
    _PERIOD_PATTERNS: List[Tuple[re.Pattern[str], Callable[[re.Match[str], dt.datetime], Dict[str, Any]]]] = []

    _LAST_N_DAYS_RE = re.compile(r"^last_(\d+)_days$")
    _WINDOW_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})\.\.(\d{4}-\d{2}-\d{2})$")

    # ---------------- Core coercion helpers ----------------

    @staticmethod
    def _ensure_date(value: Union[str, dt.date, dt.datetime, pd.Timestamp]) -> dt.date:
        """
        Coerce *value* to a ``datetime.date``.

        :param value: a date, a datetime (including ``pd.Timestamp``), or a
            date-like string.
        :return: the calendar date of *value*.
        :raises ValueError: if *value* is a string that cannot be parsed or is
            of an unsupported type.
        """
        # datetime must be tested before date: datetime is a date subclass,
        # and pd.Timestamp is a datetime subclass, so this branch covers both.
        if isinstance(value, dt.datetime):
            return value.date()
        if isinstance(value, dt.date):
            return value
        if isinstance(value, str):
            # Try the pandas parser first (lenient), then strict ISO parsing.
            try:
                return pd.to_datetime(value, errors="raise").date()
            except Exception:  # pandas raises several unrelated error types
                pass
            try:
                return dt.date.fromisoformat(value)
            except ValueError:
                pass
        raise ValueError(f"Unsupported date format: {value!r}")

    # Public alias (used by others)
    ensure_date = _ensure_date

    @staticmethod
    def _ensure_datetime(
        value: Union[str, dt.date, dt.datetime, pd.Timestamp],
        tz: dt.tzinfo = dt.timezone.utc,
    ) -> dt.datetime:
        """
        Coerce *value* to a timezone-aware datetime.

        Values that already carry a timezone are returned with it unchanged;
        naive values are *labelled* with ``tz`` (no clock conversion).

        :param value: a date, a datetime (including ``pd.Timestamp``), or a
            datetime-like string.
        :param tz: timezone attached to naive inputs (defaults to UTC).
        :return: a timezone-aware datetime.
        :raises ValueError: if *value* is of an unsupported type.
        """
        # Covers pd.Timestamp as well, since it subclasses datetime.
        if isinstance(value, dt.datetime):
            return value if value.tzinfo else value.replace(tzinfo=tz)
        if isinstance(value, dt.date):
            # A bare date becomes midnight of that day in ``tz``.
            return dt.datetime(value.year, value.month, value.day, tzinfo=tz)
        if isinstance(value, str):
            parsed = pd.to_datetime(value, errors="raise", utc=False).to_pydatetime()
            return parsed if getattr(parsed, "tzinfo", None) else parsed.replace(tzinfo=tz)
        raise ValueError(f"Unsupported datetime format: {value!r}")

    # ---------------- Week / Month / Quarter helpers ----------------

    @classmethod
    def calc_week_range(cls, reference_date: Union[str, dt.date, dt.datetime, pd.Timestamp]) -> Tuple[dt.date, dt.date]:
        """Start (Mon) and end (Sun) for the week containing reference_date."""
        ref = cls._ensure_date(reference_date)
        start = ref - dt.timedelta(days=ref.weekday())
        end = start + dt.timedelta(days=6)
        return start, end

    @staticmethod
    def get_year_timerange(year: int) -> Tuple[dt.date, dt.date]:
        """Return (January 1st, December 31st) of *year*."""
        return dt.date(year, 1, 1), dt.date(year, 12, 31)

    @classmethod
    def get_first_day_of_the_quarter(cls, reference_date: Union[str, dt.date, dt.datetime, pd.Timestamp]) -> dt.date:
        """Return the first day of the calendar quarter containing reference_date."""
        ref = cls._ensure_date(reference_date)
        quarter = (ref.month - 1) // 3 + 1
        return dt.date(ref.year, 3 * quarter - 2, 1)

    @classmethod
    def get_last_day_of_the_quarter(cls, reference_date: Union[str, dt.date, dt.datetime, pd.Timestamp]) -> dt.date:
        """Return the last day of the calendar quarter containing reference_date."""
        ref = cls._ensure_date(reference_date)
        quarter = (ref.month - 1) // 3 + 1
        if quarter == 4:
            # The "first day of next quarter" would be month 13, which
            # datetime.date rejects; Q4 always ends on December 31st.
            return dt.date(ref.year, 12, 31)
        first_day_next_q = dt.date(ref.year, 3 * quarter + 1, 1)
        return first_day_next_q - dt.timedelta(days=1)

    @classmethod
    def get_month_range(cls, n: int = 0) -> Tuple[dt.date, dt.date]:
        """
        Range for current month (n=0) or +/- n months relative to today.
        If n == 0, end is today. Otherwise end is calendar month end.
        """
        today = dt.date.today()
        # divmod floors, so negative offsets roll back across year boundaries.
        year_offset, month_index = divmod(today.month - 1 + n, 12)
        target_month = month_index + 1
        target_year = today.year + year_offset
        start = dt.date(target_year, target_month, 1)
        if n == 0:
            return start, today
        # Month end = day before the first day of the following month.
        next_month = (target_month % 12) + 1
        next_year = target_year + (1 if target_month == 12 else 0)
        end = dt.date(next_year, next_month, 1) - dt.timedelta(days=1)
        return start, end

    # ---------------- Period registration ----------------

    @classmethod
    def register_period(cls, name: str, func: Callable[[], Tuple[dt.date, dt.date]]) -> None:
        """
        Dynamically register a new named period.
        The callable must return (start_date, end_date) as datetime.date values.
        Registering an existing name replaces it; registered names also take
        precedence over the built-in defaults of the same name.
        """
        cls._PERIOD_FUNCTIONS[name] = func

    @classmethod
    def register_pattern(
        cls,
        pattern: str | re.Pattern[str],
        resolver: Callable[[re.Match[str], dt.datetime], Dict[str, Any]],
    ) -> None:
        """
        Register a regex-based dynamic period.

        The pattern is applied with ``fullmatch`` against the period key.

        The resolver receives:
          - match: regex match object
          - now: timezone-aware datetime (UTC by default)

        It must return a dict with optional keys:
          - 'canonical' : str (defaults to 'custom')
          - 'start_on'/'end_on' : ISO date strings (YYYY-MM-DD) OR
          - 'start_ts'/'end_ts' : ISO datetime strings
          - any additional per-period params
        """
        compiled = re.compile(pattern) if isinstance(pattern, str) else pattern
        cls._PERIOD_PATTERNS.append((compiled, resolver))

    # ---------------- Default named periods ----------------

    @classmethod
    def _get_default_periods(cls) -> Dict[str, Callable[[], Tuple[dt.date, dt.date]]]:
        """Return a fresh mapping of built-in period names to range callables."""
        # Bind the function, not its result, so each period is evaluated
        # against the date at call time.
        today = dt.date.today
        return {
            "today": lambda: (today(), today()),
            "yesterday": lambda: (today() - dt.timedelta(days=1), today() - dt.timedelta(days=1)),
            "current_week": lambda: cls.calc_week_range(today()),
            "last_week": lambda: cls.calc_week_range(today() - dt.timedelta(days=7)),
            "current_month": lambda: cls.get_month_range(n=0),
            "last_month": lambda: cls.get_month_range(n=-1),
            "current_year": lambda: cls.get_year_timerange(today().year),
            "last_year": lambda: cls.get_year_timerange(today().year - 1),
            "current_quarter": lambda: (
                cls.get_first_day_of_the_quarter(today()),
                cls.get_last_day_of_the_quarter(today()),
            ),
            "ytd": lambda: (dt.date(today().year, 1, 1), today()),
            "itd": lambda: (dt.date(1900, 1, 1), today()),
        }

    @classmethod
    def period_keys(cls) -> Iterable[str]:
        """List available named periods (defaults + registered)."""
        d = dict(cls._get_default_periods())
        d.update(cls._PERIOD_FUNCTIONS)
        return d.keys()

    # ---------------- Flexible resolver ----------------

    @classmethod
    def resolve_period(
        cls,
        period: Optional[str] = None,
        *,
        now: Optional[dt.datetime] = None,
        tz: dt.tzinfo = dt.timezone.utc,
        **overrides: Any,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Resolve a period into (canonical_key, params).

        Priority:
          1) exact named period (default + registered)
          2) registered regex patterns (e.g., 'last_36_hours'), then the
             built-in 'last_N_days'
          3) explicit window 'YYYY-MM-DD..YYYY-MM-DD'
          4) fallback: pass the period verbatim with just overrides

        :param period: period key; ``None`` or empty means 'today'.
        :param now: reference instant for dynamic periods; defaults to the
            current time in ``tz``. Named periods use ``date.today()`` instead.
        :param tz: timezone ``now`` is expressed in (defaults to UTC).
        :param overrides: extra params merged over the computed ones.

        Returns:
          - canonical_key: e.g., 'today', 'current_month', or 'custom'
          - params: dict containing computed keys and merged overrides
        """
        key = (period or "today").strip()
        now = (now or dt.datetime.now(tz)).astimezone(tz)

        # 1) named periods
        period_functions = cls._get_default_periods()
        period_functions.update(cls._PERIOD_FUNCTIONS)
        if key in period_functions:
            start, end = period_functions[key]()
            params = {"start_on": start.isoformat(), "end_on": end.isoformat()}
            params.update(overrides)
            return key, params

        # 2) regex patterns (user-registered), first match wins
        for patt, resolver in cls._PERIOD_PATTERNS:
            m = patt.fullmatch(key)
            if m:
                out = resolver(m, now)
                canonical = out.get("canonical", "custom")
                params = {k: v for k, v in out.items() if k != "canonical"}
                params.update(overrides)
                return canonical, params

        # 2b) default 'last_N_days'
        m = cls._LAST_N_DAYS_RE.match(key)
        if m:
            days = int(m.group(1))
            end = now.date()
            start = (now - dt.timedelta(days=days)).date()
            params = {"start_on": start.isoformat(), "end_on": end.isoformat()}
            params.update(overrides)
            return "custom", params

        # 3) explicit date window: YYYY-MM-DD..YYYY-MM-DD
        m2 = cls._WINDOW_RE.fullmatch(key)
        if m2:
            start_on, end_on = m2.group(1), m2.group(2)
            params = {"start_on": start_on, "end_on": end_on}
            params.update(overrides)
            return "custom", params

        # 4) fallback (unknown key)
        return key, dict(overrides)

    # ---------------- Backward-compatible API ----------------

    @classmethod
    def parse_period(cls, **kwargs: Any) -> Tuple[dt.date, dt.date]:
        """
        Return (start_date, end_date) as datetime.date.

        Accepts:
          - period='today' | 'current_month' | 'last_7_days' | 'YYYY-MM-DD..YYYY-MM-DD' | ...
          - optional overrides (e.g., start_on/end_on for 'custom')

        :raises ValueError: if no date range can be derived for the period.
        """
        # Split 'period' from the remaining keyword arguments: forwarding it
        # both positionally and inside **kwargs would make resolve_period
        # fail with "got multiple values for argument 'period'".
        overrides = dict(kwargs)
        period = overrides.pop("period", None) or "today"

        # Try named periods first (returns the callables' date objects as-is)
        period_functions = cls._get_default_periods()
        period_functions.update(cls._PERIOD_FUNCTIONS)
        if period in period_functions:
            return period_functions[period]()

        # Otherwise, resolve and coerce
        canonical, params = cls.resolve_period(period, **overrides)

        if "start_on" in params and "end_on" in params:
            start = cls._ensure_date(params["start_on"])
            end = cls._ensure_date(params["end_on"])
            return start, end

        if "start_ts" in params and "end_ts" in params:
            sdt = cls._ensure_datetime(params["start_ts"]).date()
            edt = cls._ensure_datetime(params["end_ts"]).date()
            return sdt, edt

        raise ValueError(
            f"Could not derive date range from period '{period}' (canonical='{canonical}'). "
            f"Params: {params}"
        )
369
290
 
370
291
 
371
- # --- Refactored BusinessDays Class ---
292
+ # ---------------- Default dynamic patterns registration ----------------
372
293
 
373
- class BusinessDays:
def _register_default_patterns() -> None:
    """
    Install the dynamic period patterns that ship with the module:
      - last_{n}_hours (ISO datetimes; useful for freshness windows)
    """

    def _hours_window(match: re.Match[str], now: dt.datetime) -> Dict[str, Any]:
        # The single capture group is the number of hours to look back.
        n_hours = int(match.group(1))
        window_start = now - dt.timedelta(hours=n_hours)
        # Sensible default that callers can override: ten minutes per hour
        # of window, clamped to the range [15, 240] minutes.
        suggested_age = min(n_hours * 10, 240)
        if suggested_age < 15:
            suggested_age = 15
        return {
            "canonical": "custom",
            "start_ts": window_start.isoformat(),
            "end_ts": now.isoformat(),
            "max_age_minutes": suggested_age,
        }

    DateUtils.register_pattern(r"last_(\d+)_hours", _hours_window)


# Register defaults at import time
_register_default_patterns()
317
+
318
+ # from __future__ import annotations
319
+ #
320
+ # import datetime
321
+ # from typing import Union, Tuple, Callable, Dict, Optional
322
+ #
323
+ # import fsspec
324
+ # import numpy as np
325
+ # import pandas as pd
326
+ # import dask.dataframe as dd
327
+ # from .log_utils import Logger
328
+ #
329
+ #
330
+ # class DateUtils:
331
+ # """
332
+ # Utility class for date-related operations.
333
+ #
334
+ # The DateUtils class provides a variety of operations to manipulate and retrieve
335
+ # information about dates, such as calculating week ranges, determining start or
336
+ # end dates for specific periods (quarters, months, years), and dynamically
337
+ # registering custom time period functions. It also supports parsing specific
338
+ # periods for date range computations and ensuring the input date is correctly
339
+ # converted to the desired format.
340
+ #
341
+ # :ivar logger: Logger instance used for logging messages. Defaults to the logger
342
+ # for the current class if not provided.
343
+ # :type logger: Logger
344
+ #
345
+ # :ivar _PERIOD_FUNCTIONS: Stores dynamically registered period functions that
346
+ # return start and end dates.
347
+ # :type _PERIOD_FUNCTIONS: Dict[str, Callable[[], Tuple[datetime.date, datetime.date]]]
348
+ # """
349
+ # _PERIOD_FUNCTIONS: Dict[str, Callable[[], Tuple[datetime.date, datetime.date]]] = {}
350
+ #
351
+ # def __init__(self, logger=None, debug=False):
352
+ # self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
353
+ # self.debug = debug
354
+ #
355
+ # @classmethod
356
+ # def _ensure_date(cls, value: Union[str, datetime.date, datetime.datetime, pd.Timestamp]) -> datetime.date:
357
+ # """
358
+ # Ensure the input is converted to a datetime.date object.
359
+ # """
360
+ # if isinstance(value, datetime.date) and not isinstance(value, datetime.datetime):
361
+ # return value
362
+ # elif isinstance(value, datetime.datetime):
363
+ # return value.date()
364
+ # elif isinstance(value, pd.Timestamp):
365
+ # return value.to_pydatetime().date()
366
+ # elif isinstance(value, str):
367
+ # for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d'):
368
+ # try:
369
+ # return datetime.datetime.strptime(value, fmt).date()
370
+ # except ValueError:
371
+ # continue
372
+ # raise ValueError(f"Unsupported date format: {value}")
373
+ #
374
+ # # Public alias to access _ensure_date from other classes
375
+ # ensure_date = _ensure_date
376
+ #
377
+ # @classmethod
378
+ # def calc_week_range(cls, reference_date: Union[str, datetime.date, datetime.datetime, pd.Timestamp]) -> Tuple[
379
+ # datetime.date, datetime.date]:
380
+ # """
381
+ # Calculate the start and end of the week for a given reference date.
382
+ # """
383
+ # reference_date = cls._ensure_date(reference_date)
384
+ # start = reference_date - datetime.timedelta(days=reference_date.weekday())
385
+ # end = start + datetime.timedelta(days=6)
386
+ # return start, end
387
+ #
388
+ # @staticmethod
389
+ # def get_year_timerange(year: int) -> Tuple[datetime.date, datetime.date]:
390
+ # """
391
+ # Get the start and end dates for a given year.
392
+ # """
393
+ # return datetime.date(year, 1, 1), datetime.date(year, 12, 31)
394
+ #
395
+ # @classmethod
396
+ # def get_first_day_of_the_quarter(cls, reference_date: Union[
397
+ # str, datetime.date, datetime.datetime, pd.Timestamp]) -> datetime.date:
398
+ # """
399
+ # Get the first day of the quarter for a given date.
400
+ # """
401
+ # reference_date = cls._ensure_date(reference_date)
402
+ # quarter = (reference_date.month - 1) // 3 + 1
403
+ # return datetime.date(reference_date.year, 3 * quarter - 2, 1)
404
+ #
405
+ # @classmethod
406
+ # def get_last_day_of_the_quarter(cls, reference_date: Union[
407
+ # str, datetime.date, datetime.datetime, pd.Timestamp]) -> datetime.date:
408
+ # """
409
+ # Get the last day of the quarter for a given date. NOTE: for Q4 dates this computes month 13 and datetime.date raises ValueError.
410
+ # """
411
+ # reference_date = cls._ensure_date(reference_date)
412
+ # quarter = (reference_date.month - 1) // 3 + 1
413
+ # first_day_of_next_quarter = datetime.date(reference_date.year, 3 * quarter + 1, 1)
414
+ # return first_day_of_next_quarter - datetime.timedelta(days=1)
415
+ #
416
+ # @classmethod
417
+ # def get_month_range(cls, n: int = 0) -> Tuple[datetime.date, datetime.date]:
418
+ # """
419
+ # Get the month-to-date range (first day through today) when n == 0, or the full first-to-last-day range of the month `n` months in the past (n < 0) or future (n > 0).
420
+ # """
421
+ # today = datetime.date.today()
422
+ # target_month = (today.month - 1 + n) % 12 + 1
423
+ # target_year = today.year + (today.month - 1 + n) // 12
424
+ # start = datetime.date(target_year, target_month, 1)
425
+ # if n == 0:
426
+ # return start, today
427
+ # next_month = (target_month % 12) + 1
428
+ # next_year = target_year + (target_month == 12)
429
+ # end = datetime.date(next_year, next_month, 1) - datetime.timedelta(days=1)
430
+ # return start, end
431
+ #
432
+ # @classmethod
433
+ # def register_period(cls, name: str, func: Callable[[], Tuple[datetime.date, datetime.date]]):
434
+ # """
435
+ # Dynamically register a new period function.
436
+ # """
437
+ # cls._PERIOD_FUNCTIONS[name] = func
438
+ #
439
+ # @classmethod
440
+ # def parse_period(cls, **kwargs) -> Tuple[datetime.date, datetime.date]:
441
+ # """
442
+ # Parse the period keyword to determine the start and end date for date range operations.
443
+ # """
444
+ # period = kwargs.setdefault('period', 'today')
445
+ # period_functions = cls._get_default_periods()
446
+ # period_functions.update(cls._PERIOD_FUNCTIONS)
447
+ # if period not in period_functions:
448
+ # raise ValueError(f"Unknown period '{period}'. Available periods: {list(period_functions.keys())}")
449
+ # return period_functions[period]()
450
+ #
451
+ # @classmethod
452
+ # def _get_default_periods(cls) -> Dict[str, Callable[[], Tuple[datetime.date, datetime.date]]]:
453
+ # """
454
+ # Get default period functions.
455
+ # """
456
+ # today = datetime.date.today
457
+ # return {
458
+ # 'today': lambda: (today(), today()),
459
+ # 'yesterday': lambda: (today() - datetime.timedelta(days=1), today() - datetime.timedelta(days=1)),
460
+ # 'current_week': lambda: cls.calc_week_range(today()),
461
+ # 'last_week': lambda: cls.calc_week_range(today() - datetime.timedelta(days=7)),
462
+ # 'current_month': lambda: cls.get_month_range(n=0),
463
+ # 'last_month': lambda: cls.get_month_range(n=-1),
464
+ # 'current_year': lambda: cls.get_year_timerange(today().year),
465
+ # 'last_year': lambda: cls.get_year_timerange(today().year - 1),
466
+ # 'current_quarter': lambda: (
467
+ # cls.get_first_day_of_the_quarter(today()), cls.get_last_day_of_the_quarter(today())),
468
+ # 'ytd': lambda: (datetime.date(today().year, 1, 1), today()),
469
+ # }
470
+ #
471
+ #
472
+ # class FileAgeChecker:
473
+ # def __init__(self, debug=False, logger=None):
474
+ # self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
475
+ # self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
476
+ # def is_file_older_than(
477
+ # self,
478
+ # file_path: str,
479
+ # max_age_minutes: int,
480
+ # fs: Optional[fsspec.AbstractFileSystem] = None,
481
+ # ignore_missing: bool = False,
482
+ # verbose: bool = False,
483
+ # ) -> bool:
484
+ # """
485
+ # Check if a file or directory is older than the specified max_age_minutes.
486
+ #
487
+ # :param file_path: Path to the file or directory.
488
+ # :param max_age_minutes: Maximum allowed age in minutes.
489
+ # :param fs: Filesystem object. Defaults to local filesystem.
490
+ # :param ignore_missing: Treat missing paths as not old if True.
491
+ # :param verbose: Enable detailed logging.
492
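+ # :return: True if older than max_age_minutes; also True if the path is missing (unless ignore_missing), is neither file nor directory, or an error occurs. False otherwise.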
493
+ # """
494
+ # fs = fs or fsspec.filesystem("file")
495
+ # self.logger.debug(f"Checking age for {file_path}...")
496
+ #
497
+ # try:
498
+ # if not fs.exists(file_path):
499
+ # self.logger.debug(f"Path not found: {file_path}.")
500
+ # return not ignore_missing
501
+ #
502
+ # if fs.isdir(file_path):
503
+ # self.logger.debug(f"Found directory: {file_path}")
504
+ # age = self._get_directory_age_minutes(file_path, fs, verbose)
505
+ # elif fs.isfile(file_path):
506
+ # age = self._get_file_age_minutes(file_path, fs, verbose)
507
+ # else:
508
+ # self.logger.warning(f"Path {file_path} is neither file nor directory.")
509
+ # return True
510
+ #
511
+ # return age > max_age_minutes
512
+ #
513
+ # except Exception as e:
514
+ # self.logger.warning(f"Error checking {file_path}: {str(e)}")
515
+ # return True
516
+ #
517
+ # def get_file_or_dir_age_minutes(
518
+ # self,
519
+ # file_path: str,
520
+ # fs: Optional[fsspec.AbstractFileSystem] = None,
521
+ # ) -> float:
522
+ # """
523
+ # Get age of file/directory in minutes. Returns infinity for errors/missing paths.
524
+ #
525
+ # :param file_path: Path to check.
526
+ # :param fs: Filesystem object. Defaults to local filesystem.
527
+ # :return: Age in minutes or infinity if unavailable.
528
+ # """
529
+ # fs = fs or fsspec.filesystem("file")
530
+ # try:
531
+ # if not fs.exists(file_path):
532
+ # self.logger.debug(f"Path not found: {file_path}")
533
+ # return float("inf")
534
+ #
535
+ # if fs.isdir(file_path):
536
+ # return self._get_directory_age_minutes(file_path, fs, verbose=False)
537
+ # if fs.isfile(file_path):
538
+ # return self._get_file_age_minutes(file_path, fs, verbose=False)
539
+ #
540
+ # self.logger.warning(f"Invalid path type: {file_path}")
541
+ # return float("inf")
542
+ #
543
+ # except Exception as e:
544
+ # self.logger.warning(f"Error getting age for {file_path}: {str(e)}")
545
+ # return float("inf")
546
+ #
547
+ # def _get_directory_age_minutes(
548
+ # self,
549
+ # dir_path: str,
550
+ # fs: fsspec.AbstractFileSystem,
551
+ # verbose: bool,
552
+ # ) -> float:
553
+ # """Calculate age of oldest file in directory."""
554
+ # try:
555
+ # all_files = fs.ls(dir_path)
556
+ # except Exception as e:
557
+ # self.logger.warning(f"Error listing {dir_path}: {str(e)}")
558
+ # return float("inf")
559
+ #
560
+ # if not all_files:
561
+ # self.logger.debug(f"Empty directory: {dir_path}")
562
+ # return float("inf")
563
+ #
564
+ # modification_times = []
565
+ # for file in all_files:
566
+ # try:
567
+ # info = fs.info(file)
568
+ # mod_time = self._get_modification_time(info, file)
569
+ # modification_times.append(mod_time)
570
+ # except Exception as e:
571
+ # self.logger.warning(f"Skipping {file}: {str(e)}")
572
+ #
573
+ # if not modification_times:
574
+ # self.logger.warning(f"No valid files in {dir_path}")
575
+ # return float("inf")
576
+ #
577
+ # oldest = min(modification_times)
578
+ # age = (datetime.datetime.now(datetime.timezone.utc) - oldest).total_seconds() / 60
579
+ # self.logger.debug(f"Oldest in {dir_path}: {age:.2f} minutes")
580
+ #
581
+ # return age
582
+ #
583
+ # def _get_file_age_minutes(
584
+ # self,
585
+ # file_path: str,
586
+ # fs: fsspec.AbstractFileSystem,
587
+ # verbose: bool,
588
+ # ) -> float:
589
+ # """Calculate file age in minutes."""
590
+ # try:
591
+ # info = fs.info(file_path)
592
+ # mod_time = self._get_modification_time(info, file_path)
593
+ # age = (datetime.datetime.now(datetime.timezone.utc) - mod_time).total_seconds() / 60
594
+ #
595
+ # if verbose:
596
+ # self.logger.debug(f"{file_path} info: {info}")
597
+ # self.logger.debug(f"File age: {age:.2f} minutes")
598
+ #
599
+ # return age
600
+ #
601
+ # except Exception as e:
602
+ # self.logger.warning(f"Error processing {file_path}: {str(e)}")
603
+ # return float("inf")
604
+ #
605
+ # def _get_modification_time(self, info: Dict, file_path: str) -> datetime.datetime:
606
+ # """Extract modification time from filesystem info with timezone awareness."""
607
+ # try:
608
+ # if "LastModified" in info: # S3-like
609
+ # lm = info["LastModified"]
610
+ # return lm if isinstance(lm, datetime.datetime) else datetime.datetime.fromisoformat(
611
+ # lm[:-1]).astimezone()
612
+ #
613
+ # if "mtime" in info: # Local filesystem
614
+ # return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
615
+ #
616
+ # if "modified" in info: # FTP/SSH
617
+ # return datetime.datetime.strptime(
618
+ # info["modified"], "%Y-%m-%d %H:%M:%S"
619
+ # ).replace(tzinfo=datetime.timezone.utc)
620
+ #
621
+ # raise KeyError("No valid modification time key found")
622
+ #
623
+ # except (KeyError, ValueError) as e:
624
+ # self.logger.warning(f"Invalid mod time for {file_path}: {str(e)}")
625
+ # raise ValueError(f"Unsupported modification time format for {file_path}") from e
626
+ #
627
+ #
628
+ # # --- Vectorized Helper Functions ---
629
+ #
630
+ # def _vectorized_busday_count(partition, begin_col, end_col, holidays):
631
+ # """
632
+ # Calculates the number of business days between a start and end date.
633
+ # """
634
+ # # Extract the raw columns
635
+ # start_dates_raw = partition[begin_col]
636
+ # end_dates_raw = partition[end_col]
637
+ #
638
+ #
639
+ # start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
640
+ # end_dates = pd.to_datetime(end_dates_raw, errors='coerce')
641
+ #
642
+ # # Initialize the result Series with NaN, as the output is a number
643
+ # result = pd.Series(np.nan, index=partition.index)
644
+ #
645
+ # # Create a mask for rows where both start and end dates are valid
646
+ # valid_mask = pd.notna(start_dates) & pd.notna(end_dates)
647
+ #
648
+ # # Perform the vectorized calculation only on the valid subset
649
+ # # Convert to NumPy arrays of date type for the calculation
650
+ # result.loc[valid_mask] = np.busday_count(
651
+ # start_dates[valid_mask].values.astype('datetime64[D]'),
652
+ # end_dates[valid_mask].values.astype('datetime64[D]'),
653
+ # holidays=holidays
654
+ # )
655
+ #
656
+ # return result
657
+ #
658
+ #
659
+ # def _vectorized_sla_end_date(partition, start_col, n_days_col, holidays):
660
+ # """
661
+ # Calculates the end date of an SLA, skipping weekends and holidays.
662
+ # """
663
+ # # Extract the relevant columns as pandas Series
664
+ # start_dates_raw = partition[start_col]
665
+ # sla_days = partition[n_days_col]
666
+ #
667
+ #
668
+ # start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
669
+ #
670
+ # # Initialize the result Series with NaT (Not a Time)
671
+ # result = pd.Series(pd.NaT, index=partition.index, dtype='datetime64[ns]')
672
+ #
673
+ # # Create a mask for rows that have valid start dates and SLA days
674
+ # valid_mask = pd.notna(start_dates) & pd.notna(sla_days)
675
+ #
676
+ # # Perform the vectorized calculation only on the valid subset
677
+ # # Note: np.busday_offset requires a NumPy array, so we use .values
678
+ # result.loc[valid_mask] = np.busday_offset(
679
+ # start_dates[valid_mask].values.astype('datetime64[D]'), # Convert to numpy array of dates
680
+ # sla_days[valid_mask].astype(int), # Ensure days are integers
681
+ # roll='forward',
682
+ # holidays=holidays
683
+ # )
684
+ #
685
+ # return result
686
+ #
687
+ #
688
+ # # --- Refactored BusinessDays Class ---
689
+ #
690
+ # class BusinessDays:
691
+ # """
692
+ # Business days calculations with a custom holiday list.
693
+ # Supports scalar and efficient, vectorized Dask DataFrame operations.
694
+ # """
695
+ #
696
+ # def __init__(self, holiday_list: dict[str, list[str]], logger) -> None:
697
+ # self.logger = logger
698
+ # self.HOLIDAY_LIST = holiday_list
699
+ #
700
+ # # Flatten and store as tuple for determinism
701
+ # bd_holidays = [day for year in self.HOLIDAY_LIST for day in self.HOLIDAY_LIST[year]]
702
+ # self.holidays = tuple(bd_holidays)
703
+ #
704
+ # def get_business_days_count(
705
+ # self,
706
+ # begin_date: str | datetime.date | pd.Timestamp,
707
+ # end_date: str | datetime.date | pd.Timestamp,
708
+ # ) -> int:
709
+ # """Scalar method to count business days between two dates."""
710
+ # begin = pd.to_datetime(begin_date)
711
+ # end = pd.to_datetime(end_date)
712
+ # return int(np.busday_count(begin.date(), end.date(), holidays=list(self.holidays)))
713
+ #
714
+ # def calc_business_days_from_df(
715
+ # self,
716
+ # df: dd.DataFrame,
717
+ # begin_date_col: str,
718
+ # end_date_col: str,
719
+ # result_col: str = "business_days",
720
+ # ) -> dd.DataFrame:
721
+ # """Calculates business days between two columns in a Dask DataFrame."""
722
+ # missing = {begin_date_col, end_date_col} - set(df.columns)
723
+ # if missing:
724
+ # self.logger.error(f"Missing columns: {missing}")
725
+ # raise ValueError("Required columns are missing from DataFrame")
726
+ #
727
+ # return df.assign(
728
+ # **{result_col: df.map_partitions(
729
+ # _vectorized_busday_count,
730
+ # begin_col=begin_date_col,
731
+ # end_col=end_date_col,
732
+ # holidays=list(self.holidays),
733
+ # meta=(result_col, 'f8') # f8 is float64
734
+ # )}
735
+ # )
736
+ #
737
+ # def add_business_days(
738
+ # self,
739
+ # start_date: str | datetime.date | pd.Timestamp,
740
+ # n_days: int,
741
+ # ) -> np.datetime64:
742
+ # """Scalar method to add N business days to a start date."""
743
+ # start = pd.to_datetime(start_date)
744
+ # return np.busday_offset(
745
+ # start.date(),
746
+ # n_days,
747
+ # roll='forward',
748
+ # holidays=list(self.holidays),
749
+ # )
750
+ #
751
+ # def calc_sla_end_date(
752
+ # self,
753
+ # df: dd.DataFrame,
754
+ # start_date_col: str,
755
+ # n_days_col: str,
756
+ # result_col: str = "sla_end_date",
757
+ # ) -> dd.DataFrame:
758
+ # """Calculates an SLA end date column for a Dask DataFrame."""
759
+ # missing = {start_date_col, n_days_col} - set(df.columns)
760
+ # if missing:
761
+ # self.logger.error(f"Missing columns: {missing}")
762
+ # raise ValueError("Required columns are missing from DataFrame")
763
+ #
764
+ # return df.assign(
765
+ # **{result_col: df.map_partitions(
766
+ # _vectorized_sla_end_date,
767
+ # start_col=start_date_col,
768
+ # n_days_col=n_days_col,
769
+ # holidays=list(self.holidays),
770
+ # meta=(result_col, 'datetime64[ns]')
771
+ # )}
772
+ # )
773
+ #
774
+ # # Example usage: registering a custom period and resolving it with parse_period
775
+ # # DateUtils.register_period('next_week', lambda: (datetime.date.today() + datetime.timedelta(days=7),
776
+ # # datetime.date.today() + datetime.timedelta(days=13)))
777
+ # # start, end = DateUtils.parse_period(period='next_week')
778
+ # # print(f"Next Week: {start} to {end}")