sibi-dst 2025.8.1__py3-none-any.whl → 2025.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,248 @@
1
+ import datetime as dt
2
+ from typing import Any, Dict, Iterable, Optional
3
+ from sibi_dst.utils import Logger
4
+ import numpy as np
5
+ import pandas as pd
6
+ import dask.dataframe as dd
7
+
8
+
9
+ # ---------------- Vectorized helpers (used by Dask map_partitions) ----------------
10
+
11
+ def _to_np_days(series: pd.Series) -> np.ndarray:
12
+ """Coerce to numpy datetime64[D] with NaT-safe conversion."""
13
+ # Use pandas for robust parsing, then cast to date-days
14
+ s = pd.to_datetime(series, errors="coerce")
15
+ # Convert to numpy datetime64[D] (day precision)
16
+ return s.values.astype("datetime64[D]")
17
+
18
+
19
def _vectorized_busday_count(
    part: pd.DataFrame,
    begin_col: str,
    end_col: str,
    holidays: Iterable[str],
    weekmask: Optional[str],
    inclusive: bool,
) -> pd.Series:
    """Count business days between two date columns of one partition.

    Parameters
    ----------
    part : pd.DataFrame
        Partition holding both date columns.
    begin_col, end_col : str
        Names of the begin and end date columns.
    holidays : Iterable[str]
        Dates excluded from the count; may be empty or ``None``.
    weekmask : str | None
        numpy weekmask; when falsy numpy's default is used.
    inclusive : bool
        ``False``: plain ``np.busday_count`` semantics (end date excluded,
        negative when end precedes begin).
        ``True``: both endpoints are counted. When end precedes begin the
        result is the negative of the inclusive count for the swapped pair,
        so ``count(a, b) == -count(b, a)``.

    Returns
    -------
    pd.Series
        float64 Series aligned to ``part.index``; NaN where either date is
        missing or unparseable.
    """
    start = _to_np_days(part[begin_col])
    end = _to_np_days(part[end_col])

    kwargs: Dict[str, Any] = {}
    # Materialise before testing for emptiness: truthiness of a generator is
    # always True and truthiness of an ndarray raises.
    holiday_days = np.array(
        list(() if holidays is None else holidays), dtype="datetime64[D]"
    )
    if holiday_days.size:
        kwargs["holidays"] = holiday_days
    if weekmask:
        kwargs["weekmask"] = weekmask

    # np.busday_count rejects NaT, so only rows with both dates are passed in.
    valid = ~(np.isnat(start) | np.isnat(end))
    result = np.full(len(part), np.nan, dtype="float64")
    if valid.any():
        begin_days = start[valid]
        end_days = end[valid]
        if inclusive:
            # Always extend the *later* bound by one day. Adding the day to
            # ``end`` unconditionally would shrink a reversed range instead
            # of including its end date.
            backwards = end_days < begin_days
            first = np.where(backwards, end_days, begin_days)
            last = np.where(backwards, begin_days, end_days)
            counts = np.busday_count(
                first, last + np.timedelta64(1, "D"), **kwargs
            )
            counts = np.where(backwards, -counts, counts)
        else:
            counts = np.busday_count(begin_days, end_days, **kwargs)
        result[valid] = counts

    return pd.Series(result, index=part.index)
52
+
53
+
54
def _vectorized_busday_offset(
    part: pd.DataFrame,
    start_col: str,
    n_days_col: str,
    holidays: Iterable[str],
    weekmask: Optional[str],
    roll: str,
) -> pd.Series:
    """Offset a date column by a per-row number of business days.

    Parameters
    ----------
    part : pd.DataFrame
        Partition holding the start-date and day-count columns.
    start_col : str
        Name of the start-date column.
    n_days_col : str
        Name of the column with the number of business days to add
        (negative values subtract). Values are coerced with
        ``pd.to_numeric``; fractional values are truncated toward zero by
        the int64 cast.
    holidays : Iterable[str]
        Dates treated as non-business days; may be empty or ``None``.
    weekmask : str | None
        numpy weekmask; when falsy numpy's default is used.
    roll : str
        Passed through to ``np.busday_offset``.

    Returns
    -------
    pd.Series
        ``datetime64[ns]`` Series aligned to ``part.index``; ``NaT`` where the
        start date is missing/unparseable or the day count is missing,
        non-numeric or infinite.
    """
    start = _to_np_days(part[start_col])
    # Force float64 so missing values (including nullable-dtype NA) become NaN
    # and the finiteness test below is well defined.
    n_days = (
        pd.to_numeric(part[n_days_col], errors="coerce")
        .astype("float64")
        .to_numpy()
    )

    kwargs: Dict[str, Any] = {"roll": roll}
    # Materialise before testing for emptiness: truthiness of a generator is
    # always True and truthiness of an ndarray raises.
    holiday_days = np.array(
        list(() if holidays is None else holidays), dtype="datetime64[D]"
    )
    if holiday_days.size:
        kwargs["holidays"] = holiday_days
    if weekmask:
        kwargs["weekmask"] = weekmask

    # isfinite rejects NaN *and* +/-inf; casting inf to int64 is undefined.
    valid = ~np.isnat(start) & np.isfinite(n_days)
    out = np.full(len(part), np.datetime64("NaT", "ns"), dtype="datetime64[ns]")
    if valid.any():
        shifted = np.busday_offset(
            start[valid],
            n_days[valid].astype("int64"),
            **kwargs,
        )
        out[valid] = shifted.astype("datetime64[ns]")

    return pd.Series(out, index=part.index)
82
+
83
+
84
+ # ---------------- BusinessDays ----------------
85
+
86
class BusinessDays:
    """
    Business day calculations with custom holidays and optional weekmask.

    Features
    - Scalar helpers:
        - get_business_days_count(begin, end, inclusive=False) -> int
        - add_business_days(start_date, n_days, roll='forward') -> np.datetime64
    - Dask DataFrame helpers (vectorized via map_partitions):
        - calc_business_days_from_df(df, begin_col, end_col, result_col='business_days', inclusive=False)
        - calc_sla_end_date(df, start_date_col, n_days_col, result_col='sla_end_date', roll='forward')

    Parameters
    ----------
    holiday_list : dict[str, list[str]] | Iterable[str]
        Either a mapping of year -> [YYYY-MM-DD, ...] or a flat iterable of YYYY-MM-DD strings.
    debug : bool
        Stored as ``self.debug``; not otherwise used by this class.
    logger : Any
        Logger with .debug/.info/.warning/.error. A default logger named after
        the class is created when omitted.
    weekmask : str | None
        A numpy business day weekmask like '1111100' (Mon–Fri). None means default Mon–Fri.
        Examples:
            '1111100' -> Mon-Fri
            '1111110' -> Mon-Sat
    """

    def __init__(
        self,
        holiday_list: Dict[str, list[str]] | Iterable[str],
        debug: bool = False,
        logger: Optional[Logger] = None,
        weekmask: Optional[str] = None,
    ) -> None:
        self.debug = debug
        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
        self.weekmask = weekmask

        # Flatten to a tuple of unique entries. A mapping is walked in key
        # order; a flat iterable keeps the caller's order (it is not sorted).
        if isinstance(holiday_list, dict):
            flat = [d for _, days in sorted(holiday_list.items()) for d in days]
        else:
            flat = list(holiday_list)
        # dict.fromkeys drops duplicates while preserving first-seen order.
        self.holidays: tuple[str, ...] = tuple(dict.fromkeys(flat))

    # -------- Internal helpers --------

    def _busday_kwargs(self) -> Dict[str, Any]:
        """Build the holidays/weekmask keyword arguments shared by the numpy busday calls."""
        kwargs: Dict[str, Any] = {}
        if self.holidays:
            kwargs["holidays"] = np.array(self.holidays, dtype="datetime64[D]")
        if self.weekmask:
            kwargs["weekmask"] = self.weekmask
        return kwargs

    def _require_columns(self, df: dd.DataFrame, *columns: str) -> None:
        """Log and raise ``ValueError`` if any of ``columns`` is absent from ``df``."""
        missing = set(columns) - set(df.columns)
        if missing:
            self.logger.error(f"Missing columns: {missing}")
            raise ValueError("Required columns are missing from DataFrame")

    # -------- Scalar API --------

    def get_business_days_count(
        self,
        begin_date: str | dt.date | pd.Timestamp,
        end_date: str | dt.date | pd.Timestamp,
        *,
        inclusive: bool = False,
    ) -> int:
        """
        Business days between two dates.

        With ``inclusive=False`` this is plain ``np.busday_count``: the end
        date is excluded and the result is negative when ``end_date`` precedes
        ``begin_date``.

        With ``inclusive=True`` both endpoints are counted. When ``end_date``
        precedes ``begin_date`` the result is the negative of the inclusive
        count for the swapped pair, so ``count(a, b) == -count(b, a)``.
        """
        begin = np.datetime64(pd.to_datetime(begin_date).date())
        end = np.datetime64(pd.to_datetime(end_date).date())
        kwargs = self._busday_kwargs()

        if not inclusive:
            return int(np.busday_count(begin, end, **kwargs))

        # Extend the *later* bound by one day; adding the day to ``end``
        # unconditionally would shrink a reversed range instead of including
        # its end date.
        backwards = bool(end < begin)
        first, last = (end, begin) if backwards else (begin, end)
        count = int(np.busday_count(first, last + np.timedelta64(1, "D"), **kwargs))
        return -count if backwards else count

    def add_business_days(
        self,
        start_date: str | dt.date | pd.Timestamp,
        n_days: int,
        *,
        roll: str = "forward",
    ) -> np.datetime64:
        """
        Add (or subtract) business days to a date. Returns numpy datetime64[D].
        roll: {'forward','backward','following','preceding','modifiedfollowing',
               'modifiedpreceding','nat'}
        """
        start = np.datetime64(pd.to_datetime(start_date).date())
        return np.busday_offset(start, int(n_days), roll=roll, **self._busday_kwargs())

    # -------- Dask API --------

    def calc_business_days_from_df(
        self,
        df: dd.DataFrame,
        begin_date_col: str,
        end_date_col: str,
        result_col: str = "business_days",
        *,
        inclusive: bool = False,
    ) -> dd.DataFrame:
        """
        Vectorized business-day difference between two date columns.
        Produces float64 (NaN where either side is missing).

        Returns ``df`` with ``result_col`` assigned from a ``map_partitions``
        call over ``_vectorized_busday_count``.

        Raises
        ------
        ValueError
            If ``begin_date_col`` or ``end_date_col`` is not a column of ``df``.
        """
        self._require_columns(df, begin_date_col, end_date_col)

        counts = df.map_partitions(
            _vectorized_busday_count,
            begin_col=begin_date_col,
            end_col=end_date_col,
            holidays=self.holidays,
            weekmask=self.weekmask,
            inclusive=inclusive,
            meta=(result_col, "f8"),
        )
        return df.assign(**{result_col: counts})

    def calc_sla_end_date(
        self,
        df: dd.DataFrame,
        start_date_col: str,
        n_days_col: str,
        result_col: str = "sla_end_date",
        *,
        roll: str = "forward",
    ) -> dd.DataFrame:
        """
        Vectorized business-day offset for SLA end date.
        Produces datetime64[ns] with NaT where invalid.

        Returns ``df`` with ``result_col`` assigned from a ``map_partitions``
        call over ``_vectorized_busday_offset``.

        Raises
        ------
        ValueError
            If ``start_date_col`` or ``n_days_col`` is not a column of ``df``.
        """
        self._require_columns(df, start_date_col, n_days_col)

        end_dates = df.map_partitions(
            _vectorized_busday_offset,
            start_col=start_date_col,
            n_days_col=n_days_col,
            holidays=self.holidays,
            weekmask=self.weekmask,
            roll=roll,
            meta=(result_col, "datetime64[ns]"),
        )
        return df.assign(**{result_col: end_dates})