sibi-dst 2025.1.13-py3-none-any.whl → 2025.8.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/__init__.py +7 -1
- sibi_dst/df_helper/__init__.py +3 -2
- sibi_dst/df_helper/_artifact_updater_async.py +238 -0
- sibi_dst/df_helper/_artifact_updater_threaded.py +195 -0
- sibi_dst/df_helper/_df_helper.py +418 -118
- sibi_dst/df_helper/_parquet_artifact.py +275 -283
- sibi_dst/df_helper/_parquet_reader.py +9 -10
- sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
- sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
- sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
- sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
- sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
- sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
- sibi_dst/osmnx_helper/route_path_builder.py +45 -46
- sibi_dst/utils/__init__.py +2 -0
- sibi_dst/utils/base.py +235 -100
- sibi_dst/utils/business_days.py +248 -0
- sibi_dst/utils/clickhouse_writer.py +472 -206
- sibi_dst/utils/data_utils.py +139 -186
- sibi_dst/utils/data_wrapper.py +392 -88
- sibi_dst/utils/date_utils.py +711 -393
- sibi_dst/utils/df_utils.py +193 -213
- sibi_dst/utils/file_age_checker.py +301 -0
- sibi_dst/utils/file_utils.py +3 -2
- sibi_dst/utils/filepath_generator.py +314 -152
- sibi_dst/utils/log_utils.py +581 -242
- sibi_dst/utils/manifest_manager.py +60 -76
- sibi_dst/utils/parquet_saver.py +33 -27
- sibi_dst/utils/periods.py +42 -0
- sibi_dst/utils/phone_formatter.py +88 -95
- sibi_dst/utils/update_planner.py +180 -178
- sibi_dst/utils/webdav_client.py +116 -166
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.2.dist-info}/METADATA +1 -1
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.2.dist-info}/RECORD +36 -30
- sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -422
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.2.dist-info}/WHEEL +0 -0
sibi_dst/utils/base.py
CHANGED
@@ -1,117 +1,252 @@
-import
-
+import abc
+import threading
+import weakref
+from typing import Self, Optional, Callable

-
-
-
+import fsspec
+
+from sibi_dst.utils import Logger

-    It handles the creation and cleanup of these resources, ensuring they are only
-    closed if they were created by the instance itself.
-    """

-
-
-
+class ManagedResource(abc.ABC):
+    """
+    Boilerplate ABC for components that manage a logger and an optional fsspec filesystem,
+    with sync/async lifecycle helpers, lazy FS creation via an optional factory, and
+    configurable cleanup-error logging.
+    """

-
-
-
-
+    def __init__(
+        self,
+        *,
+        verbose: bool = False,
+        debug: bool = False,
+        log_cleanup_errors: bool = True,
+        logger: Optional[Logger] = None,
+        fs: Optional[fsspec.AbstractFileSystem] = None,
+        fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None,
+        **_: object,
+    ) -> None:
+        # ---- Declared upfront for type checkers
+        self.logger: Logger
+        self.fs: Optional[fsspec.AbstractFileSystem] = None
+        self._fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None
+        self._owns_logger: bool = False
+        self._owns_fs: bool = False
+        self._is_closed: bool = False
+        self._closing: bool = False
+        self._close_lock = threading.RLock()
+
+        self.verbose = verbose
+        self.debug = debug
+        self._log_cleanup_errors = log_cleanup_errors
+
+        # ---- Logger ownership
+        if logger is None:
+            self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
+            self._owns_logger = True
+            level = Logger.DEBUG if self.debug else (Logger.INFO if self.verbose else Logger.WARNING)
+            self.logger.set_level(level)
+        else:
             self.logger = logger
-            self.
-
+            self._owns_logger = False  # do not mutate external logger
+
+        # ---- FS ownership & lazy creation
+        if fs is not None:
+            self.fs = fs
+            self._owns_fs = False
+            self._fs_factory = None
+        elif fs_factory is not None:
+            # Lazy: don't create until first use
+            self._fs_factory = fs_factory
+            self._owns_fs = True  # we will own it *if* created
+            self.fs = None
         else:
-
-            self.
-
-
-            # Set default logger_name if not specified in the config
-            logger_config.setdefault("logger_name", self.__class__.__name__)
+            self.fs = None
+            self._owns_fs = False
+            self._fs_factory = None

-
-
-
+        # Register a GC-time finalizer that does not capture self
+        self_ref = weakref.ref(self)
+        self._finalizer = weakref.finalize(self, self._finalize_static, self_ref)

-
-
-
-
+        if self.debug:
+            try:
+                self.logger.debug("Component %s initialized. %s", self.__class__.__name__, repr(self))
+            except Exception:
+                pass

-
-
-
+    # ---------- Introspection ----------
+    @property
+    def is_closed(self) -> bool:
+        return self._is_closed

-
+    @property
+    def closed(self) -> bool:  # alias
+        return self._is_closed

-    def
-
-        self.
+    def __repr__(self) -> str:
+        class_name = self.__class__.__name__
+        logger_status = "own" if self._owns_logger else "external"
+        if self.fs is None and self._fs_factory is not None:
+            fs_status = "own(lazy)"
+        elif self.fs is None:
+            fs_status = "none"
+        else:
+            fs_status = "own" if self._owns_fs else "external"
+        return (f"<{class_name} debug={self.debug} verbose={self.verbose} "
+                f"log_cleanup_errors={self._log_cleanup_errors} "
+                f"logger={logger_status} fs={fs_status}>")
+
+    # ---------- Subclass hooks ----------
+    def _cleanup(self) -> None:
+        """Sync cleanup for resources created BY THE SUBCLASS."""
+        return
+
+    async def _acleanup(self) -> None:
+        """Async cleanup for resources created BY THE SUBCLASS."""
+        return
+
+    # ---------- FS helpers ----------
+    def _ensure_fs(self) -> Optional[fsspec.AbstractFileSystem]:
+        """Create the FS lazily if a factory was provided. Return fs (or None)."""
+        if self.fs is None and self._fs_factory is not None:
+            created = self._fs_factory()
+            if not isinstance(created, fsspec.AbstractFileSystem):
+                raise TypeError(f"fs_factory() must return fsspec.AbstractFileSystem, got {type(created)!r}")
+            self.fs = created
+            # _owns_fs already True when factory is present
+        return self.fs
+
+    def require_fs(self) -> fsspec.AbstractFileSystem:
+        """Return a filesystem or raise if not configured/creatable."""
+        fs = self._ensure_fs()
+        if fs is None:
+            raise RuntimeError(
+                f"{self.__class__.__name__}: filesystem is required but not configured"
+            )
+        return fs
+
+    # ---------- Shared shutdown helpers (no logging; safe for late shutdown) ----------
+    def _release_owned_fs(self) -> None:
+        if self._owns_fs:
+            # ensure creation state is respected even if never used
+            _ = self.fs or None  # no-op; if never created, nothing to close
+            if self.fs is not None:
+                close = getattr(self.fs, "close", None)
+                try:
+                    if callable(close):
+                        close()
+                finally:
+                    self.fs = None
+
+    def _shutdown_logger(self) -> None:
+        if self._owns_logger:
+            try:
+                self.logger.shutdown()
+            except Exception:
+                pass
+
+    def _shutdown_owned_resources(self) -> None:
+        self._release_owned_fs()
+        self._shutdown_logger()
+
+    # ---------- Public lifecycle (sync) ----------
+    def close(self) -> None:
+        with self._close_lock:
+            if self._is_closed or self._closing:
+                return
+            self._closing = True
+
+        try:
+            self._cleanup()
+        except Exception:
+            # Only include traceback when debug=True
+            if self._log_cleanup_errors:
+                try:
+                    self.logger.error(
+                        "Error during %s._cleanup()", self.__class__.__name__,
+                        exc_info=self.debug
+                    )
+                except Exception:
+                    pass
+            raise
+        finally:
+            with self._close_lock:
+                self._is_closed = True
+                self._closing = False
+            self._shutdown_owned_resources()
+            if self.debug:
+                try:
+                    self.logger.debug("Component %s closed.", self.__class__.__name__)
+                except Exception:
+                    pass
+
+    # ---------- Public lifecycle (async) ----------
+    async def aclose(self) -> None:
+        with self._close_lock:
+            if self._is_closed or self._closing:
+                return
+            self._closing = True
+
+        try:
+            await self._acleanup()
+        except Exception:
+            # Only include traceback when debug=True
+            if self._log_cleanup_errors:
+                try:
+                    self.logger.error(
+                        "Error during %s._acleanup()", self.__class__.__name__,
+                        exc_info=self.debug
+                    )
+                except Exception:
+                    pass
+            raise
+        finally:
+            with self._close_lock:
+                self._is_closed = True
+                self._closing = False
+            self._shutdown_owned_resources()
+            if self.debug:
+                try:
+                    self.logger.debug("Async component %s closed.", self.__class__.__name__)
+                except Exception:
+                    pass
+
+    # ---------- Context managers ----------
+    def __enter__(self) -> Self:
         return self

-    def __exit__(self, exc_type,
-
-
-        return False  # Propagate exceptions
-
-    # --- Asynchronous Context Management ---
+    def __exit__(self, exc_type, exc, tb) -> bool:
+        self.close()
+        return False  # propagate exceptions

-    async def __aenter__(self):
-        """Enter the runtime context for 'async with' statements."""
-        self._entered = True
+    async def __aenter__(self) -> Self:
         return self

-    async def __aexit__(self, exc_type,
-
-
-
-
-
-
-
-
-
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        self.fs.clear_instance_cache()
-
-        if self._own_logger and hasattr(self.logger, "shutdown"):
-            # Ensure the logger exists before trying to use or shut it down
-            if self.logger:
-                self.logger.debug(f"'{self.__class__.__name__}' is shutting down its own logger.")
-            self.logger.shutdown()
-            self.logger = None  # Set to None after shutdown
-
-        self._entered = False
-
-    async def acleanup(self):
-        """
-        Async Cleanup resources managed by this instance.
-        """
-        if self._own_fs and hasattr(self.fs, "clear_instance_cache"):
-            if self.logger:
-                self.logger.debug(f"'{self.__class__.__name__}' is clearing its own filesystem cache.")
-            self.fs.clear_instance_cache()
-
-        if self._own_logger and hasattr(self.logger, "shutdown"):
-            # Ensure the logger exists before trying to use or shut it down
-            if self.logger:
-                self.logger.debug(f"'{self.__class__.__name__}' is shutting down its own logger.")
-            self.logger.shutdown()
-            self.logger = None  # Set to None after shutdown
-
-        self._entered = False
+    async def __aexit__(self, exc_type, exc, tb) -> bool:
+        await self.aclose()
+        return False
+
+    # ---------- Finalizer (GC-time; absolutely silent) ----------
+    @staticmethod
+    def _finalize_static(ref: "weakref.ReferenceType[ManagedResource]") -> None:
+        obj = ref()
+        if obj is None:
+            return
+        # No logging here; interpreter may be tearing down.
+        # Best-effort silent cleanup; avoid locks and context managers.
+        try:
+            if not obj._is_closed:
+                try:
+                    obj._cleanup()
+                except Exception:
+                    pass
+                obj._is_closed = True
+                try:
+                    obj._shutdown_owned_resources()
+                except Exception:
+                    pass
+        except Exception:
+            # do not show anything at garbage collection time
+            pass

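To make the new lifecycle concrete, here is a minimal usage sketch (not from the package: `TempDirArtifact` is a hypothetical subclass, and the import path is assumed from this file's location). Subclasses override `_cleanup()`/`_acleanup()` for resources they create; the base class then releases only the logger and filesystem it owns, never externally supplied ones.

```python
# Hypothetical example subclass of the ManagedResource shown above.
import fsspec

from sibi_dst.utils.base import ManagedResource  # import path assumed from this diff


class TempDirArtifact(ManagedResource):
    """Illustrative component owning a scratch directory on an fsspec filesystem."""

    def __init__(self, path: str, **kwargs):
        super().__init__(**kwargs)
        self.path = path

    def _cleanup(self) -> None:
        # Subclass-created resources only; the base class afterwards shuts down
        # the filesystem/logger it owns via _shutdown_owned_resources().
        self.require_fs().rm(self.path, recursive=True)


with TempDirArtifact(
    "scratch/run1",
    fs_factory=lambda: fsspec.filesystem("memory"),  # lazy: created on first require_fs()
    debug=True,
) as art:
    fs = art.require_fs()
    fs.makedirs(art.path, exist_ok=True)
    with fs.open(f"{art.path}/data.txt", "w") as f:
        f.write("hello")
# __exit__ -> close(): runs _cleanup(), marks the instance closed, then releases
# the owned (factory-created) filesystem and the default logger.
```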
sibi_dst/utils/business_days.py
ADDED
@@ -0,0 +1,248 @@
+import datetime as dt
+from typing import Any, Dict, Iterable, Optional
+from sibi_dst.utils import Logger
+import numpy as np
+import pandas as pd
+import dask.dataframe as dd
+
+
+# ---------------- Vectorized helpers (used by Dask map_partitions) ----------------
+
+def _to_np_days(series: pd.Series) -> np.ndarray:
+    """Coerce to numpy datetime64[D] with NaT-safe conversion."""
+    # Use pandas for robust parsing, then cast to date-days
+    s = pd.to_datetime(series, errors="coerce")
+    # Convert to numpy datetime64[D] (day precision)
+    return s.values.astype("datetime64[D]")
+
+
+def _vectorized_busday_count(
+    part: pd.DataFrame,
+    begin_col: str,
+    end_col: str,
+    holidays: Iterable[str],
+    weekmask: Optional[str],
+    inclusive: bool,
+) -> pd.Series:
+    start = _to_np_days(part[begin_col])  # numpy datetime64[D]
+    end = _to_np_days(part[end_col])  # numpy datetime64[D]
+
+    kwargs: Dict[str, Any] = {}
+    if holidays:
+        kwargs["holidays"] = np.array(list(holidays), dtype="datetime64[D]")
+    if weekmask:
+        kwargs["weekmask"] = weekmask
+
+    end_adj = end
+    if inclusive:
+        with np.errstate(invalid="ignore"):
+            end_adj = end + np.timedelta64(1, "D")
+
+    valid = (~pd.isna(start)) & (~pd.isna(end))  # numpy bool mask
+    result = np.full(part.shape[0], np.nan, dtype="float64")
+    if valid.any():
+        counts = np.busday_count(
+            start[valid].astype("datetime64[D]"),
+            end_adj[valid].astype("datetime64[D]"),
+            **kwargs,
+        ).astype("float64")
+        result[valid] = counts
+
+    return pd.Series(result, index=part.index)
+
+
+def _vectorized_busday_offset(
+    part: pd.DataFrame,
+    start_col: str,
+    n_days_col: str,
+    holidays: Iterable[str],
+    weekmask: Optional[str],
+    roll: str,
+) -> pd.Series:
+    start = _to_np_days(part[start_col])  # numpy datetime64[D]
+    n_days = pd.to_numeric(part[n_days_col], errors="coerce").to_numpy()  # numpy float -> cast later
+
+    kwargs: Dict[str, Any] = {"roll": roll}
+    if holidays:
+        kwargs["holidays"] = np.array(list(holidays), dtype="datetime64[D]")
+    if weekmask:
+        kwargs["weekmask"] = weekmask
+
+    valid = (~pd.isna(start)) & (~pd.isna(n_days))  # numpy bool mask
+    out = np.full(part.shape[0], np.datetime64("NaT", "ns"), dtype="datetime64[ns]")
+    if valid.any():
+        offs = np.busday_offset(
+            start[valid].astype("datetime64[D]"),
+            n_days[valid].astype("int64"),
+            **kwargs,
+        ).astype("datetime64[ns]")
+        out[valid] = offs
+
+    return pd.Series(out, index=part.index)
+
+
+# ---------------- BusinessDays ----------------
+
+class BusinessDays:
+    """
+    Business day calculations with custom holidays and optional weekmask.
+
+    Features
+    - Scalar helpers:
+      - get_business_days_count(begin, end, inclusive=False) -> int
+      - add_business_days(start_date, n_days, roll='forward') -> np.datetime64
+    - Dask DataFrame helpers (vectorized via map_partitions):
+      - calc_business_days_from_df(df, begin_col, end_col, result_col='business_days', inclusive=False)
+      - calc_sla_end_date(df, start_date_col, n_days_col, result_col='sla_end_date', roll='forward')
+
+    Parameters
+    ----------
+    holiday_list : dict[str, list[str]] | Iterable[str]
+        Either a mapping of year -> [YYYY-MM-DD, ...] or a flat iterable of YYYY-MM-DD strings.
+    logger : Any
+        Logger with .debug/.info/.warning/.error.
+    weekmask : str | None
+        A numpy business day weekmask like '1111100' (Mon–Fri). None means default Mon–Fri.
+        Examples:
+        '1111100' -> Mon-Fri
+        '1111110' -> Mon-Sat
+    """
+
+    def __init__(
+        self,
+        holiday_list: Dict[str, list[str]] | Iterable[str],
+        debug: bool = False,
+        logger: Optional[Logger] = None,
+        weekmask: Optional[str] = None,
+    ) -> None:
+        self.debug = debug
+        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
+        self.weekmask = weekmask
+
+        # Normalize holidays to a flat, sorted tuple of 'YYYY-MM-DD'
+        if isinstance(holiday_list, dict):
+            flat = [d for _, days in sorted(holiday_list.items()) for d in days]
+        else:
+            flat = list(holiday_list)
+        # Deduplicate while preserving order
+        seen = set()
+        flat_unique = []
+        for d in flat:
+            if d not in seen:
+                seen.add(d)
+                flat_unique.append(d)
+        self.holidays: tuple[str, ...] = tuple(flat_unique)
+
+    # -------- Scalar API --------
+
+    def get_business_days_count(
+        self,
+        begin_date: str | dt.date | pd.Timestamp,
+        end_date: str | dt.date | pd.Timestamp,
+        *,
+        inclusive: bool = False,
+    ) -> int:
+        """Business days between two dates. If inclusive=True, include the end date."""
+        b = pd.to_datetime(begin_date).date()
+        e = pd.to_datetime(end_date).date()
+
+        kwargs: Dict[str, Any] = {}
+        if self.holidays:
+            kwargs["holidays"] = np.array(self.holidays, dtype="datetime64[D]")
+        if self.weekmask:
+            kwargs["weekmask"] = self.weekmask
+
+        if inclusive:
+            e_np = np.datetime64(e) + np.timedelta64(1, "D")
+        else:
+            e_np = np.datetime64(e)
+
+        val = int(np.busday_count(np.datetime64(b), e_np, **kwargs))
+        return val
+
+    def add_business_days(
+        self,
+        start_date: str | dt.date | pd.Timestamp,
+        n_days: int,
+        *,
+        roll: str = "forward",
+    ) -> np.datetime64:
+        """
+        Add (or subtract) business days to a date. Returns numpy datetime64[D].
+        roll: {'forward','backward','following','preceding','modifiedfollowing',
+               'modifiedpreceding','nat'}
+        """
+        s = pd.to_datetime(start_date).date()
+        kwargs: Dict[str, Any] = {"roll": roll}
+        if self.holidays:
+            kwargs["holidays"] = np.array(self.holidays, dtype="datetime64[D]")
+        if self.weekmask:
+            kwargs["weekmask"] = self.weekmask
+
+        return np.busday_offset(np.datetime64(s), int(n_days), **kwargs)
+
+    # -------- Dask API --------
+
+    def calc_business_days_from_df(
+        self,
+        df: dd.DataFrame,
+        begin_date_col: str,
+        end_date_col: str,
+        result_col: str = "business_days",
+        *,
+        inclusive: bool = False,
+    ) -> dd.DataFrame:
+        """
+        Vectorized business-day difference between two date columns.
+        Produces float64 (NaN where either side is missing).
+        """
+        missing = {begin_date_col, end_date_col} - set(df.columns)
+        if missing:
+            self.logger.error(f"Missing columns: {missing}")
+            raise ValueError("Required columns are missing from DataFrame")
+
+        return df.assign(
+            **{
+                result_col: df.map_partitions(
+                    _vectorized_busday_count,
+                    begin_col=begin_date_col,
+                    end_col=end_date_col,
+                    holidays=self.holidays,
+                    weekmask=self.weekmask,
+                    inclusive=inclusive,
+                    meta=(result_col, "f8"),
+                )
+            }
+        )
+
+    def calc_sla_end_date(
+        self,
+        df: dd.DataFrame,
+        start_date_col: str,
+        n_days_col: str,
+        result_col: str = "sla_end_date",
+        *,
+        roll: str = "forward",
+    ) -> dd.DataFrame:
+        """
+        Vectorized business-day offset for SLA end date.
+        Produces datetime64[ns] with NaT where invalid.
+        """
+        missing = {start_date_col, n_days_col} - set(df.columns)
+        if missing:
+            self.logger.error(f"Missing columns: {missing}")
+            raise ValueError("Required columns are missing from DataFrame")
+
+        return df.assign(
+            **{
+                result_col: df.map_partitions(
+                    _vectorized_busday_offset,
+                    start_col=start_date_col,
+                    n_days_col=n_days_col,
+                    holidays=self.holidays,
+                    weekmask=self.weekmask,
+                    roll=roll,
+                    meta=(result_col, "datetime64[ns]"),
+                )
+            }
+        )
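Likewise, a short sketch of the new `BusinessDays` API (import path assumed from this file's location; holiday dates, weekmask, and column names are illustrative):

```python
import pandas as pd
import dask.dataframe as dd

from sibi_dst.utils.business_days import BusinessDays  # import path assumed from this diff

# year -> dates mapping; a flat iterable of 'YYYY-MM-DD' strings also works
bd = BusinessDays(
    holiday_list={"2025": ["2025-01-01", "2025-12-25"]},
    weekmask="1111110",  # Mon-Sat
)

# Scalar helpers
n = bd.get_business_days_count("2025-12-24", "2025-12-29", inclusive=True)
due = bd.add_business_days("2025-12-24", 3, roll="forward")  # numpy datetime64[D]

# Dask helpers: vectorized per partition, NaN/NaT-safe for missing inputs
pdf = pd.DataFrame({
    "opened": ["2025-12-22", None],
    "closed": ["2025-12-29", "2025-12-30"],
    "sla_days": [5, 3],
})
ddf = dd.from_pandas(pdf, npartitions=1)
ddf = bd.calc_business_days_from_df(ddf, "opened", "closed")  # adds float64 'business_days'
ddf = bd.calc_sla_end_date(ddf, "opened", "sla_days")         # adds datetime64[ns] 'sla_end_date'
print(ddf.compute())
```

Both Dask helpers validate that the named columns exist before dispatching to `map_partitions`, and rows with unparseable dates come back as NaN/NaT rather than raising.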