sibi-dst 2025.1.10__tar.gz → 2025.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/PKG-INFO +1 -1
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/pyproject.toml +1 -1
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/_df_helper.py +3 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +2 -4
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/date_utils.py +132 -132
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +4 -3
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/README.md +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/_parquet_reader.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/parquet/_filter_handler.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/core/_defaults.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/data_cleaner.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/geopy_helper/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/geopy_helper/utils.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/osmnx_helper/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/osmnx_helper/utils.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/tests/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/base.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/clickhouse_writer.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/credentials.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/data_from_http_source.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/data_utils.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/data_wrapper.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/df_utils.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/file_utils.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/filepath_generator.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/log_utils.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/manifest_manager.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/parquet_saver.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/phone_formatter.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/storage_config.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/storage_manager.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/update_planner.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/utils/webdav_client.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/utils/__init__.py +0 -0
- {sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -68,6 +68,9 @@ class ParquetBackend(BaseBackend):
|
|
68
68
|
df = self.helper.backend_parquet.load_files()
|
69
69
|
if options and df is not None:
|
70
70
|
df = FilterHandler('dask', logger=self.logger, debug=False).apply_filters(df, filters=options)
|
71
|
+
|
72
|
+
df = df.persist()
|
73
|
+
|
71
74
|
self.total_records = len(df) or -1 # If df is empty, set total_records to -1
|
72
75
|
return self.total_records, df
|
73
76
|
except Exception as e:
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py
RENAMED
@@ -15,7 +15,7 @@ from sqlalchemy.engine import url as sqlalchemy_url
|
|
15
15
|
from sqlalchemy.engine import Engine
|
16
16
|
from sqlalchemy.exc import OperationalError, SQLAlchemyError
|
17
17
|
from sqlalchemy.orm import sessionmaker, Session
|
18
|
-
from sqlalchemy.pool import QueuePool, NullPool, StaticPool
|
18
|
+
from sqlalchemy.pool import QueuePool, NullPool, StaticPool, Pool
|
19
19
|
|
20
20
|
# Assuming these are your project's internal modules
|
21
21
|
from sibi_dst.utils import Logger
|
@@ -54,7 +54,7 @@ class SqlAlchemyConnectionConfig(BaseModel):
|
|
54
54
|
pool_timeout: int = int(os.environ.get("DB_POOL_TIMEOUT", 30))
|
55
55
|
pool_recycle: int = int(os.environ.get("DB_POOL_RECYCLE", 1800))
|
56
56
|
pool_pre_ping: bool = True
|
57
|
-
poolclass: Type[
|
57
|
+
poolclass: Type[Pool] = QueuePool
|
58
58
|
|
59
59
|
# --- Internal & Runtime State ---
|
60
60
|
model: Optional[Type[Any]] = None
|
@@ -195,7 +195,6 @@ class SqlAlchemyConnectionConfig(BaseModel):
|
|
195
195
|
wrapper = self._engine_registry.get(self._engine_key_instance)
|
196
196
|
if wrapper:
|
197
197
|
wrapper['active_connections'] += 1
|
198
|
-
# self.logger.debug(f"Connection checked out. Active: {self.active_connections}")
|
199
198
|
|
200
199
|
def _on_checkin(self, *args) -> None:
|
201
200
|
"""Event listener for when a connection is returned to the pool."""
|
@@ -203,7 +202,6 @@ class SqlAlchemyConnectionConfig(BaseModel):
|
|
203
202
|
wrapper = self._engine_registry.get(self._engine_key_instance)
|
204
203
|
if wrapper:
|
205
204
|
wrapper['active_connections'] = max(0, wrapper['active_connections'] - 1)
|
206
|
-
# self.logger.debug(f"Connection checked in. Active: {self.active_connections}")
|
207
205
|
|
208
206
|
@property
|
209
207
|
def active_connections(self) -> int:
|
@@ -1,10 +1,12 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import datetime
|
2
4
|
from typing import Union, Tuple, Callable, Dict, Optional
|
3
5
|
|
4
6
|
import fsspec
|
5
7
|
import numpy as np
|
6
8
|
import pandas as pd
|
7
|
-
|
9
|
+
import dask.dataframe as dd
|
8
10
|
from .log_utils import Logger
|
9
11
|
|
10
12
|
|
@@ -305,154 +307,152 @@ class FileAgeChecker:
|
|
305
307
|
raise ValueError(f"Unsupported modification time format for {file_path}") from e
|
306
308
|
|
307
309
|
|
308
|
-
|
310
|
+
# --- Vectorized Helper Functions ---
|
311
|
+
|
312
|
+
def _vectorized_busday_count(partition, begin_col, end_col, holidays):
|
309
313
|
"""
|
310
|
-
|
311
|
-
holiday list. The class includes methods for calculating the number of
|
312
|
-
business days, modifying dates by adding business days, and applying these
|
313
|
-
operations to Dask DataFrames.
|
314
|
-
|
315
|
-
:ivar logger: Logger instance for logging error, warning, and debug messages.
|
316
|
-
:type logger: logging.Logger
|
317
|
-
:ivar HOLIDAY_LIST: Dictionary mapping years to lists of holiday dates.
|
318
|
-
:type HOLIDAY_LIST: dict
|
319
|
-
:ivar bd_cal: Numpy busdaycalendar object containing holidays and week mask.
|
320
|
-
:type bd_cal: numpy.busdaycalendar
|
321
|
-
:ivar holidays: Array of holiday dates used by the business day calendar.
|
322
|
-
:type holidays: numpy.ndarray
|
323
|
-
:ivar week_mask: Boolean array indicating working days within a week.
|
324
|
-
:type week_mask: numpy.ndarray
|
314
|
+
Calculates the number of business days between a start and end date.
|
325
315
|
"""
|
316
|
+
# Extract the raw columns
|
317
|
+
start_dates_raw = partition[begin_col]
|
318
|
+
end_dates_raw = partition[end_col]
|
326
319
|
|
327
|
-
def __init__(self, holiday_list, logger):
|
328
|
-
"""
|
329
|
-
Initialize a BusinessDays object with a given holiday list.
|
330
|
-
"""
|
331
|
-
self.logger = logger
|
332
|
-
self.HOLIDAY_LIST = holiday_list
|
333
|
-
bd_holidays = [day for year in self.HOLIDAY_LIST for day in self.HOLIDAY_LIST[year]]
|
334
|
-
self.bd_cal = np.busdaycalendar(holidays=bd_holidays, weekmask="1111100")
|
335
|
-
self.holidays = self.bd_cal.holidays
|
336
|
-
self.week_mask = self.bd_cal.weekmask
|
337
320
|
|
338
|
-
|
339
|
-
|
340
|
-
Calculate the number of business days between two dates.
|
341
|
-
"""
|
342
|
-
try:
|
343
|
-
begin_date = pd.to_datetime(begin_date)
|
344
|
-
end_date = pd.to_datetime(end_date)
|
345
|
-
except Exception as e:
|
346
|
-
raise ValueError(f"Invalid date format: {e}")
|
321
|
+
start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
|
322
|
+
end_dates = pd.to_datetime(end_dates_raw, errors='coerce')
|
347
323
|
|
348
|
-
|
349
|
-
|
350
|
-
raise ValueError("Not all years in date range are in the holiday list")
|
324
|
+
# Initialize the result Series with NaN, as the output is a number
|
325
|
+
result = pd.Series(np.nan, index=partition.index)
|
351
326
|
|
352
|
-
|
353
|
-
|
354
|
-
end_date.strftime("%Y-%m-%d"),
|
355
|
-
busdaycal=self.bd_cal,
|
356
|
-
)
|
327
|
+
# Create a mask for rows where both start and end dates are valid
|
328
|
+
valid_mask = pd.notna(start_dates) & pd.notna(end_dates)
|
357
329
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
# Extract holidays and weekmask to recreate the busdaycalendar
|
367
|
-
holidays = self.bd_cal.holidays
|
368
|
-
weekmask = self.bd_cal.weekmask
|
369
|
-
|
370
|
-
# Define a function to calculate business days
|
371
|
-
def calculate_business_days(row, holidays, weekmask):
|
372
|
-
begin_date = pd.to_datetime(row[begin_date_col])
|
373
|
-
end_date = pd.to_datetime(row[end_date_col])
|
374
|
-
if pd.isna(begin_date) or pd.isna(end_date):
|
375
|
-
return np.nan
|
376
|
-
busdaycal = np.busdaycalendar(holidays=holidays, weekmask=weekmask)
|
377
|
-
return np.busday_count(
|
378
|
-
begin_date.strftime("%Y-%m-%d"),
|
379
|
-
end_date.strftime("%Y-%m-%d"),
|
380
|
-
busdaycal=busdaycal,
|
381
|
-
)
|
382
|
-
|
383
|
-
# Define a wrapper function for partition-wise operations
|
384
|
-
def apply_business_days(partition, holidays, weekmask):
|
385
|
-
return partition.apply(
|
386
|
-
calculate_business_days, axis=1, holidays=holidays, weekmask=weekmask
|
387
|
-
)
|
388
|
-
|
389
|
-
# Apply the function using map_partitions
|
390
|
-
df[result_col] = df.map_partitions(
|
391
|
-
apply_business_days,
|
392
|
-
holidays,
|
393
|
-
weekmask,
|
394
|
-
meta=(result_col, "int64"),
|
395
|
-
)
|
330
|
+
# Perform the vectorized calculation only on the valid subset
|
331
|
+
# Convert to NumPy arrays of date type for the calculation
|
332
|
+
result.loc[valid_mask] = np.busday_count(
|
333
|
+
start_dates[valid_mask].values.astype('datetime64[D]'),
|
334
|
+
end_dates[valid_mask].values.astype('datetime64[D]'),
|
335
|
+
holidays=holidays
|
336
|
+
)
|
396
337
|
|
397
|
-
|
338
|
+
return result
|
339
|
+
|
340
|
+
|
341
|
+
def _vectorized_sla_end_date(partition, start_col, n_days_col, holidays):
|
342
|
+
"""
|
343
|
+
Calculates the end date of an SLA, skipping weekends and holidays.
|
344
|
+
"""
|
345
|
+
# Extract the relevant columns as pandas Series
|
346
|
+
start_dates_raw = partition[start_col]
|
347
|
+
sla_days = partition[n_days_col]
|
398
348
|
|
399
|
-
def add_business_days(self, start_date, n_days):
|
400
|
-
"""
|
401
|
-
Add n_days business days to start_date.
|
402
|
-
"""
|
403
|
-
try:
|
404
|
-
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
|
405
|
-
except ValueError:
|
406
|
-
raise ValueError("Date should be a string in the format YYYY-MM-DD")
|
407
349
|
|
408
|
-
|
409
|
-
self.logger.warning(f"Year {start_date.year} is not in the holiday list")
|
350
|
+
start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
|
410
351
|
|
352
|
+
# Initialize the result Series with NaT (Not a Time)
|
353
|
+
result = pd.Series(pd.NaT, index=partition.index, dtype='datetime64[ns]')
|
354
|
+
|
355
|
+
# Create a mask for rows that have valid start dates and SLA days
|
356
|
+
valid_mask = pd.notna(start_dates) & pd.notna(sla_days)
|
357
|
+
|
358
|
+
# Perform the vectorized calculation only on the valid subset
|
359
|
+
# Note: np.busday_offset requires a NumPy array, so we use .values
|
360
|
+
result.loc[valid_mask] = np.busday_offset(
|
361
|
+
start_dates[valid_mask].values.astype('datetime64[D]'), # Convert to numpy array of dates
|
362
|
+
sla_days[valid_mask].astype(int), # Ensure days are integers
|
363
|
+
roll='forward',
|
364
|
+
holidays=holidays
|
365
|
+
)
|
366
|
+
|
367
|
+
return result
|
368
|
+
|
369
|
+
|
370
|
+
# --- Refactored BusinessDays Class ---
|
371
|
+
|
372
|
+
class BusinessDays:
|
373
|
+
"""
|
374
|
+
Business days calculations with a custom holiday list.
|
375
|
+
Supports scalar and efficient, vectorized Dask DataFrame operations.
|
376
|
+
"""
|
377
|
+
|
378
|
+
def __init__(self, holiday_list: dict[str, list[str]], logger) -> None:
|
379
|
+
self.logger = logger
|
380
|
+
self.HOLIDAY_LIST = holiday_list
|
381
|
+
|
382
|
+
# Flatten and store as tuple for determinism
|
383
|
+
bd_holidays = [day for year in self.HOLIDAY_LIST for day in self.HOLIDAY_LIST[year]]
|
384
|
+
self.holidays = tuple(bd_holidays)
|
385
|
+
|
386
|
+
def get_business_days_count(
|
387
|
+
self,
|
388
|
+
begin_date: str | datetime.date | pd.Timestamp,
|
389
|
+
end_date: str | datetime.date | pd.Timestamp,
|
390
|
+
) -> int:
|
391
|
+
"""Scalar method to count business days between two dates."""
|
392
|
+
begin = pd.to_datetime(begin_date)
|
393
|
+
end = pd.to_datetime(end_date)
|
394
|
+
return int(np.busday_count(begin.date(), end.date(), holidays=list(self.holidays)))
|
395
|
+
|
396
|
+
def calc_business_days_from_df(
|
397
|
+
self,
|
398
|
+
df: dd.DataFrame,
|
399
|
+
begin_date_col: str,
|
400
|
+
end_date_col: str,
|
401
|
+
result_col: str = "business_days",
|
402
|
+
) -> dd.DataFrame:
|
403
|
+
"""Calculates business days between two columns in a Dask DataFrame."""
|
404
|
+
missing = {begin_date_col, end_date_col} - set(df.columns)
|
405
|
+
if missing:
|
406
|
+
self.logger.error(f"Missing columns: {missing}")
|
407
|
+
raise ValueError("Required columns are missing from DataFrame")
|
408
|
+
|
409
|
+
return df.assign(
|
410
|
+
**{result_col: df.map_partitions(
|
411
|
+
_vectorized_busday_count,
|
412
|
+
begin_col=begin_date_col,
|
413
|
+
end_col=end_date_col,
|
414
|
+
holidays=list(self.holidays),
|
415
|
+
meta=(result_col, 'f8') # f8 is float64
|
416
|
+
)}
|
417
|
+
)
|
418
|
+
|
419
|
+
def add_business_days(
|
420
|
+
self,
|
421
|
+
start_date: str | datetime.date | pd.Timestamp,
|
422
|
+
n_days: int,
|
423
|
+
) -> np.datetime64:
|
424
|
+
"""Scalar method to add N business days to a start date."""
|
425
|
+
start = pd.to_datetime(start_date)
|
411
426
|
return np.busday_offset(
|
412
|
-
|
427
|
+
start.date(),
|
413
428
|
n_days,
|
414
|
-
roll=
|
415
|
-
|
429
|
+
roll='forward',
|
430
|
+
holidays=list(self.holidays),
|
416
431
|
)
|
417
432
|
|
418
|
-
def calc_sla_end_date(
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
)
|
440
|
-
|
441
|
-
# Define a wrapper for partition-wise operation
|
442
|
-
def apply_sla_end_date(partition, holidays, weekmask):
|
443
|
-
return partition.apply(
|
444
|
-
calculate_sla_end_date, axis=1, holidays=holidays, weekmask=weekmask
|
445
|
-
)
|
446
|
-
|
447
|
-
# Apply the function using map_partitions
|
448
|
-
df[result_col] = df.map_partitions(
|
449
|
-
apply_sla_end_date,
|
450
|
-
holidays,
|
451
|
-
weekmask,
|
452
|
-
meta=(result_col, "object"),
|
433
|
+
def calc_sla_end_date(
|
434
|
+
self,
|
435
|
+
df: dd.DataFrame,
|
436
|
+
start_date_col: str,
|
437
|
+
n_days_col: str,
|
438
|
+
result_col: str = "sla_end_date",
|
439
|
+
) -> dd.DataFrame:
|
440
|
+
"""Calculates an SLA end date column for a Dask DataFrame."""
|
441
|
+
missing = {start_date_col, n_days_col} - set(df.columns)
|
442
|
+
if missing:
|
443
|
+
self.logger.error(f"Missing columns: {missing}")
|
444
|
+
raise ValueError("Required columns are missing from DataFrame")
|
445
|
+
|
446
|
+
return df.assign(
|
447
|
+
**{result_col: df.map_partitions(
|
448
|
+
_vectorized_sla_end_date,
|
449
|
+
start_col=start_date_col,
|
450
|
+
n_days_col=n_days_col,
|
451
|
+
holidays=list(self.holidays),
|
452
|
+
meta=(result_col, 'datetime64[ns]')
|
453
|
+
)}
|
453
454
|
)
|
454
455
|
|
455
|
-
return df
|
456
456
|
# Class enhancements
|
457
457
|
# DateUtils.register_period('next_week', lambda: (datetime.date.today() + datetime.timedelta(days=7),
|
458
458
|
# datetime.date.today() + datetime.timedelta(days=13)))
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py
RENAMED
@@ -1,7 +1,8 @@
|
|
1
1
|
import itertools
|
2
2
|
import dask.dataframe as dd
|
3
3
|
import pandas as pd
|
4
|
-
|
4
|
+
|
5
|
+
#from sqlmodel import create_engine, Session, select
|
5
6
|
from sibi_dst.v2.df_helper.core import FilterHandler
|
6
7
|
from sibi_dst.v2.utils import Logger
|
7
8
|
|
@@ -116,7 +117,7 @@ class SQLModelDask:
|
|
116
117
|
return dask_df
|
117
118
|
|
118
119
|
except Exception as e:
|
119
|
-
self.logger.error(f"Error executing query: {str(e)}")
|
120
|
-
self.logger.error(self.query)
|
120
|
+
self.logger.error(f"_io_dask:Error executing query: {str(e)}")
|
121
|
+
self.logger.error(f"_io_dask:{self.query}")
|
121
122
|
# In case of error, return an empty Dask DataFrame with the expected columns.
|
122
123
|
return dd.from_pandas(pd.DataFrame(columns=ordered_columns), npartitions=1)
|
File without changes
|
File without changes
|
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/parquet/_filter_handler.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/parquet/_parquet_options.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py
RENAMED
File without changes
|
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py
RENAMED
File without changes
|
{sibi_dst-2025.1.10 → sibi_dst-2025.1.12}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|