sibi-dst 2025.1.3__py3-none-any.whl → 2025.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,91 +0,0 @@
1
- from typing import Union, Optional
2
- import pandas as pd
3
- import dask.dataframe as dd
4
-
5
# Refactored DfHelper class
class DfHelper:
    """
    Orchestrates loading and processing data into a DataFrame.

    The specifics of data loading are delegated to a configured
    BackendStrategy; DfHelper handles post-processing, logging, and the
    optional dask -> pandas conversion requested by the caller.
    """

    # Last loaded frame; None until load() has been called.
    # NOTE: fixed annotation — the default is None, so the type must be Optional.
    df: Optional[Union[dd.DataFrame, pd.DataFrame]] = None

    def __init__(self, backend_strategy: BackendStrategy,
                 params_config: ParamsConfig,
                 as_pandas: bool = False,
                 debug: bool = False,
                 logger: Optional[Logger] = None):
        """
        Args:
            backend_strategy: Strategy object that performs the actual load.
            params_config: Backend parameters; needed for post-processing
                and field mapping.
            as_pandas: When True, load() computes and returns a pandas frame
                instead of a dask frame.
            debug: Enables DEBUG-level logging.
            logger: Optional logger; a default one is created when omitted.
        """
        self.backend_strategy = backend_strategy
        self._backend_params = params_config  # Needed for post-processing and field mapping
        self.as_pandas = as_pandas
        self.debug = debug
        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
        self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)

        # Other attributes like parquet saving paths can be passed in here if needed
        # self.parquet_storage_path = kwargs.get("parquet_storage_path")

    def __enter__(self):
        """Context-manager entry; returns self unchanged."""
        return self

    def __exit__(self, exc_type, exc_val, traceback):
        """Close the strategy's connection when it exposes one.

        Connection cleanup is otherwise the responsibility of the caller
        who creates the strategy.  Always returns False so exceptions
        raised inside the ``with`` block are never suppressed.
        """
        if hasattr(self.backend_strategy, 'connection') and hasattr(self.backend_strategy.connection, 'close'):
            self.backend_strategy.connection.close()
        return False

    def load(self, **options):
        """
        Loads data using the configured backend strategy, applies
        transformations, and returns a DataFrame.

        On any failure the error is logged (not raised) and an empty
        dask DataFrame is substituted so callers always receive a frame.
        """
        try:
            self.logger.debug(f"Loading data using {self.backend_strategy.__class__.__name__}...")
            # 1. Delegate loading to the strategy object
            self.df = self.backend_strategy.load(**options)

            # 2. Perform post-processing (these methods remain in DfHelper)
            self.__process_loaded_data()
            self.__post_process_df()
            self.logger.debug("Data successfully loaded and processed.")

        except Exception as e:
            self.logger.error(f"Failed to load data using {self.backend_strategy.__class__.__name__}: {e}")
            self.df = dd.from_pandas(pd.DataFrame(), npartitions=1)

        if self.as_pandas and isinstance(self.df, dd.DataFrame):
            return self.df.compute()
        return self.df

    def load_period(self, start: str, end: str, dt_field: str, **kwargs):
        """
        Loads data for a specific period by delegating filter creation
        to the strategy.

        Args:
            start: Period start (parsed via self.parse_date).
            end: Period end (parsed via self.parse_date).
            dt_field: Name of the datetime field to filter on.
            **kwargs: Additional filters, merged with the period filters
                (period filters win on key clashes).

        Raises:
            ValueError: If dt_field is None or start is after end.
        """
        if dt_field is None:
            raise ValueError("dt_field must be provided")

        # Parse and validate dates
        start_dt = self.parse_date(start)
        end_dt = self.parse_date(end)
        if start_dt > end_dt:
            raise ValueError("The 'start' date cannot be later than the 'end' date.")

        # Delegate the creation of filter logic to the current strategy
        field_map = getattr(self._backend_params, 'field_map', {}) or {}
        period_filters = self.backend_strategy.build_period_filter(
            dt_field, start_dt, end_dt, field_map
        )

        # Combine with other filters and load
        all_filters = {**kwargs, **period_filters}
        self.logger.debug(f"Loading period with combined filters: {all_filters}")
        return self.load(**all_filters)

    # The methods __process_loaded_data, __post_process_df, save_to_parquet,
    # save_to_clickhouse, and parse_date remain unchanged as they are
    # part of the orchestration logic, not the loading strategy itself.
    # ... (paste those methods here without modification) ...
    # ... __process_loaded_data, __post_process_df, save_to_parquet, etc. ...