sibi-dst 2025.8.6__py3-none-any.whl → 2025.8.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ class SqlAlchemyConnectionConfig(BaseModel):
  connection_url: str
  table: Optional[str] = None
  debug: bool = False
+ logger_extra: Optional[Dict[str, Any]] = {"sibi_dst_component": __name__}

  # --- Pool Configuration ---
  pool_size: int = int(os.environ.get("DB_POOL_SIZE", 5))
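The `logger_extra` mapping added above is passed as the standard-library `extra=` argument on the log calls in the hunks that follow, which attaches `sibi_dst_component` to each `LogRecord`. A minimal sketch of how a consuming application could surface that attribute, assuming a plain `logging` setup (the filter, format string, and component value below are illustrative, not part of sibi-dst):

```python
import logging

class ComponentDefaultFilter(logging.Filter):
    """Give every record a sibi_dst_component attribute so the format string never fails."""
    def filter(self, record: logging.LogRecord) -> bool:
        if not hasattr(record, "sibi_dst_component"):
            record.sibi_dst_component = "-"
        return True

handler = logging.StreamHandler()
handler.addFilter(ComponentDefaultFilter())
handler.setFormatter(logging.Formatter("%(levelname)s [%(sibi_dst_component)s] %(message)s"))

logger = logging.getLogger("sibi_dst")
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

# Mirrors the calls in the hunks below: the extra dict lands on the record as an attribute.
logger.debug("Reusing engine. Ref count: 2.", extra={"sibi_dst_component": "sibi_dst.df_helper"})
```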
@@ -99,10 +100,10 @@ class SqlAlchemyConnectionConfig(BaseModel):
  self.engine = wrapper["engine"]
  wrapper["ref_count"] += 1
  if self.debug:
- self.logger.debug(f"Reusing engine. Ref count: {wrapper['ref_count']}.")
+ self.logger.debug(f"Reusing engine. Ref count: {wrapper['ref_count']}.", extra=self.logger_extra)
  else:
  if self.debug:
- self.logger.debug(f"Creating new engine for key: {self._engine_key_instance}")
+ self.logger.debug(f"Creating new engine for key: {self._engine_key_instance}", extra=self.logger_extra)
  try:
  new_engine = create_engine(
  self.connection_url,
@@ -121,7 +122,7 @@ class SqlAlchemyConnectionConfig(BaseModel):
  "active_connections": 0,
  }
  except Exception as e:
- self.logger.error(f"Failed to create engine: {e}")
+ self.logger.error(f"Failed to create engine: {e}", extra=self.logger_extra)
  raise SQLAlchemyError(f"Engine creation failed: {e}") from e

  def close(self) -> None:
@@ -134,14 +135,14 @@ class SqlAlchemyConnectionConfig(BaseModel):
  key = self._engine_key_instance
  wrapper = _ENGINE_REGISTRY.get(key)
  if not wrapper:
- self.logger.warning("Attempted to close a config whose engine is not in the registry.")
+ self.logger.warning("Attempted to close a config whose engine is not in the registry.", extra=self.logger_extra)
  else:
  wrapper["ref_count"] -= 1
  if self.debug:
- self.logger.debug(f"Closing connection. Ref count now {wrapper['ref_count']}.")
+ self.logger.debug(f"Closing connection. Ref count now {wrapper['ref_count']}.", extra=self.logger_extra)
  if wrapper["ref_count"] <= 0:
  if self.debug:
- self.logger.debug(f"Disposing engine as reference count is zero. Key: {key}")
+ self.logger.debug(f"Disposing engine as reference count is zero. Key: {key}", extra=self.logger_extra)
  try:
  wrapper["engine"].dispose()
  finally:
@@ -177,9 +178,9 @@ class SqlAlchemyConnectionConfig(BaseModel):
  with self.managed_connection() as conn:
  conn.execute(text("SELECT 1"))
  if self.debug:
- self.logger.debug("Database connection validated successfully.")
+ self.logger.debug("Database connection validated successfully.", extra=self.logger_extra)
  except OperationalError as e:
- self.logger.error(f"Database connection failed: {e}")
+ self.logger.error(f"Database connection failed: {e}", extra=self.logger_extra)
  raise ValueError(f"DB connection failed: {e}") from e

  @contextmanager
@@ -204,8 +205,8 @@ class SqlAlchemyConnectionConfig(BaseModel):
  builder = SqlAlchemyModelBuilder(self.engine, self.table)
  self.model = builder.build_model()
  if self.debug:
- self.logger.debug(f"Successfully built ORM model for table: {self.table}")
+ self.logger.debug(f"Successfully built ORM model for table: {self.table}", extra=self.logger_extra)
  except Exception as e:
- self.logger.error(f"Failed to build ORM model for table '{self.table}': {e}")
+ self.logger.error(f"Failed to build ORM model for table '{self.table}': {e}", extra=self.logger_extra)
  raise ValueError(f"Model construction failed for table '{self.table}': {e}") from e

@@ -38,6 +38,7 @@ class SQLAlchemyDask(ManagedResource):
  "TIME": "object",
  "UUID": "object",
  }
+ logger_extra: Dict[str, Any] = {"sibi_dst_component": __name__}

  def __init__(
  self,
@@ -97,7 +98,7 @@ class SQLAlchemyDask(ManagedResource):
  max_overflow = _to_int(max_overflow_attr, 10)

  cap = max(1, pool_size + max_overflow - 1)
- self.logger.debug(f"Using a Cap of {cap} from pool size of {pool_size} and max overflow of {max_overflow}.")
+ self.logger.debug(f"Using a Cap of {cap} from pool size of {pool_size} and max overflow of {max_overflow}.", extra=self.logger_extra)
  return max(1, cap)

  # ---------- meta ----------
@@ -140,25 +141,25 @@ class SQLAlchemyDask(ManagedResource):
  break
  except SASQLTimeoutError:
  if attempt < retry_attempts - 1:
- self.logger.warning(f"Connection pool limit reached. Retrying in {backoff} seconds...")
+ self.logger.warning(f"Connection pool limit reached. Retrying in {backoff} seconds...", extra=self.logger_extra)
  time.sleep(backoff)
  backoff *= 2
  else:
  self.total_records = -1
- self.logger.error("Failed to get a connection from the pool after retries.", exc_info=True)
+ self.logger.error("Failed to get a connection from the pool after retries.", exc_info=True, extra=self.logger_extra)
  return self.total_records, dd.from_pandas(meta_df, npartitions=1)
  except OperationalError as oe:
  if "timeout" in str(oe).lower() and attempt < retry_attempts - 1:
- self.logger.warning("Operational timeout, retrying…", exc_info=self.debug)
+ self.logger.warning("Operational timeout, retrying…", exc_info=self.debug, extra=self.logger_extra)
  time.sleep(backoff)
  backoff *= 2
  continue
  self.total_records = -1
- self.logger.error("OperationalError during count.", exc_info=True)
+ self.logger.error("OperationalError during count.", exc_info=True, extra=self.logger_extra)
  return self.total_records, dd.from_pandas(meta_df, npartitions=1)
  except Exception as e:
  self.total_records = -1
- self.logger.error(f"Unexpected error during count: {e}", exc_info=True)
+ self.logger.error(f"Unexpected error during count: {e}", exc_info=True, extra=self.logger_extra)
  return self.total_records, dd.from_pandas(meta_df, npartitions=1)

  self.total_records = int(total)
@@ -167,7 +168,7 @@ class SQLAlchemyDask(ManagedResource):
  super().close()
  return self.total_records, dd.from_pandas(meta_df, npartitions=1)

- self.logger.debug(f"Total records to fetch: {total}. Chunk size: {self.chunk_size}.")
+ self.logger.debug(f"Total records to fetch: {total}. Chunk size: {self.chunk_size}.", extra=self.logger_extra)

  @dask.delayed
  def get_chunk(sql_query, chunk_offset):
@@ -181,6 +182,6 @@ class SQLAlchemyDask(ManagedResource):
  offsets = range(0, total, self.chunk_size)
  delayed_chunks = [get_chunk(query, off) for off in offsets]
  ddf = dd.from_delayed(delayed_chunks, meta=meta_df)
- self.logger.debug(f"Created Dask DataFrame with {ddf.npartitions} partitions.")
+ self.logger.debug(f"{self.model.__name__} created Dask DataFrame with {ddf.npartitions} partitions.", extra=self.logger_extra)
  return self.total_records, ddf

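The two hunks above touch the chunked read path: one delayed task per OFFSET/LIMIT chunk, stitched together with `dd.from_delayed` against a meta frame. A generic, self-contained sketch of that pattern (the column names and chunk arithmetic are illustrative, not sibi-dst internals):

```python
import dask
import dask.dataframe as dd
import pandas as pd

# The schema every delayed chunk must agree on; plays the role of meta_df above.
meta_df = pd.DataFrame({"id": pd.Series(dtype="int64"), "name": pd.Series(dtype="object")})
total, chunk_size = 250, 100

@dask.delayed
def get_chunk(offset: int) -> pd.DataFrame:
    # A real reader would issue "SELECT ... LIMIT :chunk_size OFFSET :offset" here.
    rows = range(offset, min(offset + chunk_size, total))
    return pd.DataFrame({"id": list(rows), "name": [f"row-{i}" for i in rows]})

delayed_chunks = [get_chunk(off) for off in range(0, total, chunk_size)]
ddf = dd.from_delayed(delayed_chunks, meta=meta_df)
print(ddf.npartitions)  # 3 partitions for 250 rows at chunk_size=100
```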
@@ -1,6 +1,6 @@
  from __future__ import annotations

- from typing import Any, Tuple
+ from typing import Any, Tuple, Dict

  import dask.dataframe as dd
  import pandas as pd
@@ -15,6 +15,7 @@ class SqlAlchemyLoadFromDb(ManagedResource):
  """
  Orchestrates loading data from a database using SQLAlchemy into a Dask DataFrame.
  """
+ logger_extra: Dict[str, Any] = {"sibi_dst_component": __name__}

  def __init__(
  self,
@@ -43,86 +44,13 @@ class SqlAlchemyLoadFromDb(ManagedResource):
  verbose=self.verbose,
  debug=self.debug,
  ) as loader:
- self.logger.debug(f"SQLAlchemyDask loader initialized for model: {self.model.__name__}")
+ self.logger.debug(f"SQLAlchemyDask loader initialized for model: {self.model.__name__}", extra=self.logger_extra)
  self.total_records, dask_df = loader.read_frame()
  return self.total_records, dask_df
  except Exception as e:
  self.total_records = -1
- self.logger.error(f"{self.model.__name__} Failed to build and load data: {e}", exc_info=True)
+ self.logger.error(f"{self.model.__name__} Failed to build and load data: {e}", exc_info=True, extra=self.logger_extra)
  # empty df with correct columns
  columns = [c.name for c in self.model.__table__.columns]
  return self.total_records, dd.from_pandas(pd.DataFrame(columns=columns), npartitions=1)

- # from __future__ import annotations
- #
- # from typing import Any
- #
- # import dask.dataframe as dd
- # import pandas as pd
- #
- # from sibi_dst.utils import ManagedResource
- # from sibi_dst.df_helper.core import ParamsConfig, QueryConfig
- # from ._db_connection import SqlAlchemyConnectionConfig
- # from ._io_dask import SQLAlchemyDask
- #
- # class SqlAlchemyLoadFromDb(ManagedResource):
- # """
- # Orchestrates loading data from a database using SQLAlchemy into a Dask
- # DataFrame by configuring and delegating to the SQLAlchemyDask loader.
- # """
- #
- # def __init__(
- # self,
- # plugin_sqlalchemy: SqlAlchemyConnectionConfig,
- # plugin_query: QueryConfig = None,
- # plugin_params: ParamsConfig = None,
- # **kwargs,
- # ):
- # """
- # Initializes the loader with all necessary configurations.
- #
- # Args:
- # plugin_sqlalchemy: The database connection configuration object.
- # plugin_query: The query configuration object.
- # plugin_params: The parameters and filters configuration object.
- # logger: An optional logger instance.
- # **kwargs: Must contain 'index_column' for Dask partitioning.
- # """
- # super().__init__(**kwargs)
- # self.db_connection = plugin_sqlalchemy
- # self.model = self.db_connection.model
- # self.engine = self.db_connection.engine
- # self.query_config = plugin_query
- # self.params_config = plugin_params
- # self.chunk_size = kwargs.get("chunk_size", self.params_config.df_params.get("chunk_size", 1000))
- # self.total_records = -1 # Initialize total_records to -1 to indicate no records loaded yet
- #
- # def build_and_load(self) -> tuple[int | Any, Any] | dd.DataFrame:
- # """
- # Builds and loads a Dask DataFrame from a SQLAlchemy source.
- #
- # This method is stateless and returns the DataFrame directly.
- #
- # Returns:
- # A Dask DataFrame containing the queried data or an empty,
- # correctly structured DataFrame if the query fails or returns no results.
- # """
- # try:
- # # Instantiate and use the low-level Dask loader
- # with SQLAlchemyDask(model=self.model,filters=self.params_config.filters if self.params_config else {},
- # engine=self.engine,
- # chunk_size=self.chunk_size,
- # logger=self.logger,
- # verbose=self.verbose,
- # debug=self.debug) as sqlalchemy_dask_loader:
- # self.logger.debug(f"SQLAlchemyDask loader initialized for model: {self.model.__name__}")
- # # Create the lazy DataFrame and read a record count
- # # if total_records less than 0, it means an error occurred during the loading process
- # self.total_records, dask_df = sqlalchemy_dask_loader.read_frame()
- # return self.total_records, dask_df
- # except Exception as e:
- # self.total_records = -1
- # self.logger.error(f"{self.model.__name__} Failed to build and load data: {e}", exc_info=True)
- # # Return an empty dataframe with the correct schema on failure
- # columns = [c.name for c in self.model.__table__.columns]
- # return self.total_records, dd.from_pandas(pd.DataFrame(columns=columns), npartitions=1)
@@ -48,107 +48,3 @@ class SqlAlchemyModelBuilder:
  return f"{sane_name}_field"
  return sane_name

- # import re
- # import keyword
- # import threading
- # from sqlalchemy import MetaData, Engine
- # from sqlalchemy.orm import DeclarativeBase
- #
- #
- # class Base(DeclarativeBase):
- # """Shared declarative base for all ORM models."""
- # pass
- #
- #
- # apps_label = "datacubes.models"
- #
- #
- # class SqlAlchemyModelBuilder:
- # """
- # Builds a single SQLAlchemy ORM model from a specific database table.
- # This class is thread-safe and caches reflected table metadata to
- # improve performance across multiple instantiations.
- # """
- # _lock = threading.Lock()
- # _metadata_cache: dict[str, MetaData] = {}
- #
- # def __init__(self, engine: Engine, table_name: str):
- # """
- # Initializes the model builder for a specific table.
- #
- # Args:
- # engine: The SQLAlchemy engine connected to the database.
- # table_name: The name of the table to generate the model for.
- # """
- # self.engine = engine
- # self.table_name = table_name
- # self.class_name = self._normalize_class_name(self.table_name)
- #
- # engine_key = str(engine.url)
- #
- # # ✅ REFACTOR: Acquire lock to make cache access and creation atomic,
- # # preventing a race condition between multiple threads.
- # with self._lock:
- # if engine_key not in self._metadata_cache:
- # self._metadata_cache[engine_key] = MetaData()
- # self.metadata = self._metadata_cache[engine_key]
- #
- # def build_model(self) -> type:
- # """
- # Builds and returns a database model class for the specified table.
- # This process is atomic and thread-safe.
- #
- # Raises:
- # ValueError: If the specified table does not exist in the database.
- # Returns:
- # The dynamically created ORM model class.
- # """
- # with self._lock:
- # # NOTE: Using a private SQLAlchemy API. This is a performance
- # # optimization but may break in future versions of the library.
- # registered_model = Base.registry._class_registry.get(self.class_name)
- # if registered_model:
- # return registered_model
- #
- # # Check if the table's schema is in our metadata cache
- # table = self.metadata.tables.get(self.table_name)
- #
- # # If not cached, reflect it from the database
- # if table is None:
- # self.metadata.reflect(bind=self.engine, only=[self.table_name])
- # table = self.metadata.tables.get(self.table_name)
- #
- # if table is None:
- # raise ValueError(
- # f"Table '{self.table_name}' does not exist in the database."
- # )
- #
- # # Create the model class dynamically.
- # attrs = {
- # "__tablename__": table.name,
- # "__table__": table,
- # "__module__": apps_label,
- # }
- # model = type(self.class_name, (Base,), attrs)
- #
- # return model
- #
- # @staticmethod
- # def _normalize_class_name(table_name: str) -> str:
- # """Converts a snake_case table_name to a CamelCase class name."""
- # return "".join(word.capitalize() for word in table_name.split("_"))
- #
- # @staticmethod
- # def _normalize_column_name(column_name: str) -> str:
- # """
- # Sanitizes a column name to be a valid Python identifier.
- # (Kept for utility, though not used in the final model creation).
- # """
- # sane_name = re.sub(r"\W", "_", column_name)
- # sane_name = re.sub(r"^\d", r"_\g<0>", sane_name)
- #
- # if keyword.iskeyword(sane_name):
- # return f"{sane_name}_field"
- # return sane_name
- #
- #
@@ -0,0 +1,12 @@
+ import asyncio
+ import dask.dataframe as dd
+
+
+ def is_dask_dataframe(df):
+ """Check if the given object is a Dask DataFrame."""
+ return isinstance(df, dd.DataFrame)
+
+ async def to_thread(func, *args, **kwargs):
+ """Explicit helper to keep code clear where we hop off the event loop."""
+ return await asyncio.to_thread(func, *args, **kwargs)
+
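The new `to_thread` helper is a thin, named wrapper around `asyncio.to_thread`. A short illustration of the intent, using a stand-in blocking function (the names below are hypothetical):

```python
import asyncio
import time

def blocking_load() -> str:
    # Stand-in for a synchronous, I/O-bound call such as a Parquet read.
    time.sleep(0.1)
    return "loaded"

async def main() -> None:
    # Same idea as to_thread() above: hop off the event loop for blocking work.
    result = await asyncio.to_thread(blocking_load)
    print(result)

asyncio.run(main())
```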
@@ -0,0 +1,6 @@
+ from .base_data_artifact import BaseDataArtifact
+ from .base_data_cube import BaseDataCube
+
+ __all__ = ["BaseDataCube",
+ "BaseDataArtifact"
+ ]
@@ -0,0 +1,110 @@
+ from __future__ import annotations
+
+ import asyncio
+ from typing import Any, Dict, Mapping, Optional, Type, Union
+ from datetime import date, datetime
+
+ import pandas as pd
+ import dask.dataframe as dd
+ from sibi_dst.df_helper import ParquetArtifact
+
+
+ DateLike = Union[str, date, datetime, None]
+
+
+ def _validate_and_format_date(name: str, value: DateLike) -> Optional[str]:
+ """
+ Normalize date-like input into a canonical string '%Y-%m-%d'.
+
+ - None -> None
+ - str/date/datetime -> parse with pandas.to_datetime, take .date(), return '%Y-%m-%d'
+ - else -> TypeError
+ """
+ if value is None:
+ return None
+ if isinstance(value, (str, date, datetime)):
+ try:
+ return pd.to_datetime(value).date().strftime("%Y-%m-%d")
+ except Exception as e:
+ raise ValueError(f"{name} must be a valid date, got {value!r}") from e
+ raise TypeError(f"{name} must be str, date, datetime, or None; got {type(value)}")
+
+
+ class BaseDataArtifact(ParquetArtifact):
+ """
+ Base class for Parquet artifacts with optional date window.
+
+ Dates are always stored as strings in '%Y-%m-%d' format.
+ """
+
+ config: Mapping[str, Any] = {}
+
+ parquet_start_date: Optional[str]
+ parquet_end_date: Optional[str]
+ data_wrapper_class: Optional[Type[Any]]
+ class_params: Dict[str, Any]
+ df: Union[pd.DataFrame | dd.DataFrame] = None
+
+ def __init__(
+ self,
+ **kwargs: Any,
+ ) -> None:
+ merged = {**self.config, **kwargs}
+ super().__init__(**merged)
+
+ # Normalize and store as canonical strings
+ self.parquet_start_date = _validate_and_format_date("parquet_start_date", merged.get("parquet_start_date", None))
+ self.parquet_end_date = _validate_and_format_date("parquet_end_date", merged.get("parquet_end_date", None))
+
+ self.data_wrapper_class = merged.get("data_wrapper_class", None)
+ self.class_params = merged.get("class_params", None) or {
+ "debug": self.debug,
+ "logger": self.logger,
+ "fs": self.fs,
+ "verbose": getattr(self, "verbose", False),
+ }
+
+ # Ordering check
+ if self.parquet_start_date and self.parquet_end_date:
+ if self.parquet_start_date > self.parquet_end_date:
+ raise ValueError(
+ f"parquet_start_date {self.parquet_start_date} "
+ f"cannot be after parquet_end_date {self.parquet_end_date}"
+ )
+
+ # -------- Optional hooks --------
+
+ def before_load(self, **kwargs: Any) -> None: return None
+ def after_load(self, **kwargs: Any) -> None: return None
+ async def abefore_load(self, **kwargs: Any) -> None: return None
+ async def aafter_load(self, **kwargs: Any) -> None: return None
+
+ # -------- Public API --------
+
+ def load(self, **kwargs: Any):
+ self.before_load(**kwargs)
+ self.df = super().load(**kwargs)
+ self.after_load(**kwargs)
+ return self.df
+
+ async def aload(self, **kwargs: Any):
+ await self.abefore_load(**kwargs)
+ df = await asyncio.to_thread(super().load, **kwargs)
+ self.df = df
+ await self.aafter_load(**kwargs)
+ return self.df
+
+ def has_date_window(self) -> bool:
+ return bool(self.parquet_start_date or self.parquet_end_date)
+
+ def date_window(self) -> tuple[Optional[str], Optional[str]]:
+ return self.parquet_start_date, self.parquet_end_date
+
+ def to_params(self) -> Dict[str, Any]:
+ return {
+ "parquet_start_date": self.parquet_start_date,
+ "parquet_end_date": self.parquet_end_date,
+ "data_wrapper_class": self.data_wrapper_class,
+ "class_params": dict(self.class_params),
+ }
+
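A quick illustration of the date-window semantics introduced above: `_validate_and_format_date` canonicalizes str/date/datetime inputs to '%Y-%m-%d' strings, and the ordering check can then compare the stored strings directly, since zero-padded ISO dates sort lexicographically in chronological order. The snippet below only mirrors that behaviour with pandas and does not import from sibi_dst:

```python
from datetime import date, datetime
import pandas as pd

# Mirrors the normalization path: every accepted input becomes a '%Y-%m-%d' string.
inputs = ["2025-08-01", date(2025, 8, 8), datetime(2025, 8, 8, 13, 45)]
print([pd.to_datetime(v).date().strftime("%Y-%m-%d") for v in inputs])
# ['2025-08-01', '2025-08-08', '2025-08-08']

# Plain string comparison is enough for the ordering check in __init__.
start, end = "2025-08-08", "2025-08-01"
print(start > end)  # True -> BaseDataArtifact.__init__ would raise ValueError for this window
```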
@@ -0,0 +1,79 @@
+ from __future__ import annotations
+
+ from typing import Union
+ import dask.dataframe as dd
+ import pandas as pd
+
+ from sibi_dst.df_helper import DfHelper
+
+
+ class BaseDataCube(DfHelper):
+ """
+ Base cube with sync/async load hooks.
+
+ Subclasses *may* override:
+ - fix_data(self, **kwargs): synchronous, local transforms
+ - async afix_data(self, **kwargs): asynchronous transforms (I/O, awaits)
+
+ Semantics:
+ - load() -> runs fix_data() if defined
+ - aload() -> runs afix_data() if subclass overrides it, else fix_data()
+ """
+ df: Union[dd.DataFrame, pd.DataFrame, None] = None
+ config: dict = {}
+
+ def __init__(self, **kwargs):
+ # kwargs override class config
+ kwargs = {**self.config, **kwargs}
+ super().__init__(**kwargs)
+
+ # -------------------- optional hooks --------------------
+
+ def fix_data(self, **kwargs) -> None:
+ """Optional sync transform hook. Override in subclasses if needed."""
+ return None
+
+ async def afix_data(self, **kwargs) -> None:
+ """Optional async transform hook. Override in subclasses if needed."""
+ return None
+
+ # -------------------- internals --------------------
+
+ def _has_data(self) -> bool:
+ """Check if dataframe has rows; avoids hidden heavy ops where possible."""
+ if self.df is None:
+ return False
+ if isinstance(self.df, dd.DataFrame):
+ return bool(self.df.shape[0].compute() > 0)
+ return not self.df.empty
+
+ def _afix_data_is_overridden(self) -> bool:
+ """Check if subclass provided its own afix_data."""
+ return self.__class__.afix_data is not BaseDataCube.afix_data
+
+ def _fix_data_is_overridden(self) -> bool:
+ """Check if subclass provided its own fix_data."""
+ return self.__class__.fix_data is not BaseDataCube.fix_data
+
+ # -------------------- public API --------------------
+
+ def load(self, **kwargs):
+ """Sync load path with optional fix_data hook."""
+ self.df = super().load(**kwargs)
+ if self._has_data() and self._fix_data_is_overridden():
+ self.fix_data()
+ elif not self._has_data():
+ self.logger.debug(f"No data was found by {self.__class__.__name__} loader")
+ return self.df
+
+ async def aload(self, **kwargs):
+ """Async load path with optional afix_data/fix_data hook."""
+ self.df = await super().aload(**kwargs)
+ if self._has_data():
+ if self._afix_data_is_overridden():
+ await self.afix_data()
+ elif self._fix_data_is_overridden():
+ self.fix_data()
+ else:
+ self.logger.debug(f"No data was found by {self.__class__.__name__} loader")
+ return self.df
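To make the hook semantics concrete, a hedged subclass sketch: `OrdersCube`, its `config` keys, and the column names are hypothetical, and the constructor arguments ultimately depend on what `DfHelper` accepts.

```python
# from ... import BaseDataCube  # import path depends on where the new module lands in the package

class OrdersCube(BaseDataCube):
    config = {"backend": "sqlalchemy"}  # illustrative keys only

    def fix_data(self, **kwargs) -> None:
        # Synchronous cleanup; load() calls this only when rows came back.
        self.df = self.df.rename(columns={"order_dt": "order_date"})

    async def afix_data(self, **kwargs) -> None:
        # Async variant; aload() prefers this override, else falls back to fix_data().
        self.df = self.df[self.df["status"] != "void"]

# cube = OrdersCube(debug=True)
# df = cube.load()          # runs fix_data() when data is present
# df = await cube.aload()   # prefers afix_data(), otherwise fix_data()
```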