sibi-dst 0.3.63__py3-none-any.whl → 2025.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. sibi_dst/df_helper/_df_helper.py +186 -591
  2. sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -2
  3. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +161 -115
  4. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +291 -97
  5. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +34 -105
  6. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +175 -162
  7. sibi_dst/df_helper/core/__init__.py +0 -4
  8. sibi_dst/df_helper/core/_defaults.py +1 -50
  9. sibi_dst/df_helper/core/_query_config.py +2 -2
  10. sibi_dst/utils/__init__.py +0 -2
  11. sibi_dst/utils/data_wrapper.py +9 -12
  12. sibi_dst/utils/log_utils.py +15 -11
  13. sibi_dst/utils/update_planner.py +2 -0
  14. sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +325 -50
  15. sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +2 -2
  16. sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +330 -51
  17. sibi_dst/v3/__init__.py +0 -0
  18. sibi_dst/v3/backends/__init__.py +0 -0
  19. sibi_dst/v3/df_helper/__init__.py +0 -0
  20. sibi_dst/v3/df_helper/_df_helper.py +91 -0
  21. sibi_dst-2025.1.1.dist-info/METADATA +55 -0
  22. {sibi_dst-0.3.63.dist-info → sibi_dst-2025.1.1.dist-info}/RECORD +23 -26
  23. sibi_dst/df_helper/backends/django/__init__.py +0 -11
  24. sibi_dst/df_helper/backends/django/_db_connection.py +0 -88
  25. sibi_dst/df_helper/backends/django/_io_dask.py +0 -450
  26. sibi_dst/df_helper/backends/django/_load_from_db.py +0 -227
  27. sibi_dst/df_helper/backends/django/_sql_model_builder.py +0 -493
  28. sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -119
  29. sibi_dst/utils/airflow_manager.py +0 -212
  30. sibi_dst-0.3.63.dist-info/METADATA +0 -90
  31. {sibi_dst-0.3.63.dist-info → sibi_dst-2025.1.1.dist-info}/WHEEL +0 -0
@@ -1,193 +1,206 @@
1
1
  import re
2
+ import keyword
3
+ import threading
4
+ from sqlalchemy import MetaData, Engine
5
+ from sqlalchemy.orm import DeclarativeBase
2
6
 
3
- from sqlalchemy import MetaData, Table
4
- from sqlalchemy.orm import declarative_base, relationship
5
7
 
6
- # Base class for dynamically created models
7
- Base = declarative_base()
8
+ class Base(DeclarativeBase):
9
+ """Shared declarative base for all ORM models."""
10
+ pass
8
11
 
9
- apps_label = "datacubes"
12
+
13
+ apps_label = "datacubes.models"
10
14
 
11
15
 
12
16
  class SqlAlchemyModelBuilder:
13
17
  """
14
- Provides functionality for building SQLAlchemy ORM models dynamically from
15
- reflected database tables. This class is intended for use with a SQLAlchemy
16
- engine and metadata to automatically generate ORM models for specified
17
- database tables.
18
-
19
- The primary purpose of this class is to simplify the process of creating
20
- SQLAlchemy ORM models by reflecting tables from a connected database,
21
- dynamically generating model classes, and handling relationships between
22
- tables.
23
-
24
- :ivar engine: SQLAlchemy engine connected to the database.
25
- :type engine: Engine
26
- :ivar table_name: Name of the table for which the model is generated.
27
- :type table_name: str
28
- :ivar metadata: SQLAlchemy MetaData instance for reflecting tables.
29
- :type metadata: MetaData
30
- :ivar table: Reflected SQLAlchemy Table object for the specified table name.
31
- :type table: Optional[Table]
32
- :ivar class_name: Dynamically normalized class name derived from table_name.
33
- :type class_name: str
18
+ Builds a single SQLAlchemy ORM model from a specific database table.
19
+ This class is thread-safe and caches reflected table metadata to
20
+ improve performance across multiple instantiations.
34
21
  """
35
- _model_cache = {} # Local cache for model classes
22
+ _lock = threading.Lock()
23
+ _metadata_cache: dict[str, MetaData] = {}
36
24
 
37
- def __init__(self, engine, table_name):
25
+ def __init__(self, engine: Engine, table_name: str):
38
26
  """
39
- Initialize the model builder with a database engine and specific table.
27
+ Initializes the model builder for a specific table.
40
28
 
41
29
  Args:
42
- engine: SQLAlchemy engine connected to the database.
43
- table_name (str): Name of the table to generate the model for.
30
+ engine: The SQLAlchemy engine connected to the database.
31
+ table_name: The name of the table to generate the model for.
44
32
  """
45
33
  self.engine = engine
46
34
  self.table_name = table_name
47
- self.metadata = MetaData()
48
- self.table = None # Placeholder for the specific table
49
- self.class_name = self.normalize_class_name(self.table_name)
35
+ self.class_name = self._normalize_class_name(self.table_name)
50
36
 
51
- def build_model(self) -> type:
52
- """
53
- Builds and returns a database model class corresponding to the specified table name.
54
- The method checks if the model is already registered in the ORM's registry. If not,
55
- it reflects the database schema of the specified table and dynamically creates the
56
- model class.
57
-
58
- :raises ValueError: If the specified table does not exist in the database.
59
- :return: A database model class corresponding to the specified table name.
60
- :rtype: type
61
- """
62
- # Check if the model is already registered
63
- model = Base.registry._class_registry.get(self.class_name)
64
- if model:
65
- return model
37
+ engine_key = str(engine.url)
66
38
 
67
- self.metadata.reflect(only=[self.table_name], bind=self.engine)
68
- self.table = self.metadata.tables.get(self.table_name)
69
- if self.table is None:
70
- raise ValueError(f"Table '{self.table_name}' does not exist in the database.")
39
+ # ✅ REFACTOR: Acquire lock to make cache access and creation atomic,
40
+ # preventing a race condition between multiple threads.
41
+ with self._lock:
42
+ if engine_key not in self._metadata_cache:
43
+ self._metadata_cache[engine_key] = MetaData()
44
+ self.metadata = self._metadata_cache[engine_key]
71
45
 
72
- model = self.create_model()
73
- return model
74
-
75
- def create_model(self) -> type:
46
+ def build_model(self) -> type:
76
47
  """
77
- Generates a SQLAlchemy model class dynamically based on the specified table and
78
- its columns. The method extracts column information, defines the necessary
79
- attributes, and creates the model class if it doesn't already exist in the
80
- SQLAlchemy base registry.
48
+ Builds and returns a database model class for the specified table.
49
+ This process is atomic and thread-safe.
81
50
 
82
- :raises KeyError: If the table or table name does not exist in the provided
83
- schema.
84
- :raises Exception: If the model creation fails for any reason.
85
-
86
- :return: The dynamically created or fetched model class.
87
- :rtype: type
51
+ Raises:
52
+ ValueError: If the specified table does not exist in the database.
53
+ Returns:
54
+ The dynamically created ORM model class.
88
55
  """
89
- # Normalize the class name from the table name
90
- columns = self.get_columns(self.table)
91
-
92
- # Define attributes for the model class
93
- attrs = {
94
- "__tablename__": self.table_name,
95
- "__table__": self.table,
96
- "__module__": f"{apps_label}.models",
97
- "__mapper_args__": {"eager_defaults": True},
98
- }
99
-
100
- # Add columns and relationships to the model
101
- attrs.update(columns)
102
- #self.add_relationships(attrs, self.table)
103
- model = Base.registry._class_registry.get(self.class_name)
104
- if not model:
56
+ with self._lock:
57
+ # REFACTOR: Add a comment acknowledging the risk of using an
58
+ # internal API. This is a maintenance warning for future developers.
59
+ # NOTE: Using a private SQLAlchemy API. This is a performance
60
+ # optimization but may break in future versions of the library.
61
+ registered_model = Base.registry._class_registry.get(self.class_name)
62
+ if registered_model:
63
+ return registered_model
64
+
65
+ # Check if the table's schema is in our metadata cache
66
+ table = self.metadata.tables.get(self.table_name)
67
+
68
+ # If not cached, reflect it from the database
69
+ if table is None:
70
+ self.metadata.reflect(bind=self.engine, only=[self.table_name])
71
+ table = self.metadata.tables.get(self.table_name)
72
+
73
+ if table is None:
74
+ raise ValueError(
75
+ f"Table '{self.table_name}' does not exist in the database."
76
+ )
77
+
78
+ # Create the model class dynamically.
79
+ attrs = {
80
+ "__tablename__": table.name,
81
+ "__table__": table,
82
+ "__module__": apps_label,
83
+ }
105
84
  model = type(self.class_name, (Base,), attrs)
106
- # Add the class to Base.registry so it is registered
107
- Base.registry._class_registry[self.class_name] = model
108
- return model
109
-
110
- def get_columns(self, table: Table):
111
- """
112
- Extracts and returns a dictionary of column names and their corresponding column
113
- objects from a given table, excluding reserved names. Reserved names are used
114
- internally and should not overlap with column names in the provided table. The
115
- method ensures sanitized column names through normalization and filters out any
116
- column matching reserved keywords.
117
-
118
- :param table: The table object from which columns are to be extracted.
119
- :type table: Table
120
- :return: A dictionary containing the sanitized column names as keys and their
121
- corresponding column objects as values, excluding reserved names.
122
- :rtype: dict
123
- """
124
- columns = {}
125
- reserved_names = ["metadata", "class_", "table"]
126
-
127
- for column in table.columns:
128
- column_name = self.normalize_column_name(column.name)
129
- if column_name not in reserved_names:
130
- columns[column_name] = column
131
- return columns
132
-
133
- def add_relationships(self, attrs, table: Table):
134
- """
135
- Adds relationships to the provided attributes dictionary for a given database table.
136
-
137
- This method iterates through the foreign keys of the provided table, constructs
138
- relationship attributes, and updates the attributes dictionary with relationships
139
- that connect the current table to related tables.
140
-
141
- :param attrs: Dictionary of attributes to which relationships will be added.
142
- The dictionary will be updated with new relationship mappings.
143
- :type attrs: dict
144
- :param table: A database table object containing foreign key relationships.
145
- The method will use this table to establish relationships.
146
- :return: None
147
- """
148
- for fk in table.foreign_keys:
149
- related_table_name = fk.column.table.name
150
- related_class_name = self.normalize_class_name(related_table_name)
151
- relationship_name = self.normalize_column_name(related_table_name)
152
- attrs[relationship_name] = relationship(related_class_name, back_populates=None)
153
85
 
86
+ return model
154
87
 
155
88
  @staticmethod
156
- def normalize_class_name(table_name: str) -> str:
157
- """
158
- Generate a normalized class name from a given table name by capitalizing
159
- each word separated by underscores and concatenating them.
160
-
161
- This static method takes a string representation of a table name, where
162
- words are separated by underscores, and converts it into a camel case
163
- class name. It processes the string by capitalizing the first letter of
164
- each word and removing the underscores. The normalized class name
165
- returned can be used programmatically for various purposes, such as
166
- class generation or naming conventions.
167
-
168
- :param table_name: The table name to normalize, with words separated by
169
- underscores. E.g., 'sample_table' becomes 'SampleTable'.
170
- :type table_name: str
171
- :return: A normalized class name in camel case format.
172
- :rtype: str
173
- """
89
+ def _normalize_class_name(table_name: str) -> str:
90
+ """Converts a snake_case table_name to a CamelCase class name."""
174
91
  return "".join(word.capitalize() for word in table_name.split("_"))
175
92
 
176
93
  @staticmethod
177
- def normalize_column_name(column_name: str) -> str:
94
+ def _normalize_column_name(column_name: str) -> str:
178
95
  """
179
- Normalize a column name by replacing any non-word characters or leading numbers
180
- with underscores, while ensuring it does not conflict with reserved keywords
181
- such as 'class', 'def', 'return', etc. If the normalized name conflicts with
182
- a Python reserved keyword, "_field" is appended to it.
183
-
184
- :param column_name: The original name of the column to be normalized.
185
- :type column_name: str
186
- :return: A normalized column name that is safe and compatible for usage
187
- in various contexts such as database columns or Python code.
188
- :rtype: str
96
+ Sanitizes a column name to be a valid Python identifier.
97
+ (Kept for utility, though not used in the final model creation).
189
98
  """
190
- column_name = re.sub(r"\W|^(?=\d)", "_", column_name)
191
- if column_name in {"class", "def", "return", "yield", "global"}:
192
- column_name += "_field"
193
- return column_name
99
+ sane_name = re.sub(r"\W", "_", column_name)
100
+ sane_name = re.sub(r"^\d", r"_\g<0>", sane_name)
101
+
102
+ if keyword.iskeyword(sane_name):
103
+ return f"{sane_name}_field"
104
+ return sane_name
105
+
106
+ # import re
107
+ # import keyword
108
+ # import threading
109
+ # from sqlalchemy import MetaData, Engine
110
+ # from sqlalchemy.orm import DeclarativeBase
111
+ #
112
+ #
113
+ #
114
+ # class Base(DeclarativeBase):
115
+ # """shared declarative base for all ORM models."""
116
+ # pass
117
+ #
118
+ #
119
+ # apps_label = "datacubes.models"
120
+ #
121
+ #
122
+ # class SqlAlchemyModelBuilder:
123
+ # """
124
+ # Builds a single SQLAlchemy ORM model from a specific database table.
125
+ # This class is thread-safe and caches reflected table metadata to
126
+ # improve performance across multiple instantiations.
127
+ # """
128
+ # _lock = threading.Lock()
129
+ # _metadata_cache: dict[str, MetaData] = {}
130
+ #
131
+ # def __init__(self, engine: Engine, table_name: str):
132
+ # """
133
+ # Initializes the model builder for a specific table.
134
+ #
135
+ # Args:
136
+ # engine: The SQLAlchemy engine connected to the database.
137
+ # table_name: The name of the table to generate the model for.
138
+ # """
139
+ # self.engine = engine
140
+ # self.table_name = table_name
141
+ # self.class_name = self._normalize_class_name(self.table_name)
142
+ #
143
+ # # Use or create a cached MetaData object for this engine to avoid
144
+ # # re-reading the schema for tables that are already known.
145
+ # engine_key = str(engine.url)
146
+ # if engine_key not in self._metadata_cache:
147
+ # self._metadata_cache[engine_key] = MetaData()
148
+ # self.metadata = self._metadata_cache[engine_key]
149
+ #
150
+ # def build_model(self) -> type:
151
+ # """
152
+ # Builds and returns a database model class for the specified table.
153
+ # This process is atomic and thread-safe.
154
+ #
155
+ # Raises:
156
+ # ValueError: If the specified table does not exist in the database.
157
+ # Returns:
158
+ # The dynamically created ORM model class.
159
+ # """
160
+ # with self._lock:
161
+ # # First, check if the model class is already registered in SQLAlchemy
162
+ # registered_model = Base.registry._class_registry.get(self.class_name)
163
+ # if registered_model:
164
+ # return registered_model
165
+ #
166
+ # # Next, check if the table's schema is in our metadata cache
167
+ # table = self.metadata.tables.get(self.table_name)
168
+ #
169
+ # # If not cached, reflect it from the database
170
+ # if table is None:
171
+ # self.metadata.reflect(bind=self.engine, only=[self.table_name])
172
+ # table = self.metadata.tables.get(self.table_name)
173
+ #
174
+ # if table is None:
175
+ # raise ValueError(
176
+ # f"Table '{self.table_name}' does not exist in the database."
177
+ # )
178
+ #
179
+ # # Create the model class dynamically.
180
+ # # No need to add columns manually; __table__ handles it.
181
+ # attrs = {
182
+ # "__tablename__": table.name,
183
+ # "__table__": table,
184
+ # "__module__": apps_label,
185
+ # }
186
+ # model = type(self.class_name, (Base,), attrs)
187
+ #
188
+ # return model
189
+ #
190
+ # @staticmethod
191
+ # def _normalize_class_name(table_name: str) -> str:
192
+ # """Converts a snake_case table_name to a CamelCase class name."""
193
+ # return "".join(word.capitalize() for word in table_name.split("_"))
194
+ #
195
+ # @staticmethod
196
+ # def _normalize_column_name(column_name: str) -> str:
197
+ # """
198
+ # Sanitizes a column name to be a valid Python identifier.
199
+ # (Kept for utility, though not used in the final model creation).
200
+ # """
201
+ # sane_name = re.sub(r"\W", "_", column_name)
202
+ # sane_name = re.sub(r"^\d", r"_\g<0>", sane_name)
203
+ #
204
+ # if keyword.iskeyword(sane_name):
205
+ # return f"{sane_name}_field"
206
+ # return sane_name
@@ -1,8 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from ._defaults import (
4
- django_field_conversion_map_pandas,
5
- django_field_conversion_map_dask,
6
4
  sqlalchemy_field_conversion_map_dask,
7
5
  normalize_sqlalchemy_type)
8
6
  from ._filter_handler import FilterHandler
@@ -12,8 +10,6 @@ from ._query_config import QueryConfig
12
10
  __all__ = [
13
11
  "ParamsConfig",
14
12
  "QueryConfig",
15
- "django_field_conversion_map_pandas",
16
- "django_field_conversion_map_dask",
17
13
  "sqlalchemy_field_conversion_map_dask",
18
14
  "normalize_sqlalchemy_type",
19
15
  "FilterHandler",
@@ -13,56 +13,7 @@ from sqlalchemy.dialects.mysql import TINYINT, MEDIUMTEXT
13
13
  # conversion_map is a dictionary that maps the field types to their corresponding data type conversion functions.
14
14
  # Each entry in the dictionary is a pair of a field type (as a string) and a callable function that performs the
15
15
  # conversion. This mapping is used to convert the values in a pandas DataFrame to the appropriate data types based on
16
- # the Django field type.
17
-
18
- django_field_conversion_map_pandas: Dict[str, callable] = {
19
- "CharField": lambda x: x.astype(str),
20
- "TextField": lambda x: x.astype(str),
21
- "IntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
22
- "AutoField": lambda x: pd.to_numeric(x, errors="coerce"),
23
- "BigAutoField": lambda x: pd.to_numeric(x, errors="coerce"),
24
- "BigIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
25
- "SmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
26
- "PositiveIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
27
- "PositiveSmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
28
- "FloatField": lambda x: pd.to_numeric(x, errors="coerce"),
29
- "DecimalField": lambda x: pd.to_numeric(x, errors="coerce"),
30
- "BooleanField": lambda x: x.astype(bool),
31
- "NullBooleanField": lambda x: x.astype(bool),
32
- "DateTimeField": lambda x: pd.to_datetime(x, errors="coerce"),
33
- "DateField": lambda x: pd.to_datetime(x, errors="coerce").dt.date,
34
- "TimeField": lambda x: pd.to_datetime(x, errors="coerce").dt.time,
35
- "DurationField": lambda x: pd.to_timedelta(x, errors="coerce"),
36
- # for JSONField, assuming JSON objects are represented as string in df
37
- "JSONField": lambda x: x.apply(json.loads),
38
- "ArrayField": lambda x: x.apply(eval),
39
- "UUIDField": lambda x: x.astype(str),
40
- }
41
-
42
- django_field_conversion_map_dask: Dict[str, callable] = {
43
- "CharField": lambda x: x.astype(str),
44
- "TextField": lambda x: x.astype(str),
45
- "IntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
46
- "AutoField": lambda x: pd.to_numeric(x, errors="coerce"),
47
- "BigAutoField": lambda x: pd.to_numeric(x, errors="coerce"),
48
- "BigIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
49
- "SmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
50
- "PositiveIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
51
- "PositiveSmallIntegerField": lambda x: pd.to_numeric(x, errors="coerce"),
52
- "FloatField": lambda x: pd.to_numeric(x, errors="coerce"),
53
- "DecimalField": lambda x: pd.to_numeric(x, errors="coerce"),
54
- "BooleanField": lambda x: x.astype(bool),
55
- "NullBooleanField": lambda x: x.astype(bool),
56
- "DateTimeField": lambda x: pd.to_datetime(x, errors="coerce"),
57
- "DateField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.date,
58
- meta=("date", "object")),
59
- "TimeField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.time,
60
- meta=("time", "object")),
61
- "DurationField": lambda x: pd.to_timedelta(x, errors="coerce"),
62
- "JSONField": lambda x: x.map_partitions(lambda s: s.apply(json.loads), meta=("json", "object")),
63
- "ArrayField": lambda x: x.map_partitions(lambda s: s.apply(eval), meta=("array", "object")),
64
- "UUIDField": lambda x: x.astype(str),
65
- }
16
+ # the db field type.
66
17
 
67
18
  sqlalchemy_field_conversion_map_dask: Dict[str, callable] = {
68
19
  String.__name__: lambda x: x.astype(str).fillna(""),
@@ -7,8 +7,8 @@ class QueryConfig(BaseModel):
7
7
  use_exclude: bool = False
8
8
  n_records: int = 100
9
9
  dt_field: Optional[str] = None
10
- use_dask: bool = False
11
- as_dask: bool = False
10
+ use_dask: bool = True
11
+ as_dask: bool = True
12
12
 
13
13
  @model_validator(mode='after')
14
14
  def check_n_records(self):
@@ -10,7 +10,6 @@ from .df_utils import DfUtils
10
10
  from .storage_manager import StorageManager
11
11
  from .parquet_saver import ParquetSaver
12
12
  from .clickhouse_writer import ClickHouseWriter
13
- from .airflow_manager import AirflowDAGManager
14
13
  from .credentials import *
15
14
  from .update_planner import UpdatePlanner
16
15
  from .data_wrapper import DataWrapper
@@ -35,7 +34,6 @@ __all__ = [
35
34
  "StorageManager",
36
35
  "DfUtils",
37
36
  "ClickHouseWriter",
38
- "AirflowDAGManager",
39
37
  "StorageConfig",
40
38
  "FsRegistry",
41
39
  "DataFromHttpSource",
@@ -38,7 +38,7 @@ class DataWrapper:
38
38
  logger: Logger = None,
39
39
  show_progress: bool = False,
40
40
  timeout: float = 30,
41
- max_threads: int = 1,
41
+ max_threads: int = 3,
42
42
  **kwargs: Any,
43
43
  ):
44
44
  self.dataclass = dataclass
@@ -66,6 +66,7 @@ class DataWrapper:
66
66
  self.benchmarks: Dict[datetime.date, Dict[str, float]] = {}
67
67
  self.mmanifest = kwargs.get("mmanifest", None)
68
68
  self.update_planner=kwargs.get("update_planner", None)
69
+ self.datacls = self.dataclass(**self.class_params)
69
70
 
70
71
  def __enter__(self):
71
72
  """Context manager entry"""
@@ -164,28 +165,24 @@ class DataWrapper:
164
165
  def _process_single_date(self, date: datetime.date):
165
166
  """Core date processing logic with load/save timing and thread reporting"""
166
167
  path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
167
- self.logger.info(f"Processing date {date.isoformat()} for {path}")
168
- # self.logger.info(f"Path {path} in {self.skipped}: {path in self.skipped}")
168
+ self.logger.debug(f"Processing date {date.isoformat()} for {path}")
169
169
  if path in self.update_planner.skipped and self.update_planner.ignore_missing:
170
170
  self.logger.info(f"Skipping {date} as it exists in the skipped list")
171
171
  return
172
172
  full_path = f"{path}{self.parquet_filename}"
173
173
 
174
174
  thread_name = threading.current_thread().name
175
- self.logger.info(f"[{thread_name}] Executing date: {date} -> saving to: {full_path}")
175
+ self.logger.debug(f"[{thread_name}] Executing date: {date} -> saving to: {full_path}")
176
176
 
177
177
  overall_start = time.perf_counter()
178
178
  try:
179
179
  load_start = time.perf_counter()
180
- with self.dataclass(**self.class_params) as data:
181
- df = data.load_period(
182
- dt_field=self.date_field,
183
- start=date,
184
- end=date,
185
- **self.load_params
186
- )
180
+ date_filter = {f"{self.date_field}__date": {date.isoformat()}}
181
+ self.logger.debug(f"Loading data for {date} with filter: {date_filter}")
182
+ # Load data using the dataclass with the provided date filter
183
+ self.load_params.update(date_filter)
184
+ df = self.datacls.load(**self.load_params)
187
185
  load_time = time.perf_counter() - load_start
188
-
189
186
  if df.head(1, compute=True).empty:
190
187
  if self.mmanifest:
191
188
  schema = df._meta.dtypes.astype(str).to_dict()
@@ -115,22 +115,26 @@ class Logger:
115
115
  """
116
116
  self.logger.setLevel(level)
117
117
 
118
- def debug(self, msg: str):
118
+ def debug(self, msg: str, *args, **kwargs):
119
119
  """Log a debug message."""
120
- self.logger.debug(msg)
120
+ self.logger.debug(msg, *args, **kwargs)
121
121
 
122
- def info(self, msg: str):
122
+ def info(self, msg: str, *args, **kwargs):
123
123
  """Log an info message."""
124
- self.logger.info(msg)
124
+ self.logger.info(msg, *args, **kwargs)
125
125
 
126
- def warning(self, msg: str):
126
+ def warning(self, msg: str, *args, **kwargs):
127
127
  """Log a warning message."""
128
- self.logger.warning(msg)
128
+ self.logger.warning(msg, *args, **kwargs)
129
129
 
130
- def error(self, msg: str):
131
- """Log an error message."""
132
- self.logger.error(msg)
130
+ def error(self, msg: str, *args, **kwargs):
131
+ """
132
+ Log an error message.
133
+
134
+ To log exception information, use the `exc_info=True` keyword argument.
135
+ """
136
+ self.logger.error(msg, *args, **kwargs)
133
137
 
134
- def critical(self, msg: str):
138
+ def critical(self, msg: str, *args, **kwargs):
135
139
  """Log a critical message."""
136
- self.logger.critical(msg)
140
+ self.logger.critical(msg, *args, **kwargs)
@@ -73,6 +73,8 @@ class UpdatePlanner:
73
73
  self.show_progress = show_progress
74
74
  self.logger = logger or Logger.default_logger(logger_name="update_planner")
75
75
  self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
76
+ self.debug = debug
77
+ self.verbose = verbose
76
78
 
77
79
  # Filesystem and age helper
78
80
  self.fs = fs or fsspec.filesystem(filesystem_type, **(filesystem_options or {}))