sibi-dst 0.3.63__py3-none-any.whl → 2025.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. sibi_dst/df_helper/_df_helper.py +186 -591
  2. sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -2
  3. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +161 -115
  4. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +291 -97
  5. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +34 -105
  6. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +175 -162
  7. sibi_dst/df_helper/core/__init__.py +0 -4
  8. sibi_dst/df_helper/core/_defaults.py +1 -50
  9. sibi_dst/df_helper/core/_query_config.py +2 -2
  10. sibi_dst/utils/__init__.py +0 -2
  11. sibi_dst/utils/data_wrapper.py +9 -12
  12. sibi_dst/utils/log_utils.py +15 -11
  13. sibi_dst/utils/update_planner.py +2 -0
  14. sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +325 -50
  15. sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +2 -2
  16. sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +330 -51
  17. sibi_dst/v3/__init__.py +0 -0
  18. sibi_dst/v3/backends/__init__.py +0 -0
  19. sibi_dst/v3/df_helper/__init__.py +0 -0
  20. sibi_dst/v3/df_helper/_df_helper.py +91 -0
  21. sibi_dst-2025.1.1.dist-info/METADATA +55 -0
  22. {sibi_dst-0.3.63.dist-info → sibi_dst-2025.1.1.dist-info}/RECORD +23 -26
  23. sibi_dst/df_helper/backends/django/__init__.py +0 -11
  24. sibi_dst/df_helper/backends/django/_db_connection.py +0 -88
  25. sibi_dst/df_helper/backends/django/_io_dask.py +0 -450
  26. sibi_dst/df_helper/backends/django/_load_from_db.py +0 -227
  27. sibi_dst/df_helper/backends/django/_sql_model_builder.py +0 -493
  28. sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -119
  29. sibi_dst/utils/airflow_manager.py +0 -212
  30. sibi_dst-0.3.63.dist-info/METADATA +0 -90
  31. {sibi_dst-0.3.63.dist-info → sibi_dst-2025.1.1.dist-info}/WHEEL +0 -0
@@ -1,82 +1,357 @@
1
- from typing import Any, Optional
1
+ from __future__ import annotations
2
2
 
3
- from pydantic import BaseModel, model_validator, ConfigDict
4
- from sqlalchemy import create_engine, text
3
+ import threading
4
+ from contextlib import contextmanager
5
+ from typing import Any, Optional, ClassVar, Generator, Type, Dict
6
+
7
+ from pydantic import (
8
+ BaseModel,
9
+ field_validator,
10
+ model_validator,
11
+ ConfigDict,
12
+ )
13
+ from sqlalchemy import create_engine, event, text
14
+ from sqlalchemy.engine import url as sqlalchemy_url
5
15
  from sqlalchemy.engine import Engine
6
- from sqlalchemy.exc import OperationalError
16
+ from sqlalchemy.exc import OperationalError, SQLAlchemyError
17
+ from sqlalchemy.orm import sessionmaker, Session
18
+ from sqlalchemy.pool import QueuePool, NullPool, StaticPool
7
19
 
8
- from sibi_dst.v2.utils import Logger
20
+ # Restoring the original intended imports
21
+ from sibi_dst.utils import Logger
9
22
  from ._model_builder import SqlAlchemyModelBuilder
10
23
 
11
24
 
12
25
  class SqlAlchemyConnectionConfig(BaseModel):
13
26
  """
14
- Configuration for establishing an SQLAlchemy database connection and dynamically building
15
- an ORM model for a specific table.
27
+ A thread-safe, registry-backed SQLAlchemy connection manager.
28
+
29
+ This class encapsulates database connection configuration and provides robust,
30
+ shared resource management. It is designed to be instantiated multiple times
31
+ with the same connection parameters, where it will safely share a single
32
+ underlying database engine and connection pool.
33
+
34
+ Key Features:
35
+ - Shared Engine & Pool: Reuses a single SQLAlchemy Engine for identical
36
+ database URLs and pool settings, improving application performance.
37
+ - Reference Counting: Safely manages the lifecycle of the shared engine,
38
+ disposing of it only when the last user has closed its connection config.
39
+ - Per-Engine Connection Tracking: Monitors active connections for each
40
+ engine individually.
41
+ - Dynamic ORM Model Building: If a table name is provided, it can
42
+ dynamically generate a SQLAlchemy ORM model for that table.
43
+ - Integrated Session Management: Provides a pre-configured session factory
44
+ for convenient database interactions.
16
45
 
17
46
  Attributes:
18
- connection_url (str): The URL used to connect to the database.
19
- table_name (Optional[str]): The name of the table for which the model will be built.
20
- model (Any): The dynamically built SQLAlchemy model.
21
- engine (Optional[Engine]): The SQLAlchemy engine instance.
47
+ connection_url (str): The database URL (e.g., "postgresql://u:p@h/d").
48
+ table (Optional[str]): If provided, an ORM model will be built for this table.
49
+ model (Optional[Any]): The dynamically generated ORM model class.
50
+ engine (Optional[Engine]): The underlying SQLAlchemy engine.
51
+ logger (Logger): The logger instance.
52
+ debug (bool): If True, sets logger to DEBUG level.
53
+ session_factory (Optional[sessionmaker]): A factory for creating Session objects.
22
54
  """
23
-
24
- model_config = ConfigDict(arbitrary_types_allowed=True)
55
+ # --- Public Configuration ---
25
56
  connection_url: str
26
57
  table: Optional[str] = None
27
- model: Any = None
28
- engine: Optional[Engine] = None
29
58
  debug: bool = False
59
+
60
+ # --- Pool Configuration ---
61
+ pool_size: int = 5
62
+ max_overflow: int = 10
63
+ pool_timeout: int = 30
64
+ pool_recycle: int = 300
65
+ pool_pre_ping: bool = True
66
+ poolclass: Type[QueuePool] = QueuePool
67
+
68
+ # --- Internal & Runtime State ---
69
+ model: Optional[Type[Any]] = None
70
+ engine: Optional[Engine] = None
30
71
  logger: Optional[Logger] = None
31
- add_relationships: bool = False
32
- export_models: bool = False
33
- export_file_name: str = 'models.py'
72
+ session_factory: Optional[sessionmaker] = None
73
+
74
+ # --- Private State ---
75
+ _engine_key_instance: tuple = ()
76
+
77
+ # --- Class-level Shared Resources ---
78
+ # Registry stores engine wrappers with metadata like ref_count.
79
+ # Format: { engine_key: {'engine': Engine, 'ref_count': int, 'active_connections': int} }
80
+ _engine_registry: ClassVar[Dict[tuple, Dict[str, Any]]] = {}
81
+ _registry_lock: ClassVar[threading.Lock] = threading.Lock()
82
+
83
+ # Pydantic v2 configuration
84
+ model_config = ConfigDict(
85
+ arbitrary_types_allowed=True,
86
+ )
87
+
88
+ @field_validator("pool_size", "max_overflow", "pool_timeout", "pool_recycle")
89
+ @classmethod
90
+ def _validate_pool_params(cls, v: int) -> int:
91
+ if v < 0:
92
+ raise ValueError("Pool parameters must be non-negative")
93
+ return v
34
94
 
35
95
  @model_validator(mode="after")
36
- def validate_and_initialize(self) -> "SqlAlchemyConnectionConfig":
96
+ def _init_all(self) -> SqlAlchemyConnectionConfig:
37
97
  """
38
- Validate the configuration, initialize the engine, test the connection, and build the model.
98
+ Orchestrates the initialization process after Pydantic validation.
99
+ This method sets up the logger, engine, validates the connection,
100
+ builds the ORM model, and creates the session factory.
101
+ """
102
+ self._init_logger()
103
+
104
+ # The engine key is generated and stored on the instance to ensure
105
+ # that even if public attributes are changed later, this instance
106
+ # can always find its original engine in the registry.
107
+ self._engine_key_instance = self._get_engine_key()
108
+
109
+ self._init_engine()
110
+ self._validate_conn()
111
+ self._build_model()
39
112
 
40
- Raises:
41
- ValueError: If `connection_url` or `table_name` is missing, or if the connection or model
42
- building fails.
113
+ if self.engine:
114
+ self.session_factory = sessionmaker(bind=self.engine, expire_on_commit=False)
115
+
116
+ return self
117
+
118
+ def _init_logger(self) -> None:
119
+ """Initializes the logger for this instance."""
120
+ if self.logger is None:
121
+ self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
122
+ # Assuming the Logger has a set_level method that accepts standard logging levels
123
+ log_level = Logger.DEBUG if self.debug else Logger.INFO
124
+ self.logger.set_level(log_level)
125
+
126
+ def _get_engine_key(self) -> tuple:
127
+ """
128
+ Generates a unique, normalized key for an engine configuration.
129
+ This key is used as the dictionary key in the shared _engine_registry.
130
+ It intentionally excludes instance-specific details like `table` to
131
+ ensure proper engine sharing.
132
+ """
133
+ parsed = sqlalchemy_url.make_url(self.connection_url)
134
+ # Exclude pooling from the query params as they are handled separately
135
+ query = {k: v for k, v in parsed.query.items() if not k.startswith("pool_")}
136
+ normalized_url = parsed.set(query=query)
137
+
138
+ key_parts = [str(normalized_url)]
139
+ if self.poolclass not in (NullPool, StaticPool):
140
+ key_parts += [
141
+ self.pool_size, self.max_overflow, self.pool_timeout,
142
+ self.pool_recycle, self.pool_pre_ping
143
+ ]
144
+ return tuple(key_parts)
145
+
146
+ def _init_engine(self) -> None:
147
+ """
148
+ Initializes the SQLAlchemy Engine.
149
+ If an identical engine already exists in the registry, it is reused
150
+ and its reference count is incremented. Otherwise, a new engine
151
+ is created and added to the registry.
43
152
  """
44
- self.logger = self.logger or Logger.default_logger(logger_name="sqlalchemy_connection", debug=self.debug)
45
- self.logger.debug("Validating and initializing SQLAlchemy connection configuration.")
46
- if not self.connection_url:
47
- raise ValueError("`connection_url` must be provided.")
153
+ with self._registry_lock:
154
+ engine_wrapper = self._engine_registry.get(self._engine_key_instance)
155
+
156
+ if engine_wrapper:
157
+ # --- Reuse existing engine ---
158
+ self.engine = engine_wrapper['engine']
159
+ engine_wrapper['ref_count'] += 1
160
+ self.logger.debug(
161
+ f"Reusing engine. Ref count: {engine_wrapper['ref_count']}. Key: {self._engine_key_instance}")
162
+ else:
163
+ # --- Create new engine ---
164
+ self.logger.debug(f"Creating new engine for key: {self._engine_key_instance}")
165
+ try:
166
+ new_engine = create_engine(
167
+ self.connection_url,
168
+ pool_size=self.pool_size,
169
+ max_overflow=self.max_overflow,
170
+ pool_timeout=self.pool_timeout,
171
+ pool_recycle=self.pool_recycle,
172
+ pool_pre_ping=self.pool_pre_ping,
173
+ poolclass=self.poolclass,
174
+ )
175
+ self.engine = new_engine
176
+ self._attach_events()
177
+
178
+ # Store the new engine and its metadata in the registry
179
+ self._engine_registry[self._engine_key_instance] = {
180
+ 'engine': new_engine,
181
+ 'ref_count': 1,
182
+ 'active_connections': 0
183
+ }
184
+ except Exception as e:
185
+ self.logger.error(f"Failed to create engine: {e}")
186
+ raise SQLAlchemyError(f"Engine creation failed: {e}") from e
48
187
 
49
- # Initialize the engine.
50
- self.engine = create_engine(self.connection_url)
51
- self.logger.debug(f"Engine created for URL")
188
+ def _attach_events(self) -> None:
189
+ """Attaches checkout/checkin events to the engine for connection tracking."""
190
+ if self.engine:
191
+ event.listen(self.engine, "checkout", self._on_checkout)
192
+ event.listen(self.engine, "checkin", self._on_checkin)
52
193
 
53
- # Validate the connection.
54
- self.validate_connection()
194
+ def _on_checkout(self, *args) -> None:
195
+ """Event listener for when a connection is checked out from the pool."""
196
+ with self._registry_lock:
197
+ wrapper = self._engine_registry.get(self._engine_key_instance)
198
+ if wrapper:
199
+ wrapper['active_connections'] += 1
200
+ self.logger.debug(f"Connection checked out. Active: {self.active_connections}")
55
201
 
56
- if not self.table:
57
- raise ValueError("`table` must be provided to build the model.")
202
+ def _on_checkin(self, *args) -> None:
203
+ """Event listener for when a connection is returned to the pool."""
204
+ with self._registry_lock:
205
+ wrapper = self._engine_registry.get(self._engine_key_instance)
206
+ if wrapper:
207
+ wrapper['active_connections'] = max(0, wrapper['active_connections'] - 1)
208
+ self.logger.debug(f"Connection checked in. Active: {self.active_connections}")
58
209
 
210
+ @property
211
+ def active_connections(self) -> int:
212
+ """Returns the number of active connections for this instance's engine."""
213
+ with self._registry_lock:
214
+ wrapper = self._engine_registry.get(self._engine_key_instance)
215
+ return wrapper['active_connections'] if wrapper else 0
216
+
217
+ def _validate_conn(self) -> None:
218
+ """Tests the database connection by executing a simple query."""
59
219
  try:
60
- builder = SqlAlchemyModelBuilder(self.engine, self.table, self.add_relationships, self.debug, self.logger)
220
+ with self.managed_connection() as conn:
221
+ conn.execute(text("SELECT 1"))
222
+ self.logger.debug("Database connection validated successfully.")
223
+ except OperationalError as e:
224
+ self.logger.error(f"Database connection failed: {e}")
225
+ # This will be caught by Pydantic and raised as a ValidationError
226
+ raise ValueError(f"DB connection failed: {e}") from e
227
+
228
+ @contextmanager
229
+ def managed_connection(self) -> Generator[Any, None, None]:
230
+ """Provides a single database connection from the engine pool."""
231
+ if not self.engine:
232
+ raise RuntimeError("Engine not initialized. Cannot get a connection.")
233
+ conn = self.engine.connect()
234
+ try:
235
+ yield conn
236
+ finally:
237
+ conn.close()
238
+
239
+ def get_session(self) -> Session:
240
+ """Returns a new SQLAlchemy Session from the session factory."""
241
+ if not self.session_factory:
242
+ raise RuntimeError("Session factory not initialized. Cannot get a session.")
243
+ return self.session_factory()
244
+
245
+ def _build_model(self) -> None:
246
+ """
247
+ Dynamically builds and assigns an ORM model if `self.table` is set.
248
+ Uses SqlAlchemyModelBuilder to reflect the table schema from the database.
249
+ """
250
+ if not self.table or not self.engine:
251
+ return
252
+ try:
253
+ builder = SqlAlchemyModelBuilder(self.engine, self.table)
61
254
  self.model = builder.build_model()
62
- if self.export_models:
63
- builder.export_models_to_file(self.export_file_name)
64
- self.logger.debug(f"Successfully built model for table: {self.table}")
255
+ self.logger.debug(f"Successfully built ORM model for table: {self.table}")
65
256
  except Exception as e:
66
- raise ValueError(f"Failed to build model for table {self.table}: {e}")
257
+ self.logger.error(f"Failed to build ORM model for table '{self.table}': {e}")
258
+ # Propagate as a ValueError to be caught by Pydantic validation
259
+ raise ValueError(f"Model construction failed for table '{self.table}': {e}") from e
67
260
 
68
- return self
261
+ def close(self) -> None:
262
+ """
263
+ Decrements the engine's reference count and disposes of the engine
264
+ if the count reaches zero. This is the primary method for releasing
265
+ resources managed by this configuration object.
266
+ """
267
+ with self._registry_lock:
268
+ key = self._engine_key_instance
269
+ engine_wrapper = self._engine_registry.get(key)
270
+
271
+ if not engine_wrapper:
272
+ self.logger.warning("Attempted to close a config whose engine is not in the registry.")
273
+ return
274
+
275
+ engine_wrapper['ref_count'] -= 1
276
+ self.logger.debug(f"Closing config. Ref count is now {engine_wrapper['ref_count']} for key {key}.")
69
277
 
70
- def validate_connection(self) -> None:
278
+ if engine_wrapper['ref_count'] <= 0:
279
+ self.logger.debug(f"Disposing engine as reference count is zero. Key: {key}")
280
+ engine_wrapper['engine'].dispose()
281
+ del self._engine_registry[key]
282
+
283
+ def dispose_idle_connections(self) -> int:
71
284
  """
72
- Test the database connection by executing a simple query.
285
+ Closes and discards idle connections in this engine's connection pool.
286
+
287
+ This is a pool-level operation that affects only the connections held
288
+ by the pool, not those active at the database level.
73
289
 
74
- Raises:
75
- ValueError: If the connection cannot be established.
290
+ Returns:
291
+ int: The number of connections disposed of.
76
292
  """
77
- try:
78
- with self.engine.connect() as connection:
79
- connection.execute(text("SELECT 1"))
80
- self.logger.debug("Database connection validated.")
81
- except OperationalError as e:
82
- raise ValueError(f"Failed to connect to the database: {e}")
293
+ if not self.engine:
294
+ self.logger.warning("Cannot dispose idle connections: engine not initialized.")
295
+ return 0
296
+
297
+ pool = self.engine.pool
298
+ if isinstance(pool, QueuePool):
299
+ # The pool.dispose() method checks in all connections and clears the pool.
300
+ # The number of checked-in connections is the number of idle connections.
301
+ count = pool.checkedin()
302
+ pool.dispose()
303
+ self.logger.debug(f"Disposed {count} idle connections from the pool.")
304
+ return count
305
+
306
+ self.logger.warning(f"Idle connection disposal not applicable for pool type: {type(pool).__name__}")
307
+ return 0
308
+
309
+ def terminate_idle_connections(self, idle_seconds: int = 300) -> int:
310
+ """
311
+ Terminates idle connections at the database server level.
312
+
313
+ This is an administrative database operation that forcibly closes
314
+ backend processes. Use with caution. Support is dialect-specific.
315
+
316
+ Args:
317
+ idle_seconds (int): The minimum idle time in seconds for a
318
+ connection to be terminated.
319
+
320
+ Returns:
321
+ int: The number of connections terminated.
322
+ """
323
+ if not self.engine:
324
+ self.logger.warning("Cannot terminate connections: engine not initialized.")
325
+ return 0
326
+
327
+ terminated_count = 0
328
+ dialect = self.engine.dialect.name
329
+
330
+ with self.managed_connection() as conn:
331
+ try:
332
+ if dialect == 'postgresql':
333
+ query = text(
334
+ "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
335
+ "WHERE state = 'idle' "
336
+ "AND (now() - query_start) > INTERVAL ':idle_seconds seconds' "
337
+ "AND pid <> pg_backend_pid()"
338
+ )
339
+ res = conn.execute(query, {"idle_seconds": idle_seconds})
340
+ terminated_count = res.rowcount if res.rowcount is not None else 0
341
+
342
+ elif dialect == 'mysql':
343
+ process_list = conn.execute(text("SHOW PROCESSLIST"))
344
+ for row in process_list:
345
+ # Pylint compatible attribute access
346
+ if getattr(row, 'Command', '') == 'Sleep' and getattr(row, 'Time', 0) > idle_seconds:
347
+ conn.execute(text(f"KILL {getattr(row, 'Id')}"))
348
+ terminated_count += 1
349
+ else:
350
+ self.logger.warning(f"Idle connection termination is not supported for dialect: {dialect}")
351
+ except SQLAlchemyError as e:
352
+ self.logger.error(f"Error terminating idle connections for dialect {dialect}: {e}")
353
+
354
+ if terminated_count > 0:
355
+ self.logger.debug(f"Terminated {terminated_count} idle connections on the database server.")
356
+ return terminated_count
357
+
@@ -10,7 +10,7 @@ from sibi_dst.v2.utils import Logger
10
10
 
11
11
 
12
12
  class SQLAlchemyDask:
13
- def __init__(self, model, filters, engine_url, chunk_size=1000, logger=None, debug=False):
13
+ def __init__(self, model, filters, engine, chunk_size=1000, logger=None, debug=False):
14
14
  """
15
15
  Initialize with an SQLAlchemy query and database engine URL.
16
16
 
@@ -26,7 +26,7 @@ class SQLAlchemyDask:
26
26
  self.filters = filters
27
27
  self.chunk_size = chunk_size
28
28
  self.debug = debug
29
- self.engine = create_engine(engine_url)
29
+ self.engine = engine
30
30
  self.Session = sessionmaker(bind=self.engine)
31
31
  self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
32
32
  self.logger.set_level(logger.DEBUG if debug else logger.INFO)