sibi-dst 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/df_helper/_df_helper.py +184 -591
- sibi_dst/df_helper/_parquet_artifact.py +2 -0
- sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -2
- sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +161 -115
- sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +141 -97
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +34 -105
- sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +175 -162
- sibi_dst/df_helper/core/_query_config.py +2 -2
- sibi_dst/utils/data_wrapper.py +2 -2
- sibi_dst/utils/log_utils.py +15 -11
- sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +325 -50
- sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +2 -2
- sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +330 -51
- sibi_dst/v3/__init__.py +0 -0
- sibi_dst/v3/backends/__init__.py +0 -0
- sibi_dst/v3/df_helper/__init__.py +0 -0
- sibi_dst/v3/df_helper/_df_helper.py +91 -0
- {sibi_dst-0.3.62.dist-info → sibi_dst-0.3.64.dist-info}/METADATA +1 -1
- {sibi_dst-0.3.62.dist-info → sibi_dst-0.3.64.dist-info}/RECORD +20 -17
- sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -119
- {sibi_dst-0.3.62.dist-info → sibi_dst-0.3.64.dist-info}/WHEEL +0 -0
@@ -158,6 +158,7 @@ class ParquetArtifact(DfHelper):
|
|
158
158
|
if not manifest_exists:
|
159
159
|
self.logger.info(f"Creating new manifest at {self.missing_manifest_path}")
|
160
160
|
self.mmanifest.save()
|
161
|
+
self.mmanifest.cleanup_temp_manifests()
|
161
162
|
else:
|
162
163
|
self.logger.info(f"Manifest already exists at {self.missing_manifest_path}")
|
163
164
|
|
@@ -206,6 +207,7 @@ class ParquetArtifact(DfHelper):
|
|
206
207
|
try:
|
207
208
|
if self.mmanifest and self.mmanifest._new_records:
|
208
209
|
self.mmanifest.save()
|
210
|
+
self.mmanifest.cleanup_temp_manifests()
|
209
211
|
if getattr(self, "_entered", False) and self.fs and self._own_fs:
|
210
212
|
self.fs.close()
|
211
213
|
except Exception as e:
|
@@ -1,4 +1,3 @@
|
|
1
|
-
from ._filter_handler import SqlAlchemyFilterHandler
|
2
1
|
from ._db_connection import SqlAlchemyConnectionConfig
|
3
2
|
from ._load_from_db import SqlAlchemyLoadFromDb
|
4
3
|
from ._sql_model_builder import SqlAlchemyModelBuilder
|
@@ -7,5 +6,4 @@ __all__ = [
|
|
7
6
|
'SqlAlchemyConnectionConfig',
|
8
7
|
'SqlAlchemyModelBuilder',
|
9
8
|
'SqlAlchemyLoadFromDb',
|
10
|
-
'SqlAlchemyFilterHandler'
|
11
9
|
]
|
@@ -1,51 +1,85 @@
|
|
1
1
|
from __future__ import annotations
|
2
|
-
|
2
|
+
|
3
3
|
import threading
|
4
4
|
from contextlib import contextmanager
|
5
|
-
from
|
5
|
+
from typing import Any, Optional, ClassVar, Generator, Type, Dict
|
6
|
+
|
7
|
+
from pydantic import (
|
8
|
+
BaseModel,
|
9
|
+
field_validator,
|
10
|
+
model_validator,
|
11
|
+
ConfigDict,
|
12
|
+
)
|
6
13
|
from sqlalchemy import create_engine, event, text
|
7
14
|
from sqlalchemy.engine import url as sqlalchemy_url
|
8
15
|
from sqlalchemy.engine import Engine
|
9
|
-
from sqlalchemy.exc import OperationalError
|
10
|
-
from sqlalchemy.pool import QueuePool, NullPool, StaticPool
|
16
|
+
from sqlalchemy.exc import OperationalError, SQLAlchemyError
|
11
17
|
from sqlalchemy.orm import sessionmaker, Session
|
18
|
+
from sqlalchemy.pool import QueuePool, NullPool, StaticPool
|
19
|
+
|
20
|
+
# Assuming these are your project's internal modules
|
12
21
|
from sibi_dst.utils import Logger
|
13
22
|
from ._sql_model_builder import SqlAlchemyModelBuilder
|
14
23
|
|
15
24
|
|
16
25
|
class SqlAlchemyConnectionConfig(BaseModel):
|
17
26
|
"""
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
27
|
+
A thread-safe, registry-backed SQLAlchemy connection manager.
|
28
|
+
|
29
|
+
This class encapsulates database connection configuration and provides robust,
|
30
|
+
shared resource management. It is designed to be used as a context manager
|
31
|
+
to ensure resources are always released correctly.
|
32
|
+
|
33
|
+
Recommended Usage is via the `with` statement.
|
34
|
+
with SqlAlchemyConnectionConfig(...) as config:
|
35
|
+
session = config.get_session()
|
36
|
+
# ... do work ...
|
37
|
+
# config.close() is called automatically upon exiting the block.
|
38
|
+
|
39
|
+
Key Features:
|
40
|
+
- Context Manager Support: Guarantees resource cleanup.
|
41
|
+
- Shared Engine & Pool: Reuses a single SQLAlchemy Engine for identical
|
42
|
+
database URLs and pool settings, improving application performance.
|
43
|
+
- Reference Counting: Safely manages the lifecycle of the shared engine,
|
44
|
+
disposing of it only when the last user has closed its connection config.
|
24
45
|
"""
|
46
|
+
# --- Public Configuration ---
|
25
47
|
connection_url: str
|
26
48
|
table: Optional[str] = None
|
27
|
-
model: Optional[Any] = None
|
28
|
-
engine: Optional[Engine] = None
|
29
|
-
logger: Logger = None
|
30
49
|
debug: bool = False
|
31
50
|
|
51
|
+
# --- Pool Configuration ---
|
32
52
|
pool_size: int = 5
|
33
53
|
max_overflow: int = 10
|
34
54
|
pool_timeout: int = 30
|
35
|
-
pool_recycle: int =
|
55
|
+
pool_recycle: int = 1800
|
36
56
|
pool_pre_ping: bool = True
|
37
|
-
poolclass: Type = QueuePool
|
57
|
+
poolclass: Type[QueuePool] = QueuePool
|
38
58
|
|
59
|
+
# --- Internal & Runtime State ---
|
60
|
+
model: Optional[Type[Any]] = None
|
61
|
+
engine: Optional[Engine] = None
|
62
|
+
logger: Optional[Logger] = None
|
39
63
|
session_factory: Optional[sessionmaker] = None
|
40
|
-
_owns_engine: bool = False
|
41
64
|
|
42
|
-
|
65
|
+
# --- Private State ---
|
66
|
+
_engine_key_instance: tuple = ()
|
67
|
+
_closed: bool = False # Flag to prevent double-closing.
|
68
|
+
|
69
|
+
# --- Class-level Shared Resources ---
|
70
|
+
_engine_registry: ClassVar[Dict[tuple, Dict[str, Any]]] = {}
|
43
71
|
_registry_lock: ClassVar[threading.Lock] = threading.Lock()
|
44
|
-
_active_connections: ClassVar[int] = 0
|
45
72
|
|
46
|
-
|
47
|
-
|
48
|
-
|
73
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
74
|
+
|
75
|
+
# Add __enter__ and __exit__ for context manager protocol
|
76
|
+
def __enter__(self) -> SqlAlchemyConnectionConfig:
|
77
|
+
"""Enter the runtime context, returning self."""
|
78
|
+
return self
|
79
|
+
|
80
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
81
|
+
"""Exit the runtime context, ensuring that close() is called."""
|
82
|
+
self.close()
|
49
83
|
|
50
84
|
@field_validator("pool_size", "max_overflow", "pool_timeout", "pool_recycle")
|
51
85
|
@classmethod
|
@@ -56,78 +90,139 @@ class SqlAlchemyConnectionConfig(BaseModel):
|
|
56
90
|
|
57
91
|
@model_validator(mode="after")
|
58
92
|
def _init_all(self) -> SqlAlchemyConnectionConfig:
|
93
|
+
"""Orchestrates the initialization process after Pydantic validation."""
|
59
94
|
self._init_logger()
|
95
|
+
self._engine_key_instance = self._get_engine_key()
|
60
96
|
self._init_engine()
|
61
97
|
self._validate_conn()
|
62
98
|
self._build_model()
|
63
|
-
|
99
|
+
if self.engine:
|
100
|
+
self.session_factory = sessionmaker(bind=self.engine, expire_on_commit=False)
|
64
101
|
return self
|
65
102
|
|
66
103
|
def _init_logger(self) -> None:
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
104
|
+
"""Initializes the logger for this instance."""
|
105
|
+
if self.logger is None:
|
106
|
+
self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
|
107
|
+
log_level = Logger.DEBUG if self.debug else Logger.INFO
|
108
|
+
self.logger.set_level(log_level)
|
109
|
+
|
110
|
+
def _get_engine_key(self) -> tuple:
|
111
|
+
"""Generates a unique, normalized key for an engine configuration."""
|
71
112
|
parsed = sqlalchemy_url.make_url(self.connection_url)
|
72
113
|
query = {k: v for k, v in parsed.query.items() if not k.startswith("pool_")}
|
73
|
-
|
74
|
-
|
114
|
+
normalized_url = parsed.set(query=query)
|
115
|
+
key_parts = [str(normalized_url)]
|
75
116
|
if self.poolclass not in (NullPool, StaticPool):
|
76
|
-
|
77
|
-
|
117
|
+
key_parts += [
|
118
|
+
self.pool_size, self.max_overflow, self.pool_timeout,
|
119
|
+
self.pool_recycle, self.pool_pre_ping
|
120
|
+
]
|
121
|
+
return tuple(key_parts)
|
78
122
|
|
79
123
|
def _init_engine(self) -> None:
|
80
|
-
|
124
|
+
"""Initializes or reuses a shared SQLAlchemy Engine."""
|
81
125
|
with self._registry_lock:
|
82
|
-
|
83
|
-
if
|
84
|
-
self.engine =
|
85
|
-
|
86
|
-
self.logger.debug(f"Reusing engine {
|
126
|
+
engine_wrapper = self._engine_registry.get(self._engine_key_instance)
|
127
|
+
if engine_wrapper:
|
128
|
+
self.engine = engine_wrapper['engine']
|
129
|
+
engine_wrapper['ref_count'] += 1
|
130
|
+
self.logger.debug(f"Reusing engine. Ref count: {engine_wrapper['ref_count']}.")
|
87
131
|
else:
|
88
|
-
self.logger.debug(f"Creating engine {
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
132
|
+
self.logger.debug(f"Creating new engine for key: {self._engine_key_instance}")
|
133
|
+
try:
|
134
|
+
new_engine = create_engine(
|
135
|
+
self.connection_url, pool_size=self.pool_size,
|
136
|
+
max_overflow=self.max_overflow, pool_timeout=self.pool_timeout,
|
137
|
+
pool_recycle=self.pool_recycle, pool_pre_ping=self.pool_pre_ping,
|
138
|
+
poolclass=self.poolclass,
|
139
|
+
)
|
140
|
+
self.engine = new_engine
|
141
|
+
self._attach_events()
|
142
|
+
self._engine_registry[self._engine_key_instance] = {
|
143
|
+
'engine': new_engine, 'ref_count': 1, 'active_connections': 0
|
144
|
+
}
|
145
|
+
except Exception as e:
|
146
|
+
self.logger.error(f"Failed to create engine: {e}")
|
147
|
+
raise SQLAlchemyError(f"Engine creation failed: {e}") from e
|
148
|
+
|
149
|
+
def close(self) -> None:
|
150
|
+
"""
|
151
|
+
Decrements the engine's reference count and disposes of the engine
|
152
|
+
if the count reaches zero. This is now typically called automatically
|
153
|
+
when exiting a `with` block.
|
154
|
+
"""
|
155
|
+
# Prevent the method from running more than once per instance.
|
156
|
+
if self._closed:
|
157
|
+
self.logger.debug("Attempted to close an already-closed config instance.")
|
158
|
+
return
|
159
|
+
|
160
|
+
with self._registry_lock:
|
161
|
+
key = self._engine_key_instance
|
162
|
+
engine_wrapper = self._engine_registry.get(key)
|
163
|
+
|
164
|
+
if not engine_wrapper:
|
165
|
+
self.logger.warning("Attempted to close a config whose engine is not in the registry.")
|
166
|
+
return
|
167
|
+
|
168
|
+
engine_wrapper['ref_count'] -= 1
|
169
|
+
self.logger.debug(f"Closing config. Ref count is now {engine_wrapper['ref_count']}.")
|
170
|
+
|
171
|
+
if engine_wrapper['ref_count'] <= 0:
|
172
|
+
self.logger.debug(f"Disposing engine as reference count is zero. Key: {key}")
|
173
|
+
engine_wrapper['engine'].dispose()
|
174
|
+
del self._engine_registry[key]
|
175
|
+
|
176
|
+
# Mark this instance as closed to prevent subsequent calls.
|
177
|
+
self._closed = True
|
178
|
+
|
179
|
+
# ... (the rest of your methods like _attach_events, _on_checkout, get_session, etc. remain unchanged)
|
180
|
+
# They are omitted here for brevity but should be included in your final file.
|
101
181
|
|
102
182
|
def _attach_events(self) -> None:
|
103
|
-
|
104
|
-
|
183
|
+
"""Attaches checkout/checkin events to the engine for connection tracking."""
|
184
|
+
if self.engine:
|
185
|
+
event.listen(self.engine, "checkout", self._on_checkout)
|
186
|
+
event.listen(self.engine, "checkin", self._on_checkin)
|
105
187
|
|
106
188
|
def _on_checkout(self, *args) -> None:
|
189
|
+
"""Event listener for when a connection is checked out from the pool."""
|
107
190
|
with self._registry_lock:
|
108
|
-
|
109
|
-
|
191
|
+
wrapper = self._engine_registry.get(self._engine_key_instance)
|
192
|
+
if wrapper:
|
193
|
+
wrapper['active_connections'] += 1
|
194
|
+
self.logger.debug(f"Connection checked out. Active: {self.active_connections}")
|
110
195
|
|
111
196
|
def _on_checkin(self, *args) -> None:
|
197
|
+
"""Event listener for when a connection is returned to the pool."""
|
112
198
|
with self._registry_lock:
|
113
|
-
|
114
|
-
|
199
|
+
wrapper = self._engine_registry.get(self._engine_key_instance)
|
200
|
+
if wrapper:
|
201
|
+
wrapper['active_connections'] = max(0, wrapper['active_connections'] - 1)
|
202
|
+
self.logger.debug(f"Connection checked in. Active: {self.active_connections}")
|
115
203
|
|
116
204
|
@property
|
117
205
|
def active_connections(self) -> int:
|
118
|
-
|
206
|
+
"""Returns the number of active connections for this instance's engine."""
|
207
|
+
with self._registry_lock:
|
208
|
+
wrapper = self._engine_registry.get(self._engine_key_instance)
|
209
|
+
return wrapper['active_connections'] if wrapper else 0
|
119
210
|
|
120
211
|
def _validate_conn(self) -> None:
|
212
|
+
"""Tests the database connection by executing a simple query."""
|
121
213
|
try:
|
122
214
|
with self.managed_connection() as conn:
|
123
215
|
conn.execute(text("SELECT 1"))
|
124
|
-
self.logger.debug("
|
216
|
+
self.logger.debug("Database connection validated successfully.")
|
125
217
|
except OperationalError as e:
|
126
|
-
self.logger.error(f"
|
127
|
-
raise
|
218
|
+
self.logger.error(f"Database connection failed: {e}")
|
219
|
+
raise ValueError(f"DB connection failed: {e}") from e
|
128
220
|
|
129
221
|
@contextmanager
|
130
|
-
def managed_connection(self) -> Generator[Any, None,
|
222
|
+
def managed_connection(self) -> Generator[Any, None, None]:
|
223
|
+
"""Provides a single database connection from the engine pool."""
|
224
|
+
if not self.engine:
|
225
|
+
raise RuntimeError("Engine not initialized. Cannot get a connection.")
|
131
226
|
conn = self.engine.connect()
|
132
227
|
try:
|
133
228
|
yield conn
|
@@ -135,68 +230,19 @@ class SqlAlchemyConnectionConfig(BaseModel):
|
|
135
230
|
conn.close()
|
136
231
|
|
137
232
|
def get_session(self) -> Session:
|
233
|
+
"""Returns a new SQLAlchemy Session from the session factory."""
|
138
234
|
if not self.session_factory:
|
139
|
-
raise RuntimeError("Session factory not initialized")
|
235
|
+
raise RuntimeError("Session factory not initialized. Cannot get a session.")
|
140
236
|
return self.session_factory()
|
141
237
|
|
142
238
|
def _build_model(self) -> None:
|
143
|
-
"""Dynamically
|
239
|
+
"""Dynamically builds an ORM model if `self.table` is set."""
|
144
240
|
if not self.table or not self.engine:
|
145
241
|
return
|
146
242
|
try:
|
147
243
|
builder = SqlAlchemyModelBuilder(self.engine, self.table)
|
148
244
|
self.model = builder.build_model()
|
149
|
-
self.logger.debug(f"
|
245
|
+
self.logger.debug(f"Successfully built ORM model for table: {self.table}")
|
150
246
|
except Exception as e:
|
151
|
-
self.logger.error(f"
|
152
|
-
raise
|
153
|
-
|
154
|
-
def dispose_idle_connections(self) -> int:
|
155
|
-
key = self._engine_key()
|
156
|
-
with self._registry_lock:
|
157
|
-
if self._engine_registry.get(key) is not self.engine:
|
158
|
-
self.logger.debug("Engine changed")
|
159
|
-
return 0
|
160
|
-
pool = self.engine.pool
|
161
|
-
if isinstance(pool, QueuePool):
|
162
|
-
count = pool.checkedin()
|
163
|
-
pool.dispose()
|
164
|
-
self.logger.debug(f"Disposed {count}")
|
165
|
-
return count
|
166
|
-
self.logger.warning(f"No idle dispose for {type(pool).__name__}")
|
167
|
-
return 0
|
168
|
-
|
169
|
-
def terminate_idle_connections(self, idle_seconds: int = 300) -> int:
|
170
|
-
terminated = 0
|
171
|
-
dialect = self.engine.dialect.name
|
172
|
-
with self.managed_connection() as conn:
|
173
|
-
if dialect == 'postgresql':
|
174
|
-
res = conn.execute(text(
|
175
|
-
f"SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
|
176
|
-
f"WHERE state='idle' AND (now() - query_start) > interval '{idle_seconds} seconds' "
|
177
|
-
f"AND pid<>pg_backend_pid()"
|
178
|
-
))
|
179
|
-
terminated = res.rowcount
|
180
|
-
elif dialect == 'mysql':
|
181
|
-
for row in conn.execute(text("SHOW PROCESSLIST")):
|
182
|
-
if row.Command == 'Sleep' and row.Time > idle_seconds:
|
183
|
-
conn.execute(text(f"KILL {row.Id}"))
|
184
|
-
terminated += 1
|
185
|
-
else:
|
186
|
-
self.logger.warning(f"Idle termination not supported: {dialect}")
|
187
|
-
self.logger.debug(f"Terminated {terminated}")
|
188
|
-
return terminated
|
189
|
-
|
190
|
-
def close(self) -> None:
|
191
|
-
with self._registry_lock:
|
192
|
-
key = self._engine_key()
|
193
|
-
if not self._owns_engine:
|
194
|
-
self.logger.debug("Not owner, skipping close")
|
195
|
-
return
|
196
|
-
if self._engine_registry.get(key) != self.engine:
|
197
|
-
self.logger.debug("Engine not in registry")
|
198
|
-
return
|
199
|
-
self.engine.dispose()
|
200
|
-
del self._engine_registry[key]
|
201
|
-
type(self)._active_connections = 0
|
202
|
-
self.logger.debug(f"Engine closed {key}")
|
247
|
+
self.logger.error(f"Failed to build ORM model for table '{self.table}': {e}")
|
248
|
+
raise ValueError(f"Model construction failed for table '{self.table}': {e}") from e
|