sibi-dst 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/df_helper/_df_helper.py +184 -591
- sibi_dst/df_helper/_parquet_artifact.py +2 -0
- sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -2
- sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +161 -115
- sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +141 -97
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +34 -105
- sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +175 -162
- sibi_dst/df_helper/core/_query_config.py +2 -2
- sibi_dst/utils/data_wrapper.py +2 -2
- sibi_dst/utils/log_utils.py +15 -11
- sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +325 -50
- sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +2 -2
- sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +330 -51
- sibi_dst/v3/__init__.py +0 -0
- sibi_dst/v3/backends/__init__.py +0 -0
- sibi_dst/v3/df_helper/__init__.py +0 -0
- sibi_dst/v3/df_helper/_df_helper.py +91 -0
- {sibi_dst-0.3.62.dist-info → sibi_dst-0.3.64.dist-info}/METADATA +1 -1
- {sibi_dst-0.3.62.dist-info → sibi_dst-0.3.64.dist-info}/RECORD +20 -17
- sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -119
- {sibi_dst-0.3.62.dist-info → sibi_dst-0.3.64.dist-info}/WHEEL +0 -0
sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py

```diff
@@ -1,78 +1,357 @@
-from
+from __future__ import annotations
 
-
-from
-from
-from sqlalchemy.exc import OperationalError
+import threading
+from contextlib import contextmanager
+from typing import Any, Optional, ClassVar, Generator, Type, Dict
 
-from
-
+from pydantic import (
+    BaseModel,
+    field_validator,
+    model_validator,
+    ConfigDict,
+)
+from sqlalchemy import create_engine, event, text
+from sqlalchemy.engine import url as sqlalchemy_url
+from sqlalchemy.engine import Engine
+from sqlalchemy.exc import OperationalError, SQLAlchemyError
+from sqlalchemy.orm import sessionmaker, Session
+from sqlalchemy.pool import QueuePool, NullPool, StaticPool
+
+# Restoring the original intended imports
+from sibi_dst.utils import Logger
+from ._model_builder import SQLModelModelBuilder
 
 
 class SQLModelConnectionConfig(BaseModel):
     """
-
-
+    A thread-safe, registry-backed SQLAlchemy connection manager.
+
+    This class encapsulates database connection configuration and provides robust,
+    shared resource management. It is designed to be instantiated multiple times
+    with the same connection parameters, where it will safely share a single
+    underlying database engine and connection pool.
+
+    Key Features:
+    - Shared Engine & Pool: Reuses a single SQLAlchemy Engine for identical
+      database URLs and pool settings, improving application performance.
+    - Reference Counting: Safely manages the lifecycle of the shared engine,
+      disposing of it only when the last user has closed its connection config.
+    - Per-Engine Connection Tracking: Monitors active connections for each
+      engine individually.
+    - Dynamic ORM Model Building: If a table name is provided, it can
+      dynamically generate a SQLAlchemy ORM model for that table.
+    - Integrated Session Management: Provides a pre-configured session factory
+      for convenient database interactions.
+
+    Attributes:
+        connection_url (str): The database URL (e.g., "postgresql://u:p@h/d").
+        table (Optional[str]): If provided, an ORM model will be built for this table.
+        model (Optional[Any]): The dynamically generated ORM model class.
+        engine (Optional[Engine]): The underlying SQLAlchemy engine.
+        logger (Logger): The logger instance.
+        debug (bool): If True, sets logger to DEBUG level.
+        session_factory (Optional[sessionmaker]): A factory for creating Session objects.
     """
-
+    # --- Public Configuration ---
     connection_url: str
     table: Optional[str] = None
-    model: Any = None
-    engine: Optional[Any] = None
     debug: bool = False
+
+    # --- Pool Configuration ---
+    pool_size: int = 5
+    max_overflow: int = 10
+    pool_timeout: int = 30
+    pool_recycle: int = 300
+    pool_pre_ping: bool = True
+    poolclass: Type[QueuePool] = QueuePool
+
+    # --- Internal & Runtime State ---
+    model: Optional[Type[Any]] = None
+    engine: Optional[Engine] = None
     logger: Optional[Logger] = None
-
-
-
+    session_factory: Optional[sessionmaker] = None
+
+    # --- Private State ---
+    _engine_key_instance: tuple = ()
+
+    # --- Class-level Shared Resources ---
+    # Registry stores engine wrappers with metadata like ref_count.
+    # Format: { engine_key: {'engine': Engine, 'ref_count': int, 'active_connections': int} }
+    _engine_registry: ClassVar[Dict[tuple, Dict[str, Any]]] = {}
+    _registry_lock: ClassVar[threading.Lock] = threading.Lock()
+
+    # Pydantic v2 configuration
+    model_config = ConfigDict(
+        arbitrary_types_allowed=True,
+    )
+
+    @field_validator("pool_size", "max_overflow", "pool_timeout", "pool_recycle")
+    @classmethod
+    def _validate_pool_params(cls, v: int) -> int:
+        if v < 0:
+            raise ValueError("Pool parameters must be non-negative")
+        return v
 
     @model_validator(mode="after")
-    def
+    def _init_all(self) -> SQLModelConnectionConfig:
+        """
+        Orchestrates the initialization process after Pydantic validation.
+        This method sets up the logger, engine, validates the connection,
+        builds the ORM model, and creates the session factory.
+        """
+        self._init_logger()
+
+        # The engine key is generated and stored on the instance to ensure
+        # that even if public attributes are changed later, this instance
+        # can always find its original engine in the registry.
+        self._engine_key_instance = self._get_engine_key()
+
+        self._init_engine()
+        self._validate_conn()
+        self._build_model()
+
+        if self.engine:
+            self.session_factory = sessionmaker(bind=self.engine, expire_on_commit=False)
+
+        return self
+
+    def _init_logger(self) -> None:
+        """Initializes the logger for this instance."""
+        if self.logger is None:
+            self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
+        # Assuming the Logger has a set_level method that accepts standard logging levels
+        log_level = Logger.DEBUG if self.debug else Logger.INFO
+        self.logger.set_level(log_level)
+
+    def _get_engine_key(self) -> tuple:
+        """
+        Generates a unique, normalized key for an engine configuration.
+        This key is used as the dictionary key in the shared _engine_registry.
+        It intentionally excludes instance-specific details like `table` to
+        ensure proper engine sharing.
+        """
+        parsed = sqlalchemy_url.make_url(self.connection_url)
+        # Exclude pooling from the query params as they are handled separately
+        query = {k: v for k, v in parsed.query.items() if not k.startswith("pool_")}
+        normalized_url = parsed.set(query=query)
+
+        key_parts = [str(normalized_url)]
+        if self.poolclass not in (NullPool, StaticPool):
+            key_parts += [
+                self.pool_size, self.max_overflow, self.pool_timeout,
+                self.pool_recycle, self.pool_pre_ping
+            ]
+        return tuple(key_parts)
+
+    def _init_engine(self) -> None:
         """
-
-
+        Initializes the SQLAlchemy Engine.
+        If an identical engine already exists in the registry, it is reused
+        and its reference count is incremented. Otherwise, a new engine
+        is created and added to the registry.
         """
-
-
+        with self._registry_lock:
+            engine_wrapper = self._engine_registry.get(self._engine_key_instance)
+
+            if engine_wrapper:
+                # --- Reuse existing engine ---
+                self.engine = engine_wrapper['engine']
+                engine_wrapper['ref_count'] += 1
+                self.logger.debug(
+                    f"Reusing engine. Ref count: {engine_wrapper['ref_count']}. Key: {self._engine_key_instance}")
+            else:
+                # --- Create new engine ---
+                self.logger.debug(f"Creating new engine for key: {self._engine_key_instance}")
+                try:
+                    new_engine = create_engine(
+                        self.connection_url,
+                        pool_size=self.pool_size,
+                        max_overflow=self.max_overflow,
+                        pool_timeout=self.pool_timeout,
+                        pool_recycle=self.pool_recycle,
+                        pool_pre_ping=self.pool_pre_ping,
+                        poolclass=self.poolclass,
+                    )
+                    self.engine = new_engine
+                    self._attach_events()
+
+                    # Store the new engine and its metadata in the registry
+                    self._engine_registry[self._engine_key_instance] = {
+                        'engine': new_engine,
+                        'ref_count': 1,
+                        'active_connections': 0
+                    }
+                except Exception as e:
+                    self.logger.error(f"Failed to create engine: {e}")
+                    raise SQLAlchemyError(f"Engine creation failed: {e}") from e
+
+    def _attach_events(self) -> None:
+        """Attaches checkout/checkin events to the engine for connection tracking."""
+        if self.engine:
+            event.listen(self.engine, "checkout", self._on_checkout)
+            event.listen(self.engine, "checkin", self._on_checkin)
+
+    def _on_checkout(self, *args) -> None:
+        """Event listener for when a connection is checked out from the pool."""
+        with self._registry_lock:
+            wrapper = self._engine_registry.get(self._engine_key_instance)
+            if wrapper:
+                wrapper['active_connections'] += 1
+        self.logger.debug(f"Connection checked out. Active: {self.active_connections}")
+
+    def _on_checkin(self, *args) -> None:
+        """Event listener for when a connection is returned to the pool."""
+        with self._registry_lock:
+            wrapper = self._engine_registry.get(self._engine_key_instance)
+            if wrapper:
+                wrapper['active_connections'] = max(0, wrapper['active_connections'] - 1)
+        self.logger.debug(f"Connection checked in. Active: {self.active_connections}")
 
-
-
+    @property
+    def active_connections(self) -> int:
+        """Returns the number of active connections for this instance's engine."""
+        with self._registry_lock:
+            wrapper = self._engine_registry.get(self._engine_key_instance)
+            return wrapper['active_connections'] if wrapper else 0
 
-
-
-
+    def _validate_conn(self) -> None:
+        """Tests the database connection by executing a simple query."""
+        try:
+            with self.managed_connection() as conn:
+                conn.execute(text("SELECT 1"))
+            self.logger.debug("Database connection validated successfully.")
+        except OperationalError as e:
+            self.logger.error(f"Database connection failed: {e}")
+            # This will be caught by Pydantic and raised as a ValidationError
+            raise ValueError(f"DB connection failed: {e}") from e
 
-
-
+    @contextmanager
+    def managed_connection(self) -> Generator[Any, None, None]:
+        """Provides a single database connection from the engine pool."""
+        if not self.engine:
+            raise RuntimeError("Engine not initialized. Cannot get a connection.")
+        conn = self.engine.connect()
+        try:
+            yield conn
+        finally:
+            conn.close()
 
-
-
+    def get_session(self) -> Session:
+        """Returns a new SQLAlchemy Session from the session factory."""
+        if not self.session_factory:
+            raise RuntimeError("Session factory not initialized. Cannot get a session.")
+        return self.session_factory()
 
+    def _build_model(self) -> None:
+        """
+        Dynamically builds and assigns an ORM model if `self.table` is set.
+        Uses SqlAlchemyModelBuilder to reflect the table schema from the database.
+        """
+        if not self.table or not self.engine:
+            return
         try:
-            builder = SQLModelModelBuilder(
-                self.engine,
-                self.table,
-                self.add_relationships,
-                self.debug,
-                self.logger
-            )
+            builder = SQLModelModelBuilder(self.engine, self.table)
             self.model = builder.build_model()
-
-            builder.export_models_to_file(self.export_file_name)
-            self.logger.debug(f"Successfully built model for table: {self.table}")
+            self.logger.debug(f"Successfully built ORM model for table: {self.table}")
         except Exception as e:
-
+            self.logger.error(f"Failed to build ORM model for table '{self.table}': {e}")
+            # Propagate as a ValueError to be caught by Pydantic validation
+            raise ValueError(f"Model construction failed for table '{self.table}': {e}") from e
 
-
+    def close(self) -> None:
+        """
+        Decrements the engine's reference count and disposes of the engine
+        if the count reaches zero. This is the primary method for releasing
+        resources managed by this configuration object.
+        """
+        with self._registry_lock:
+            key = self._engine_key_instance
+            engine_wrapper = self._engine_registry.get(key)
+
+            if not engine_wrapper:
+                self.logger.warning("Attempted to close a config whose engine is not in the registry.")
+                return
+
+            engine_wrapper['ref_count'] -= 1
+            self.logger.debug(f"Closing config. Ref count is now {engine_wrapper['ref_count']} for key {key}.")
+
+            if engine_wrapper['ref_count'] <= 0:
+                self.logger.debug(f"Disposing engine as reference count is zero. Key: {key}")
+                engine_wrapper['engine'].dispose()
+                del self._engine_registry[key]
 
-    def
+    def dispose_idle_connections(self) -> int:
         """
-
-
-
+        Closes and discards idle connections in this engine's connection pool.
+
+        This is a pool-level operation that affects only the connections held
+        by the pool, not those active at the database level.
+
+        Returns:
+            int: The number of connections disposed of.
         """
-
-
-
-
-
-
+        if not self.engine:
+            self.logger.warning("Cannot dispose idle connections: engine not initialized.")
+            return 0
+
+        pool = self.engine.pool
+        if isinstance(pool, QueuePool):
+            # The pool.dispose() method checks in all connections and clears the pool.
+            # The number of checked-in connections is the number of idle connections.
+            count = pool.checkedin()
+            pool.dispose()
+            self.logger.debug(f"Disposed {count} idle connections from the pool.")
+            return count
+
+        self.logger.warning(f"Idle connection disposal not applicable for pool type: {type(pool).__name__}")
+        return 0
+
+    def terminate_idle_connections(self, idle_seconds: int = 300) -> int:
+        """
+        Terminates idle connections at the database server level.
+
+        This is an administrative database operation that forcibly closes
+        backend processes. Use with caution. Support is dialect-specific.
+
+        Args:
+            idle_seconds (int): The minimum idle time in seconds for a
+                connection to be terminated.
+
+        Returns:
+            int: The number of connections terminated.
+        """
+        if not self.engine:
+            self.logger.warning("Cannot terminate connections: engine not initialized.")
+            return 0
+
+        terminated_count = 0
+        dialect = self.engine.dialect.name
+
+        with self.managed_connection() as conn:
+            try:
+                if dialect == 'postgresql':
+                    query = text(
+                        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
+                        "WHERE state = 'idle' "
+                        "AND (now() - query_start) > INTERVAL ':idle_seconds seconds' "
+                        "AND pid <> pg_backend_pid()"
+                    )
+                    res = conn.execute(query, {"idle_seconds": idle_seconds})
+                    terminated_count = res.rowcount if res.rowcount is not None else 0
+
+                elif dialect == 'mysql':
+                    process_list = conn.execute(text("SHOW PROCESSLIST"))
+                    for row in process_list:
+                        # Pylint compatible attribute access
+                        if getattr(row, 'Command', '') == 'Sleep' and getattr(row, 'Time', 0) > idle_seconds:
+                            conn.execute(text(f"KILL {getattr(row, 'Id')}"))
+                            terminated_count += 1
+                else:
+                    self.logger.warning(f"Idle connection termination is not supported for dialect: {dialect}")
+            except SQLAlchemyError as e:
+                self.logger.error(f"Error terminating idle connections for dialect {dialect}: {e}")
+
+        if terminated_count > 0:
+            self.logger.debug(f"Terminated {terminated_count} idle connections on the database server.")
+        return terminated_count
+
```
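To make the registry semantics concrete, a minimal usage sketch follows. It is not taken from the package documentation: the direct import from the private `_db_connection` module and the throwaway SQLite URL are assumptions for illustration only, while the attributes and methods exercised (`engine`, `managed_connection`, `active_connections`, `dispose_idle_connections`, `close`) are the ones defined in the diff above.

```python
# Minimal sketch, assuming the class is importable from its (private) module
# and using a local SQLite file purely for illustration.
from sqlalchemy import text

from sibi_dst.v2.df_helper.backends.sqlmodel._db_connection import (
    SQLModelConnectionConfig,
)

cfg_a = SQLModelConnectionConfig(connection_url="sqlite:///./demo.db")
cfg_b = SQLModelConnectionConfig(connection_url="sqlite:///./demo.db")

# Identical URL and pool settings produce the same engine key, so both
# configs share one engine and one connection pool.
assert cfg_a.engine is cfg_b.engine

with cfg_a.managed_connection() as conn:
    conn.execute(text("SELECT 1"))

print(cfg_a.active_connections)   # back to 0 once the connection is checked in
cfg_a.dispose_idle_connections()  # pool-level cleanup; the engine survives

cfg_a.close()  # ref_count 2 -> 1: engine stays alive for cfg_b
cfg_b.close()  # ref_count 1 -> 0: engine disposed and removed from the registry
```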
sibi_dst/v3/__init__.py ADDED
File without changes (empty file)

sibi_dst/v3/backends/__init__.py ADDED
File without changes (empty file)

sibi_dst/v3/df_helper/__init__.py ADDED
File without changes (empty file)
sibi_dst/v3/df_helper/_df_helper.py ADDED

```diff
@@ -0,0 +1,91 @@
+from typing import Union, Optional
+import pandas as pd
+import dask.dataframe as dd
+
+# Refactored DfHelper class
+class DfHelper:
+    """
+    DfHelper is a utility class that orchestrates loading and processing data.
+    It uses a configured BackendStrategy to handle the specifics of data loading.
+    """
+    df: Union[dd.DataFrame, pd.DataFrame] = None
+
+    def __init__(self, backend_strategy: BackendStrategy,
+                 params_config: ParamsConfig,
+                 as_pandas: bool = False,
+                 debug: bool = False,
+                 logger: Optional[Logger] = None):
+
+        self.backend_strategy = backend_strategy
+        self._backend_params = params_config  # Needed for post-processing and field mapping
+        self.as_pandas = as_pandas
+        self.debug = debug
+        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
+        self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
+
+        # Other attributes like parquet saving paths can be passed in here if needed
+        # self.parquet_storage_path = kwargs.get("parquet_storage_path")
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, traceback):
+        # Cleanup logic for resources that DfHelper itself might manage
+        # The connection cleanup is now the responsibility of the caller who creates the strategy
+        if hasattr(self.backend_strategy, 'connection') and hasattr(self.backend_strategy.connection, 'close'):
+            self.backend_strategy.connection.close()
+        return False
+
+    def load(self, **options):
+        """
+        Loads data using the configured backend strategy, applies transformations,
+        and returns a DataFrame.
+        """
+        try:
+            self.logger.debug(f"Loading data using {self.backend_strategy.__class__.__name__}...")
+            # 1. Delegate loading to the strategy object
+            self.df = self.backend_strategy.load(**options)
+
+            # 2. Perform post-processing (these methods remain in DfHelper)
+            self.__process_loaded_data()
+            self.__post_process_df()
+            self.logger.debug("Data successfully loaded and processed.")
+
+        except Exception as e:
+            self.logger.error(f"Failed to load data using {self.backend_strategy.__class__.__name__}: {e}")
+            self.df = dd.from_pandas(pd.DataFrame(), npartitions=1)
+
+        if self.as_pandas and isinstance(self.df, dd.DataFrame):
+            return self.df.compute()
+        return self.df
+
+    def load_period(self, start: str, end: str, dt_field: str, **kwargs):
+        """
+        Loads data for a specific period by delegating filter creation to the strategy.
+        """
+        if dt_field is None:
+            raise ValueError("dt_field must be provided")
+
+        # Parse and validate dates
+        start_dt = self.parse_date(start)
+        end_dt = self.parse_date(end)
+        if start_dt > end_dt:
+            raise ValueError("The 'start' date cannot be later than the 'end' date.")
+
+        # Delegate the creation of filter logic to the current strategy
+        field_map = getattr(self._backend_params, 'field_map', {}) or {}
+        period_filters = self.backend_strategy.build_period_filter(
+            dt_field, start_dt, end_dt, field_map
+        )
+
+        # Combine with other filters and load
+        all_filters = {**kwargs, **period_filters}
+        self.logger.debug(f"Loading period with combined filters: {all_filters}")
+        return self.load(**all_filters)
+
+    # The methods __process_loaded_data, __post_process_df, save_to_parquet,
+    # save_to_clickhouse, and parse_date remain unchanged as they are
+    # part of the orchestration logic, not the loading strategy itself.
+
+    # ... (paste those methods here without modification) ...
+    # ... __process_loaded_data, __post_process_df, save_to_parquet, etc. ...
```
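As released, this module does not import `BackendStrategy`, `ParamsConfig`, or `Logger`, and the post-processing methods are left as placeholders, so the v3 `DfHelper` is a scaffold rather than a working class. A minimal sketch of the strategy contract it appears to expect is shown below; the `BackendStrategy` protocol is inferred from the only two calls `DfHelper` makes (`load(**options)` and `build_period_filter(...)`), and `InMemoryBackend` is a hypothetical example, not part of sibi-dst.

```python
# Sketch only: the protocol is inferred from DfHelper's call sites, and
# InMemoryBackend is a hypothetical strategy, not shipped with sibi-dst.
from typing import Any, Dict, Protocol

import dask.dataframe as dd
import pandas as pd


class BackendStrategy(Protocol):
    def load(self, **options) -> dd.DataFrame: ...
    def build_period_filter(self, dt_field: str, start: Any, end: Any,
                            field_map: Dict[str, str]) -> Dict[str, Any]: ...


class InMemoryBackend:
    """Serves a pre-built pandas DataFrame through the strategy interface."""

    def __init__(self, frame: pd.DataFrame):
        self._frame = frame

    def load(self, **options) -> dd.DataFrame:
        df = self._frame
        # Interpret Django-style "__gte"/"__lte" suffixes, mirroring the filter
        # convention the v1/v2 backends use; bare keys are equality filters.
        for key, value in options.items():
            if key.endswith("__gte"):
                df = df[df[key[:-5]] >= value]
            elif key.endswith("__lte"):
                df = df[df[key[:-5]] <= value]
            else:
                df = df[df[key] == value]
        return dd.from_pandas(df, npartitions=1)

    def build_period_filter(self, dt_field, start, end, field_map):
        column = field_map.get(dt_field, dt_field)
        return {f"{column}__gte": start, f"{column}__lte": end}
```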
{sibi_dst-0.3.62.dist-info → sibi_dst-0.3.64.dist-info}/RECORD

```diff
@@ -1,8 +1,8 @@
 sibi_dst/__init__.py,sha256=3pbriM7Ym5f9gew7n9cO4G_p9n-0bnxdmQ0hwBdJjr4,253
 sibi_dst/df_helper/__init__.py,sha256=McYrw2N0MsMgtawLrONXTGdyHfQWVOBUvIDbklfjb54,342
 sibi_dst/df_helper/_artifact_updater_multi_wrapper.py,sha256=-Y4i5KAxKY2BNkmoVeMEZxjTFD7zaM9oQ0aRsvUbQrs,9340
-sibi_dst/df_helper/_df_helper.py,sha256=
-sibi_dst/df_helper/_parquet_artifact.py,sha256=
+sibi_dst/df_helper/_df_helper.py,sha256=tpSX5o7caTq5TnbIZ78OLJFBbT1J4Ukeld_ZTULg4yE,10856
+sibi_dst/df_helper/_parquet_artifact.py,sha256=Nio5GSD6rTYl52nf_TSpQhYIF0hKqRrB3H3A4zYnaG8,14987
 sibi_dst/df_helper/_parquet_reader.py,sha256=L6mr2FeKtTeIn37G9EGpvOx8PwMqXb6qnEECqBaiwxo,3954
 sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
@@ -15,17 +15,16 @@ sibi_dst/df_helper/backends/http/_http_config.py,sha256=eGPFdqZ5M3Tscqx2P93B6XoB
 sibi_dst/df_helper/backends/parquet/__init__.py,sha256=esWJ9aSuYC26d-T01z9dPrJ1uqJzvdaPNTYRb5qXTlQ,182
 sibi_dst/df_helper/backends/parquet/_filter_handler.py,sha256=TvDf0RXta7mwJv11GNQttYJsXgFf2XDj4oLIjt4xTzA,5219
 sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=TaU5_wG1Y3lQC8DVCItVvMnc6ZJmECLu3avssVEMbaM,10591
-sibi_dst/df_helper/backends/sqlalchemy/__init__.py,sha256=
-sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py,sha256=
-sibi_dst/df_helper/backends/sqlalchemy/
-sibi_dst/df_helper/backends/sqlalchemy/
-sibi_dst/df_helper/backends/sqlalchemy/
-sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=ksvJ0EvktrVsoJ9DTMIQHzHe8ghw2mzDIBD_YgWytgw,8402
+sibi_dst/df_helper/backends/sqlalchemy/__init__.py,sha256=LjWm9B7CweTvlvFOgB90XjSe0lVLILAIYMWKPkFXFm8,265
+sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py,sha256=gppZrXLGK8U8xfkzRQPZCIFoWY-miP04nDNHpV8lXtU,10600
+sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py,sha256=UKLWY3s3l8kMmieg3XZtEol-pj2WMb3ZB3496KbmmiA,6805
+sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=NXVhtYF2mYsrW2fXBkL29VQ5gxAlOYPJkYa8HZKYUyM,2846
+sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=Q93O_xqK0SdrS3IrijVcqky_Zf6xKjtPHdI3qnf1g8E,7457
 sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
 sibi_dst/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
 sibi_dst/df_helper/core/_filter_handler.py,sha256=Pmbzygry2mpkNPVS7DBMulHpAb1yYZNFqUU0bJTWJF0,11214
 sibi_dst/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxRrQKE5FQRxcEWsac,6736
-sibi_dst/df_helper/core/_query_config.py,sha256=
+sibi_dst/df_helper/core/_query_config.py,sha256=1ApqmuSGXTC3CdF-xMsSbCa3V2Z5hOP3Wq5huhzZwqY,439
 sibi_dst/df_helper/data_cleaner.py,sha256=lkxQoXLvGzXCicFUimnA5nen5qkrO1oxgl_p2Be2o8w,5183
 sibi_dst/geopy_helper/__init__.py,sha256=Q1RJiUZIOlV0QNNLjxZ_2IZS5LqIe5jRbeQkfD1Vm60,112
 sibi_dst/geopy_helper/geo_location_service.py,sha256=1ArI980QF_gRw096ZsABHwJt-m55jrfOlB8tPwL1BvY,2959
@@ -44,12 +43,12 @@ sibi_dst/utils/clickhouse_writer.py,sha256=iAUe4_Kn2WR1xZjpLW2FOWCWfOTw6fCGMTUcW
 sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
 sibi_dst/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
 sibi_dst/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
-sibi_dst/utils/data_wrapper.py,sha256=
+sibi_dst/utils/data_wrapper.py,sha256=Tb9bHIHI6qVsdH791BOFN1VrPb-7GS4fHhhHV8hktec,9641
 sibi_dst/utils/date_utils.py,sha256=T3ij-WOQu3cIfmNAweSVMWWr-hVtuBcTGjEY-cMJIvU,18627
 sibi_dst/utils/df_utils.py,sha256=TzIAUCLbgOn3bvCFvzkc1S9YU-OlZTImdCj-88dtg8g,11401
 sibi_dst/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
 sibi_dst/utils/filepath_generator.py,sha256=-HHO0U-PR8fysDDFwnWdHRlgqksh_RkmgBZLWv9hM7s,6669
-sibi_dst/utils/log_utils.py,sha256=
+sibi_dst/utils/log_utils.py,sha256=C2wkbxGC2n6hZIEU-z8rHrunDcq95MHkf3B1zGynHnE,4904
 sibi_dst/utils/manifest_manager.py,sha256=eyk6Dvrn86gUpAaAsnQvNnEJn5-Tno-sDDJsDMfHtTA,18161
 sibi_dst/utils/parquet_saver.py,sha256=O62xwPfphOpKgEiHqnts20CPSU96pxs49Cg7PVetLK0,8193
 sibi_dst/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
@@ -62,12 +61,12 @@ sibi_dst/v2/df_helper/__init__.py,sha256=XuH6jKYAPg2DdRbsxxBSxp9X3x-ARyaT0xe27uI
 sibi_dst/v2/df_helper/_df_helper.py,sha256=9pED3bjQ2Z81zqzJrZ9e7SguoO4-hBmNTJK4WOKrr4M,9297
 sibi_dst/v2/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py,sha256=MOEedyWqcb1_RiRYKyyWX0uFNCfBgmyYbTjco8-GBxU,262
-sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py,sha256=
-sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py,sha256=
+sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py,sha256=1xeMxPBpChXDMpmQmTYtta6y43ndU6x6szdon8KIj9g,15443
+sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py,sha256=LmUBK-HRrF-RncnZ2DsZSqxmTrAeQEyD8oqaBDfr518,5449
 sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=jhgN0OO5Sk1zQFHrMUhJn2F_hHB5g3x3EJ8j5PXNb0U,6295
 sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py,sha256=jX_mQAzl_6xdh7CTYw4uvUIX2wMp3NzXMlfbC5alOzs,13632
 sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py,sha256=LcwJjVVxxrnVZalWqnz5m7r77i9tmJR0-U2k8eSQ-m8,249
-sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py,sha256=
+sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py,sha256=n3CDbda0OY3X7eTeu_PR2KcZ5hYyEJL7Hroo8yQkjG8,15435
 sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py,sha256=wVgNPo5V75aLtlZr_SIQ-yteyXq-Rg93eMfR8JCfkSo,5422
 sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py,sha256=FIs6UrNxdJ7eDHDvTv-cJuybIue2-oCRedhW-MNe7CU,6285
 sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py,sha256=k0dnMLkLMMvkDYDYWkGFgibW5UD8pJgB3YrEg_R7pj8,13556
@@ -77,6 +76,10 @@ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxR
 sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
 sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
 sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
-sibi_dst
-sibi_dst
-sibi_dst-0
+sibi_dst/v3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sibi_dst/v3/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sibi_dst/v3/df_helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sibi_dst/v3/df_helper/_df_helper.py,sha256=NKIQ4Y-Tn-e841sbZxzLh3Q071_Zo9Vu4y3OAXcsO98,3900
+sibi_dst-0.3.64.dist-info/METADATA,sha256=S-GXRa0njyB4k2RB51TboYGXeumRmGunMY9km0UJNUE,4292
+sibi_dst-0.3.64.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+sibi_dst-0.3.64.dist-info/RECORD,,
```
|