sibi-dst 2025.1.12__py3-none-any.whl → 2025.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. sibi_dst/__init__.py +7 -1
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
  3. sibi_dst/df_helper/_df_helper.py +417 -117
  4. sibi_dst/df_helper/_parquet_artifact.py +255 -283
  5. sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
  7. sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  8. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
  9. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
  10. sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  11. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
  12. sibi_dst/osmnx_helper/__init__.py +1 -0
  13. sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +203 -0
  14. sibi_dst/osmnx_helper/route_path_builder.py +97 -0
  15. sibi_dst/osmnx_helper/utils.py +2 -0
  16. sibi_dst/utils/base.py +302 -96
  17. sibi_dst/utils/clickhouse_writer.py +472 -206
  18. sibi_dst/utils/data_utils.py +139 -186
  19. sibi_dst/utils/data_wrapper.py +317 -73
  20. sibi_dst/utils/date_utils.py +1 -0
  21. sibi_dst/utils/df_utils.py +193 -213
  22. sibi_dst/utils/file_utils.py +3 -2
  23. sibi_dst/utils/filepath_generator.py +314 -152
  24. sibi_dst/utils/log_utils.py +581 -242
  25. sibi_dst/utils/manifest_manager.py +60 -76
  26. sibi_dst/utils/parquet_saver.py +33 -27
  27. sibi_dst/utils/phone_formatter.py +88 -95
  28. sibi_dst/utils/update_planner.py +180 -178
  29. sibi_dst/utils/webdav_client.py +116 -166
  30. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
  31. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +32 -28
  32. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py
@@ -1,15 +1,11 @@
+
 from __future__ import annotations
 import os
 import threading
 from contextlib import contextmanager
 from typing import Any, Optional, ClassVar, Generator, Type, Dict
 
-from pydantic import (
-    BaseModel,
-    field_validator,
-    model_validator,
-    ConfigDict,
-)
+from pydantic import BaseModel, field_validator, model_validator, ConfigDict
 from sqlalchemy import create_engine, event, text
 from sqlalchemy.engine import url as sqlalchemy_url
 from sqlalchemy.engine import Engine
@@ -17,32 +13,18 @@ from sqlalchemy.exc import OperationalError, SQLAlchemyError
 from sqlalchemy.orm import sessionmaker, Session
 from sqlalchemy.pool import QueuePool, NullPool, StaticPool, Pool
 
-# Assuming these are your project's internal modules
 from sibi_dst.utils import Logger
 from ._sql_model_builder import SqlAlchemyModelBuilder
 
+_ENGINE_REGISTRY_LOCK = threading.RLock()
+_ENGINE_REGISTRY: Dict[tuple, Dict[str, Any]] = {}
+
 
 class SqlAlchemyConnectionConfig(BaseModel):
     """
-    A thread-safe, registry-backed SQLAlchemy connection manager.
-
-    This class encapsulates database connection configuration and provides robust,
-    shared resource management. It is designed to be used as a context manager
-    to ensure resources are always released correctly.
-
-    Recommended Usage is via the `with` statement.
-        with SqlAlchemyConnectionConfig(...) as config:
-            session = config.get_session()
-            # ... do work ...
-        # config.close() is called automatically upon exiting the block.
-
-    Key Features:
-    - Context Manager Support: Guarantees resource cleanup.
-    - Shared Engine & Pool: Reuses a single SQLAlchemy Engine for identical
-      database URLs and pool settings, improving application performance.
-    - Reference Counting: Safely manages the lifecycle of the shared engine,
-      disposing of it only when the last user has closed its connection config.
+    Thread-safe, registry-backed SQLAlchemy connection manager.
     """
+
     # --- Public Configuration ---
     connection_url: str
     table: Optional[str] = None
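Note: the long class docstring removed above described the intended usage pattern, which still applies in the new version because the class remains a context manager whose close() runs on exit. A minimal sketch of that pattern, with the import path inferred from the file layout above and a placeholder SQLite URL:

    from sibi_dst.df_helper.backends.sqlalchemy._db_connection import SqlAlchemyConnectionConfig

    with SqlAlchemyConnectionConfig(connection_url="sqlite:///example.db") as config:
        session = config.get_session()
        try:
            ...  # do work with the session
        finally:
            session.close()
    # On exiting the block, close() decrements the shared engine's reference count.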
@@ -50,36 +32,28 @@ class SqlAlchemyConnectionConfig(BaseModel):
 
     # --- Pool Configuration ---
     pool_size: int = int(os.environ.get("DB_POOL_SIZE", 5))
-    max_overflow: int = int(os.environ.get("DB_MAX_OVERFLOW",10))
+    max_overflow: int = int(os.environ.get("DB_MAX_OVERFLOW", 10))
     pool_timeout: int = int(os.environ.get("DB_POOL_TIMEOUT", 30))
     pool_recycle: int = int(os.environ.get("DB_POOL_RECYCLE", 1800))
     pool_pre_ping: bool = True
     poolclass: Type[Pool] = QueuePool
 
-    # --- Internal & Runtime State ---
+    # --- Internal & Runtime State (normal fields; Pydantic allowed) ---
     model: Optional[Type[Any]] = None
    engine: Optional[Engine] = None
     logger: Optional[Logger] = None
-    _own_logger: bool = False  # Indicates if this instance owns the logger.
+    _own_logger: bool = False
     session_factory: Optional[sessionmaker] = None
 
-    # --- Private State ---
+    # --- Private State (plain Python values only) ---
     _engine_key_instance: tuple = ()
-    _closed: bool = False  # Flag to prevent double-closing.
-
-    # --- Class-level Shared Resources ---
-    _engine_registry: ClassVar[Dict[tuple, Dict[str, Any]]] = {}
-    _registry_lock: ClassVar[threading.Lock] = threading.Lock()
-
+    _closed: bool = False  # prevent double-closing
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-    # Add __enter__ and __exit__ for context manager protocol
-    def __enter__(self) -> SqlAlchemyConnectionConfig:
-        """Enter the runtime context, returning self."""
+    def __enter__(self) -> "SqlAlchemyConnectionConfig":
         return self
 
     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
-        """Exit the runtime context, ensuring that close() is called."""
         self.close()
 
     @field_validator("pool_size", "max_overflow", "pool_timeout", "pool_recycle")
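Note: the pool fields above read their defaults from environment variables when the class body is evaluated, i.e. at import time, so DB_POOL_SIZE and friends only take effect if they are set before the module is imported; per-instance values can instead be passed directly, since these are ordinary Pydantic fields. A sketch of both options (import path and URL are placeholders):

    import os

    # Option 1: set before importing the module, because the defaults call
    # os.environ.get() when the class is defined.
    os.environ["DB_POOL_SIZE"] = "10"

    from sibi_dst.df_helper.backends.sqlalchemy._db_connection import SqlAlchemyConnectionConfig

    # Option 2: pass pool settings explicitly on construction.
    cfg = SqlAlchemyConnectionConfig(
        connection_url="sqlite:///example.db",
        pool_size=10,
        max_overflow=20,
    )
    cfg.close()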
@@ -90,8 +64,7 @@ class SqlAlchemyConnectionConfig(BaseModel):
         return v
 
     @model_validator(mode="after")
-    def _init_all(self) -> SqlAlchemyConnectionConfig:
-        """Orchestrates the initialization process after Pydantic validation."""
+    def _init_all(self) -> "SqlAlchemyConnectionConfig":
         self._init_logger()
         self._engine_key_instance = self._get_engine_key()
         self._init_engine()
@@ -102,17 +75,12 @@ class SqlAlchemyConnectionConfig(BaseModel):
         return self
 
     def _init_logger(self) -> None:
-        """Initializes the logger for this instance."""
-        # This is not a ManagedResource subclass, so we handle logger initialization directly.
-        # unless a logger is provided, we create our own.
         if self.logger is None:
             self._own_logger = True
             self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
-        log_level = Logger.DEBUG if self.debug else Logger.INFO
-        self.logger.set_level(log_level)
+        self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
 
     def _get_engine_key(self) -> tuple:
-        """Generates a unique, normalized key for an engine configuration."""
         parsed = sqlalchemy_url.make_url(self.connection_url)
         query = {k: v for k, v in parsed.query.items() if not k.startswith("pool_")}
         normalized_url = parsed.set(query=query)
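Note: _get_engine_key strips pool_* query parameters from the URL before the key parts (assembled in lines not shown here) are combined into the registry key, so two URLs that differ only in such parameters resolve to the same engine entry. A sketch of just the normalization step, with a placeholder URL:

    from sqlalchemy.engine import url as sqlalchemy_url

    raw = "mysql+pymysql://user@db-host/mydb?charset=utf8mb4&pool_size=5"
    parsed = sqlalchemy_url.make_url(raw)
    query = {k: v for k, v in parsed.query.items() if not k.startswith("pool_")}
    print(parsed.set(query=query))  # mysql+pymysql://user@db-host/mydb?charset=utf8mb4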
@@ -125,104 +93,97 @@ class SqlAlchemyConnectionConfig(BaseModel):
         return tuple(key_parts)
 
     def _init_engine(self) -> None:
-        """Initializes or reuses a shared SQLAlchemy Engine."""
-        with self._registry_lock:
-            engine_wrapper = self._engine_registry.get(self._engine_key_instance)
-            if engine_wrapper:
-                self.engine = engine_wrapper['engine']
-                engine_wrapper['ref_count'] += 1
-                self.logger.debug(f"Reusing engine. Ref count: {engine_wrapper['ref_count']}.")
+        with _ENGINE_REGISTRY_LOCK:
+            wrapper = _ENGINE_REGISTRY.get(self._engine_key_instance)
+            if wrapper:
+                self.engine = wrapper["engine"]
+                wrapper["ref_count"] += 1
+                if self.debug:
+                    self.logger.debug(f"Reusing engine. Ref count: {wrapper['ref_count']}.")
             else:
-                self.logger.debug(f"Creating new engine for key: {self._engine_key_instance}")
+                if self.debug:
+                    self.logger.debug(f"Creating new engine for key: {self._engine_key_instance}")
                 try:
                     new_engine = create_engine(
-                        self.connection_url, pool_size=self.pool_size,
-                        max_overflow=self.max_overflow, pool_timeout=self.pool_timeout,
-                        pool_recycle=self.pool_recycle, pool_pre_ping=self.pool_pre_ping,
+                        self.connection_url,
+                        pool_size=self.pool_size,
+                        max_overflow=self.max_overflow,
+                        pool_timeout=self.pool_timeout,
+                        pool_recycle=self.pool_recycle,
+                        pool_pre_ping=self.pool_pre_ping,
                         poolclass=self.poolclass,
                     )
                     self.engine = new_engine
                     self._attach_events()
-                    self._engine_registry[self._engine_key_instance] = {
-                        'engine': new_engine, 'ref_count': 1, 'active_connections': 0
+                    _ENGINE_REGISTRY[self._engine_key_instance] = {
+                        "engine": new_engine,
+                        "ref_count": 1,
+                        "active_connections": 0,
                     }
                 except Exception as e:
                     self.logger.error(f"Failed to create engine: {e}")
                     raise SQLAlchemyError(f"Engine creation failed: {e}") from e
 
-        #self.logger.debug(f"Connections Active: {self.active_connections}")
-
     def close(self) -> None:
-        """
-        Decrements the engine's reference count and disposes of the engine
-        if the count reaches zero. This is now typically called automatically
-        when exiting a `with` block.
-        """
-        # Prevent the method from running more than once per instance.
         if self._closed:
-            self.logger.debug("Attempted to close an already-closed config instance.")
+            if self.debug:
+                self.logger.debug("Attempted to close an already-closed config instance.")
             return
 
-        with self._registry_lock:
+        with _ENGINE_REGISTRY_LOCK:
             key = self._engine_key_instance
-            engine_wrapper = self._engine_registry.get(key)
-
-            if not engine_wrapper:
+            wrapper = _ENGINE_REGISTRY.get(key)
+            if not wrapper:
                 self.logger.warning("Attempted to close a config whose engine is not in the registry.")
-                return
-
-            engine_wrapper['ref_count'] -= 1
-            self.logger.debug(f"Closing connection within engine wrapper. Ref count is now {engine_wrapper['ref_count']}.")
-
-            if engine_wrapper['ref_count'] <= 0:
-                self.logger.debug(f"Disposing engine as reference count is zero. Key: {key}")
-                engine_wrapper['engine'].dispose()
-                del self._engine_registry[key]
-
-        # Mark this instance as closed to prevent subsequent calls.
+            else:
+                wrapper["ref_count"] -= 1
+                if self.debug:
+                    self.logger.debug(f"Closing connection. Ref count now {wrapper['ref_count']}.")
+                if wrapper["ref_count"] <= 0:
+                    if self.debug:
+                        self.logger.debug(f"Disposing engine as reference count is zero. Key: {key}")
+                    try:
+                        wrapper["engine"].dispose()
+                    finally:
+                        del _ENGINE_REGISTRY[key]
         self._closed = True
 
-
     def _attach_events(self) -> None:
-        """Attaches checkout/checkin events to the engine for connection tracking."""
-        if self.engine:
-            event.listen(self.engine, "checkout", self._on_checkout)
-            event.listen(self.engine, "checkin", self._on_checkin)
+        if not self.engine:
+            return
+        event.listen(self.engine, "checkout", self._on_checkout)
+        event.listen(self.engine, "checkin", self._on_checkin)
 
     def _on_checkout(self, *args) -> None:
-        """Event listener for when a connection is checked out from the pool."""
-        with self._registry_lock:
-            wrapper = self._engine_registry.get(self._engine_key_instance)
+        with _ENGINE_REGISTRY_LOCK:
+            wrapper = _ENGINE_REGISTRY.get(self._engine_key_instance)
             if wrapper:
-                wrapper['active_connections'] += 1
+                wrapper["active_connections"] += 1
 
     def _on_checkin(self, *args) -> None:
-        """Event listener for when a connection is returned to the pool."""
-        with self._registry_lock:
-            wrapper = self._engine_registry.get(self._engine_key_instance)
+        with _ENGINE_REGISTRY_LOCK:
+            wrapper = _ENGINE_REGISTRY.get(self._engine_key_instance)
             if wrapper:
-                wrapper['active_connections'] = max(0, wrapper['active_connections'] - 1)
+                wrapper["active_connections"] = max(0, wrapper["active_connections"] - 1)
 
     @property
     def active_connections(self) -> int:
-        """Returns the number of active connections for this instance's engine."""
-        with self._registry_lock:
-            wrapper = self._engine_registry.get(self._engine_key_instance)
-            return wrapper['active_connections'] if wrapper else 0
+        with _ENGINE_REGISTRY_LOCK:
+            wrapper = _ENGINE_REGISTRY.get(self._engine_key_instance)
+            return wrapper["active_connections"] if wrapper else 0
 
     def _validate_conn(self) -> None:
-        """Tests the database connection by executing a simple query."""
         try:
             with self.managed_connection() as conn:
                 conn.execute(text("SELECT 1"))
-            self.logger.debug("Database connection validated successfully.")
+            if self.debug:
+                self.logger.debug("Database connection validated successfully.")
         except OperationalError as e:
             self.logger.error(f"Database connection failed: {e}")
             raise ValueError(f"DB connection failed: {e}") from e
 
     @contextmanager
    def managed_connection(self) -> Generator[Any, None, None]:
-        """Provides a single database connection from the engine pool."""
         if not self.engine:
             raise RuntimeError("Engine not initialized. Cannot get a connection.")
         conn = self.engine.connect()
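Note: because the registry and its lock are now module-level, every live instance created from an equivalent URL and pool configuration shares one Engine, and the engine is disposed only when the last of them is closed. A minimal sketch of the reference-counting behaviour (placeholder URL, import path inferred from the file layout):

    from sibi_dst.df_helper.backends.sqlalchemy._db_connection import SqlAlchemyConnectionConfig

    a = SqlAlchemyConnectionConfig(connection_url="sqlite:///shared.db")
    b = SqlAlchemyConnectionConfig(connection_url="sqlite:///shared.db")
    assert a.engine is b.engine   # second instance reuses the registered engine

    a.close()                     # ref_count drops to 1; engine stays alive
    b.close()                     # ref_count reaches 0; engine.dispose() runs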
@@ -232,19 +193,19 @@ class SqlAlchemyConnectionConfig(BaseModel):
             conn.close()
 
     def get_session(self) -> Session:
-        """Returns a new SQLAlchemy Session from the session factory."""
         if not self.session_factory:
             raise RuntimeError("Session factory not initialized. Cannot get a session.")
         return self.session_factory()
 
     def _build_model(self) -> None:
-        """Dynamically builds an ORM model if `self.table` is set."""
         if not self.table or not self.engine:
             return
         try:
             builder = SqlAlchemyModelBuilder(self.engine, self.table)
             self.model = builder.build_model()
-            self.logger.debug(f"Successfully built ORM model for table: {self.table}")
+            if self.debug:
+                self.logger.debug(f"Successfully built ORM model for table: {self.table}")
         except Exception as e:
             self.logger.error(f"Failed to build ORM model for table '{self.table}': {e}")
             raise ValueError(f"Model construction failed for table '{self.table}': {e}") from e
+
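Note: when table is supplied, _build_model attaches the dynamically built ORM class to config.model. Assuming the post-validation initializer invokes it (the visible _init_all lines suggest this but do not show it), the model can be queried through a session from get_session(). A sketch with a hypothetical customers table and placeholder URL:

    from sibi_dst.df_helper.backends.sqlalchemy._db_connection import SqlAlchemyConnectionConfig

    with SqlAlchemyConnectionConfig(
        connection_url="sqlite:///example.db",  # placeholder
        table="customers",                      # hypothetical table name
    ) as cfg:
        session = cfg.get_session()
        try:
            rows = session.query(cfg.model).limit(5).all()
        finally:
            session.close()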
sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py (new file)
@@ -0,0 +1,15 @@
+import threading
+
+
+class DBGatekeeper:
+    _locks = {}
+    _global_lock = threading.Lock()
+
+    @classmethod
+    def get(cls, key: str, max_concurrency: int):
+        with cls._global_lock:
+            sem = cls._locks.get(key)
+            if sem is None:
+                sem = threading.BoundedSemaphore(max_concurrency)
+                cls._locks[key] = sem
+            return sem
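Note: DBGatekeeper.get hands out one process-wide threading.BoundedSemaphore per key, so callers sharing a key can cap how many of them touch the database at once; the first call for a given key fixes the limit, and later calls return the same semaphore regardless of the max_concurrency they pass. A usage sketch (the key string and limit are arbitrary):

    from sibi_dst.df_helper.backends.sqlalchemy._db_gatekeeper import DBGatekeeper

    sem = DBGatekeeper.get("mysql+pymysql://db-host/mydb", max_concurrency=4)
    with sem:  # at most 4 concurrent holders for this key
        ...    # run the query / load the partition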