sibi-dst 2025.1.13__py3-none-any.whl → 2025.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. sibi_dst/__init__.py +7 -1
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
  3. sibi_dst/df_helper/_df_helper.py +417 -117
  4. sibi_dst/df_helper/_parquet_artifact.py +255 -283
  5. sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
  7. sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  8. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
  9. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
  10. sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  11. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
  12. sibi_dst/osmnx_helper/route_path_builder.py +45 -46
  13. sibi_dst/utils/base.py +302 -96
  14. sibi_dst/utils/clickhouse_writer.py +472 -206
  15. sibi_dst/utils/data_utils.py +139 -186
  16. sibi_dst/utils/data_wrapper.py +317 -73
  17. sibi_dst/utils/date_utils.py +1 -0
  18. sibi_dst/utils/df_utils.py +193 -213
  19. sibi_dst/utils/file_utils.py +3 -2
  20. sibi_dst/utils/filepath_generator.py +314 -152
  21. sibi_dst/utils/log_utils.py +581 -242
  22. sibi_dst/utils/manifest_manager.py +60 -76
  23. sibi_dst/utils/parquet_saver.py +33 -27
  24. sibi_dst/utils/phone_formatter.py +88 -95
  25. sibi_dst/utils/update_planner.py +180 -178
  26. sibi_dst/utils/webdav_client.py +116 -166
  27. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
  28. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +29 -27
  29. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
sibi_dst/utils/base.py CHANGED
@@ -1,117 +1,323 @@
1
- import asyncio
2
- from .log_utils import Logger
1
+ import abc
2
+ import threading
3
+ import weakref
4
+ from typing import Self, Optional, Callable
5
+
6
+ import fsspec
7
+
8
+ from sibi_dst.utils import Logger
3
9
 
4
- class ManagedResource:
5
- """
6
- A base class providing context management for resources like loggers and filesystems.
7
10
 
8
- It handles the creation and cleanup of these resources, ensuring they are only
9
- closed if they were created by the instance itself.
11
+ class ManagedResource(abc.ABC):
12
+ """
13
+ Boilerplate ABC for components that manage a logger and an fsspec filesystem
14
+ with sync/async lifecycle helpers.
10
15
  """
11
16
 
12
- def __init__(self, **kwargs):
13
- self.debug = kwargs.get("debug", False)
14
- self.verbose = kwargs.get("verbose", False)
17
+ def __init__(
18
+ self,
19
+ *,
20
+ verbose: bool = False,
21
+ debug: bool = False,
22
+ logger: Optional[Logger] = None,
23
+ fs: Optional[fsspec.AbstractFileSystem] = None,
24
+ fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None,
25
+ **_: object,
26
+ ) -> None:
27
+ self.verbose = verbose
28
+ self.debug = debug
15
29
 
16
- # --- Logger Management (Refactored) ---
17
- logger = kwargs.get("logger")
18
- if logger:
19
- # An existing logger instance was provided by the user
30
+ # --- Logger ownership ---
31
+ if logger is None:
32
+ self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
33
+ self._owns_logger = True
34
+ self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
35
+ else:
20
36
  self.logger = logger
21
- self._own_logger = False
22
- self.logger.debug(f"'{self.__class__.__name__}' is tapping into an existing logger.")
37
+ self._owns_logger = False
38
+ # Do NOT mutate external logger level
39
+
40
+ # --- FS ownership ---
41
+ self._owns_fs = fs is None
42
+ if fs is not None:
43
+ self.fs: Optional[fsspec.AbstractFileSystem] = fs
44
+ elif fs_factory is not None:
45
+ created = fs_factory()
46
+ if not isinstance(created, fsspec.AbstractFileSystem):
47
+ raise TypeError(
48
+ f"fs_factory() must return fsspec.AbstractFileSystem, got {type(created)!r}"
49
+ )
50
+ self.fs = created
23
51
  else:
24
- # No pre-configured logger, so we will create and "own" a new one.
25
- self._own_logger = True
26
- logger_config = kwargs.get("logger_config", {})
52
+ self.fs = None # optional; subclasses may not need fs
27
53
 
28
- # Set default logger_name if not specified in the config
29
- logger_config.setdefault("logger_name", self.__class__.__name__)
54
+ self._is_closed = False
55
+ self._close_lock = threading.RLock()
30
56
 
31
- # Set log_level based on debug flag, but respect user-provided level
32
- default_level = Logger.DEBUG if self.debug else Logger.INFO
33
- logger_config.setdefault("log_level", default_level)
57
+ # register a best-effort finalizer
58
+ self._finalizer = weakref.finalize(self, self._finalize_silent)
34
59
 
35
- # Create the logger using the provided or default configuration
36
- self.logger = Logger.default_logger(**logger_config)
37
- if self.logger:
38
- self.logger.debug(f"'{self.__class__.__name__}' is starting its own logger.")
60
+ # Early debug
61
+ self.logger.debug("Component %s initialized.", self.__class__.__name__)
39
62
 
40
- fs = kwargs.get("fs")
41
- self._own_fs = fs is None
42
- self.fs = fs or None # we want to allow None as a valid fs to trigger a failure if needed
63
+ # ---------- Introspection ----------
64
+ @property
65
+ def is_closed(self) -> bool:
66
+ return self._is_closed
43
67
 
44
- self._entered = False
68
+ @property
69
+ def closed(self) -> bool: # alias
70
+ return self._is_closed
45
71
 
46
- def __enter__(self):
47
- """Enter the runtime context."""
48
- self._entered = True
49
- return self
72
+ def __repr__(self) -> str:
73
+ class_name = self.__class__.__name__
74
+ logger_status = "own" if self._owns_logger else "external"
75
+ fs_status = "none" if self.fs is None else ("own" if self._owns_fs else "external")
76
+ return f"<{class_name} debug={self.debug} logger={logger_status} fs={fs_status}>"
77
+
78
+ # ---------- Hooks for subclasses ----------
79
+ def _cleanup(self) -> None:
80
+ """Sync cleanup for resources created BY THE SUBCLASS."""
81
+ return
50
82
 
51
- def __exit__(self, exc_type, exc_val, exc_tb):
52
- """Exit the runtime context and trigger cleanup."""
53
- self.cleanup()
54
- return False # Propagate exceptions
83
+ async def _acleanup(self) -> None:
84
+ """Async cleanup for resources created BY THE SUBCLASS."""
85
+ return
55
86
 
56
- # --- Asynchronous Context Management ---
87
+ # ---------- Owned resource shutdown ----------
88
+ def _shutdown_logger(self) -> None:
89
+ if not self._owns_logger:
90
+ self.logger.debug("%s: skipping logger shutdown (not owned).", self.__class__.__name__)
91
+ return
92
+ self.logger.debug("%s: shutting down owned logger.", self.__class__.__name__)
93
+ try:
94
+ self.logger.shutdown()
95
+ except Exception: # keep shutdown robust
96
+ pass
57
97
 
58
- async def __aenter__(self):
59
- """Enter the runtime context for 'async with' statements."""
60
- self._entered = True
98
+ def _shutdown_owned_resources(self) -> None:
99
+ # fsspec FS usually has no close; if it does, call it.
100
+ if self._owns_fs and self.fs is not None:
101
+ self.logger.debug("%s: releasing owned fsspec filesystem.", self.__class__.__name__)
102
+ close = getattr(self.fs, "close", None)
103
+ try:
104
+ if callable(close):
105
+ close()
106
+ finally:
107
+ self.fs = None
108
+ else:
109
+ self.logger.debug(
110
+ "%s: skipping fs shutdown (not owned or none).", self.__class__.__name__
111
+ )
112
+ self._shutdown_logger()
113
+
114
+ async def _ashutdown_owned_resources(self) -> None:
115
+ # No async close in fsspec by default, keep parity with sync
116
+ if self._owns_fs and self.fs is not None:
117
+ self.logger.debug("%s: releasing owned fsspec filesystem (async).", self.__class__.__name__)
118
+ close = getattr(self.fs, "close", None)
119
+ try:
120
+ if callable(close):
121
+ close()
122
+ finally:
123
+ self.fs = None
124
+ self._shutdown_logger()
125
+
126
+ # ---------- Public lifecycle ----------
127
+ def close(self) -> None:
128
+ with self._close_lock:
129
+ if self._is_closed:
130
+ return
131
+ self.logger.debug("Closing component %s...", self.__class__.__name__)
132
+ try:
133
+ self._cleanup()
134
+ except Exception:
135
+ # log and propagate — callers need to know
136
+ self.logger.error(
137
+ "Error during %s._cleanup()", self.__class__.__name__, exc_info=True
138
+ )
139
+ raise
140
+ finally:
141
+ self._is_closed = True
142
+ self._shutdown_owned_resources()
143
+ self.logger.debug("Component %s closed.", self.__class__.__name__)
144
+
145
+ async def aclose(self) -> None:
146
+ with self._close_lock:
147
+ if self._is_closed:
148
+ return
149
+ self.logger.debug("Asynchronously closing component %s...", self.__class__.__name__)
150
+ # run subclass async cleanup outside of lock
151
+ try:
152
+ await self._acleanup()
153
+ except Exception:
154
+ self.logger.error(
155
+ "Error during %s._acleanup()", self.__class__.__name__, exc_info=True
156
+ )
157
+ raise
158
+ finally:
159
+ with self._close_lock:
160
+ self._is_closed = True
161
+ await self._ashutdown_owned_resources()
162
+ self.logger.debug("Async component %s closed.", self.__class__.__name__)
163
+
164
+ # ---------- Context managers ----------
165
+ def __enter__(self) -> Self:
61
166
  return self
62
167
 
63
- async def __aexit__(self, exc_type, exc_val, exc_tb):
64
- """Exit the runtime context and trigger cleanup for 'async with' statements."""
65
- await self.acleanup()
66
- return False # Propagate exceptions
168
+ def __exit__(self, exc_type, exc, tb) -> bool:
169
+ self.close()
170
+ return False # propagate exceptions
67
171
 
68
- def __repr__(self) -> str:
69
- """Return an unambiguous string representation of the ManagedResource."""
70
- # Dynamically get the name of the class or subclass
71
- class_name = self.__class__.__name__
172
+ async def __aenter__(self) -> Self:
173
+ return self
174
+
175
+ async def __aexit__(self, exc_type, exc, tb) -> bool:
176
+ await self.aclose()
177
+ return False
72
178
 
73
- # Determine the status of the logger and filesystem
74
- logger_status = "own" if self._own_logger else "external"
75
- fs_status = "own" if self._own_fs else "external"
76
-
77
- return (
78
- f"<{class_name} debug={self.debug}, "
79
- f"logger='{logger_status}', fs='{fs_status}'>"
80
- )
81
-
82
- def cleanup(self):
83
- """
84
- Cleanup resources managed by this instance.
85
- """
86
- if self._own_fs and hasattr(self.fs, "clear_instance_cache"):
87
- if self.logger:
88
- self.logger.debug(f"'{self.__class__.__name__}' is clearing its own filesystem cache.")
89
- self.fs.clear_instance_cache()
90
-
91
- if self._own_logger and hasattr(self.logger, "shutdown"):
92
- # Ensure the logger exists before trying to use or shut it down
93
- if self.logger:
94
- self.logger.debug(f"'{self.__class__.__name__}' is shutting down its own logger.")
95
- self.logger.shutdown()
96
- self.logger = None # Set to None after shutdown
97
-
98
- self._entered = False
99
-
100
- async def acleanup(self):
101
- """
102
- Async Cleanup resources managed by this instance.
103
- """
104
- if self._own_fs and hasattr(self.fs, "clear_instance_cache"):
105
- if self.logger:
106
- self.logger.debug(f"'{self.__class__.__name__}' is clearing its own filesystem cache.")
107
- self.fs.clear_instance_cache()
108
-
109
- if self._own_logger and hasattr(self.logger, "shutdown"):
110
- # Ensure the logger exists before trying to use or shut it down
111
- if self.logger:
112
- self.logger.debug(f"'{self.__class__.__name__}' is shutting down its own logger.")
113
- self.logger.shutdown()
114
- self.logger = None # Set to None after shutdown
115
-
116
- self._entered = False
179
+ # ---------- Finalizer ----------
180
+ def _finalize_silent(self) -> None:
181
+ # Best-effort, no logging (avoid noisy GC-time logs).
182
+ try:
183
+ if not self._is_closed:
184
+ self.close()
185
+ except Exception:
186
+ # absolutely swallow — GC context
187
+ pass
117
188
 
189
+ # import abc
190
+ # from typing import Self, Optional, Callable, Any
191
+ #
192
+ # import fsspec
193
+ #
194
+ # from sibi_dst.utils import Logger
195
+ #
196
+ #
197
+ # class ManagedResource(abc.ABC):
198
+ # """
199
+ # A unified boilerplate ABC for creating manageable components.
200
+ #
201
+ # It provides integrated ownership and lifecycle management for a custom
202
+ # logger and a fsspec filesystem client, with full async support.
203
+ # """
204
+ #
205
+ # def __init__(
206
+ # self,
207
+ # *,
208
+ # verbose: bool = False,
209
+ # debug: bool = False,
210
+ # logger: Optional[Logger] = None,
211
+ # fs: Optional[fsspec.AbstractFileSystem] = None,
212
+ # fs_factory: Optional[Callable[[], Any]] = None,
213
+ # **kwargs: Any,
214
+ # ) -> None:
215
+ # self.debug = debug
216
+ # self.verbose = verbose
217
+ #
218
+ # self._is_closed = False
219
+ # self._owns_logger: bool
220
+ # self.fs, self._owns_fs = (fs, False) if fs else (None, True)
221
+ # if self._owns_fs and fs_factory:
222
+ # self.fs = fs_factory
223
+ # self.logger, self._owns_logger = (logger, False) if logger else (
224
+ # Logger.default_logger(logger_name=f"{self.__class__.__name__}"), True)
225
+ # self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
226
+ # self.logger.debug(f"Component: {self.__class__.__name__} initialized.")
227
+ #
228
+ # @property
229
+ # def is_closed(self) -> bool:
230
+ # return self._is_closed
231
+ #
232
+ # # Private methods for cleanup in the subclass
233
+ # def _cleanup(self) -> None:
234
+ # """Cleanup for resources created BY THE SUBCLASS."""
235
+ # pass
236
+ #
237
+ # async def _acleanup(self) -> None:
238
+ # """Async cleanup for resources created BY THE SUBCLASS."""
239
+ # pass
240
+ #
241
+ # # --- Private Shutdown Helpers ---
242
+ # def _shutdown_logger(self) -> None:
243
+ # # Your provided logger shutdown logic
244
+ # if not self._owns_logger:
245
+ # self.logger.debug(f"{self.__class__.__name__} is skipping logger shutdown (not owned).")
246
+ # return
247
+ # self.logger.debug(f"{self.__class__.__name__} is shutting down self-managed logger.")
248
+ # self.logger.shutdown()
249
+ #
250
+ # def _shutdown_owned_resources(self) -> None:
251
+ # if self._owns_fs and isinstance(self.fs, fsspec.AbstractFileSystem):
252
+ # self.logger.debug(f"{self.__class__.__name__} is shutting down self-managed fsspec client synchronously.")
253
+ # del self.fs
254
+ # else:
255
+ # self.logger.debug(
256
+ # f"{self.__class__.__name__} is skipping fsspec client shutdown (not owned or not an fsspec client).")
257
+ # self._shutdown_logger()
258
+ #
259
+ # async def _ashutdown_owned_resources(self) -> None:
260
+ # """Internal method to shut down all owned resources ASYNCHRONOUSLY."""
261
+ #
262
+ # if self._owns_fs and isinstance(self.fs, fsspec.AbstractFileSystem):
263
+ # self.logger.debug(f"{self.__class__.__name__} is shutting down self-managed fsspec client asynchronously.")
264
+ # del self.fs
265
+ #
266
+ # self._shutdown_logger()
267
+ #
268
+ # # Methods for Cleanup ---
269
+ # def close(self) -> None:
270
+ # if self._is_closed: return
271
+ # self.logger.debug(f"Closing component...{self.__class__.__name__}")
272
+ # try:
273
+ # self._cleanup()
274
+ # except Exception as e:
275
+ # self.logger.error(f"Error during subclass {self.__class__.__name__} cleanup: {e}", exc_info=True)
276
+ # raise
277
+ # finally:
278
+ # self._is_closed = True
279
+ # self._shutdown_owned_resources()
280
+ # self.logger.debug(f"Component {self.__class__.__name__} closed successfully.")
281
+ #
282
+ # async def aclose(self) -> None:
283
+ # if self._is_closed: return
284
+ # self.logger.debug(f"Asynchronously closing component...{self.__class__.__name__}")
285
+ # try:
286
+ # await self._acleanup()
287
+ # except Exception as e:
288
+ # self.logger.error(f"Error during async subclass cleanup: {e}", exc_info=True)
289
+ # raise
290
+ # finally:
291
+ # self._is_closed = True
292
+ # await self._ashutdown_owned_resources()
293
+ # self.logger.debug(f"Async Component {self.__class__.__name__} closed successfully.")
294
+ #
295
+ # def __repr__(self) -> str:
296
+ # """Return a string representation of the ManagedResource."""
297
+ # # Dynamically get the name of the class or subclass
298
+ # class_name = self.__class__.__name__
299
+ #
300
+ # # Determine the status of the logger and filesystem
301
+ # logger_status = "own" if self._owns_logger else "external"
302
+ # fs_status = "own" if self._owns_fs else "external"
303
+ # return (
304
+ # f"<{class_name} debug={self.debug}, "
305
+ # f"logger='{logger_status}', fs='{fs_status}'>"
306
+ # )
307
+ #
308
+ # # --- Context Management and Destruction ---
309
+ # def __enter__(self) -> Self:
310
+ # return self
311
+ #
312
+ # def __exit__(self, *args) -> None:
313
+ # self.close()
314
+ #
315
+ # async def __aenter__(self) -> Self:
316
+ # return self
317
+ #
318
+ # async def __aexit__(self, *args) -> None:
319
+ # await self.aclose()
320
+ #
321
+ # def __del__(self) -> None:
322
+ # if not self._is_closed:
323
+ # self.logger.critical(f"CRITICAL: Component {self!r} was not closed properly.")