sibi-dst 2025.1.12__py3-none-any.whl → 2025.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. sibi_dst/__init__.py +7 -1
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
  3. sibi_dst/df_helper/_df_helper.py +417 -117
  4. sibi_dst/df_helper/_parquet_artifact.py +255 -283
  5. sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
  7. sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  8. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
  9. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
  10. sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  11. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
  12. sibi_dst/osmnx_helper/__init__.py +1 -0
  13. sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +203 -0
  14. sibi_dst/osmnx_helper/route_path_builder.py +97 -0
  15. sibi_dst/osmnx_helper/utils.py +2 -0
  16. sibi_dst/utils/base.py +302 -96
  17. sibi_dst/utils/clickhouse_writer.py +472 -206
  18. sibi_dst/utils/data_utils.py +139 -186
  19. sibi_dst/utils/data_wrapper.py +317 -73
  20. sibi_dst/utils/date_utils.py +1 -0
  21. sibi_dst/utils/df_utils.py +193 -213
  22. sibi_dst/utils/file_utils.py +3 -2
  23. sibi_dst/utils/filepath_generator.py +314 -152
  24. sibi_dst/utils/log_utils.py +581 -242
  25. sibi_dst/utils/manifest_manager.py +60 -76
  26. sibi_dst/utils/parquet_saver.py +33 -27
  27. sibi_dst/utils/phone_formatter.py +88 -95
  28. sibi_dst/utils/update_planner.py +180 -178
  29. sibi_dst/utils/webdav_client.py +116 -166
  30. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
  31. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +32 -28
  32. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,97 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import networkx as nx
4
+ import osmnx as ox
5
+ from typing import List, Optional
6
+ from pydantic import BaseModel
7
+
8
class RoutePathBuilderConfig(BaseModel):
    """
    A Pydantic model to validate the configuration for the RoutePathBuilder.
    """
    # Routable street network the paths are computed on; accepted only because
    # arbitrary_types_allowed is enabled below (nx.MultiDiGraph is not a pydantic type).
    graph: nx.MultiDiGraph
    # Column name(s) that define the ordering of GPS points before shifting.
    sort_key: List[str]  # Made mandatory
    # Optional column to group by so destinations are shifted within each group
    # (e.g. a vehicle or trip id); when None, the whole frame is shifted.
    grouping_col: Optional[str] = None
    # Names of the latitude/longitude columns in the input DataFrame.
    lat_col: str = "latitude"
    lon_col: str = "longitude"

    class Config:
        # Required so pydantic accepts the non-pydantic graph type above.
        arbitrary_types_allowed = True
20
+
21
class RoutePathBuilder:
    """
    Builds shortest paths (Dijkstra Algorithm) for consecutive GPS points.

    This version requires an explicit sort_key for correctness.
    """

    def __init__(self, config: RoutePathBuilderConfig):
        """
        Initializes the builder with a validated configuration object.
        """
        self.config = config

    @staticmethod
    def _get_shortest_path(u: int, v: int, graph: nx.MultiDiGraph) -> List[int]:
        """Return the Dijkstra shortest path (by edge 'length') from u to v.

        Returns an empty list when no path exists between the two nodes.
        """
        try:
            return nx.shortest_path(graph, u, v, weight="length", method="dijkstra")
        except nx.NetworkXNoPath:
            return []

    @staticmethod
    def _path_length_from_nodes(node_list: List[int], graph: nx.MultiDiGraph) -> float:
        """Sum edge lengths along node_list, taking the shortest parallel edge.

        Returns NaN for degenerate paths (fewer than two nodes).
        """
        if len(node_list) < 2:
            return np.nan
        total = 0.0
        for u, v in zip(node_list[:-1], node_list[1:]):
            # get_edge_data returns None when no edge exists (e.g. if the graph
            # was mutated after routing) — guard so we never crash on .values().
            edge_data = graph.get_edge_data(u, v) or {}
            # MultiDiGraph can hold parallel edges; take the shortest one.
            lengths = [edata.get("length", 0) for edata in edge_data.values()]
            total += min(lengths) if lengths else 0
        return total

    def build_routes(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Generates routes from a DataFrame of GPS points.

        Adds origin_node/dest_node (snapped graph nodes), path_nodes,
        path_coords ((lat, lon) tuples) and distance_m columns; rows whose
        destination is missing or unreachable are dropped.
        """
        df = df.copy()

        # 1. Deterministic ordering is required before shifting destinations.
        df = df.sort_values(by=self.config.sort_key).reset_index(drop=True)

        # 2. Create destination columns by shifting within each group or across the df
        if self.config.grouping_col:
            df["dest_lat"] = df.groupby(by=self.config.grouping_col)[self.config.lat_col].shift(-1)
            df["dest_lon"] = df.groupby(by=self.config.grouping_col)[self.config.lon_col].shift(-1)
        else:
            df["dest_lat"] = df[self.config.lat_col].shift(-1)
            df["dest_lon"] = df[self.config.lon_col].shift(-1)

        df = df.dropna(subset=["dest_lat", "dest_lon"]).reset_index(drop=True)

        # Guard: ox.nearest_nodes raises on empty coordinate arrays, so return
        # early with the full output schema when there is nothing to route.
        if df.empty:
            df["origin_node"] = pd.Series(dtype="int64")
            df["dest_node"] = pd.Series(dtype="int64")
            df["path_nodes"] = pd.Series(dtype=object)
            df["path_coords"] = pd.Series(dtype=object)
            df["distance_m"] = pd.Series(dtype=float)
            return df

        # 3. Snap origin & destination coordinates to the nearest graph nodes
        df["origin_node"] = ox.nearest_nodes(
            self.config.graph, X=df[self.config.lon_col].values, Y=df[self.config.lat_col].values
        )
        df["dest_node"] = ox.nearest_nodes(
            self.config.graph, X=df["dest_lon"].values, Y=df["dest_lat"].values
        )

        # 4. Calculate paths, coordinates, and distances
        df["path_nodes"] = [
            self._get_shortest_path(u, v, self.config.graph)
            for u, v in zip(df["origin_node"], df["dest_node"])
        ]

        # Drop unreachable pairs. Use apply(len): the cells hold lists, so the
        # .str accessor (meant for strings) is the wrong tool here.
        df = df[df["path_nodes"].apply(len) > 0].reset_index(drop=True)

        df["path_coords"] = df["path_nodes"].apply(
            lambda nl: [(self.config.graph.nodes[n]["y"], self.config.graph.nodes[n]["x"]) for n in nl]
        )

        df["distance_m"] = df["path_nodes"].apply(
            lambda nl: self._path_length_from_nodes(nl, self.config.graph)
        )
        df["distance_m"] = df["distance_m"].fillna(0)

        # The final sort is no longer needed, as it was done at the beginning
        return df
@@ -442,6 +442,8 @@ def get_graph(**options):
442
442
  - The list or collection of edges that describe relationships
443
443
  between nodes in the graph
444
444
  """
445
+ if not options:
446
+ raise ValueError("No options provided to PBFHandler for graph creation.")
445
447
  handler = PBFHandler(**options)
446
448
  handler.load()
447
449
  return handler.graph, handler.nodes, handler.edges
sibi_dst/utils/base.py CHANGED
@@ -1,117 +1,323 @@
import abc
import threading
import weakref
from typing import Self, Optional, Callable

import fsspec

from sibi_dst.utils import Logger


def _silent_dispose(
    fs: Optional[fsspec.AbstractFileSystem], logger: Optional[Logger]
) -> None:
    """GC-time, best-effort release of *owned* resources.

    Registered with ``weakref.finalize``. It must be a free function receiving
    the resources directly: ``weakref.finalize`` keeps strong references to its
    callback and arguments, so a bound method (which references the instance)
    would keep the instance alive forever and only ever run at interpreter exit.
    """
    try:
        if fs is not None:
            close = getattr(fs, "close", None)
            if callable(close):
                close()
    except Exception:
        pass  # GC context — never raise
    try:
        if logger is not None:
            logger.shutdown()
    except Exception:
        pass  # GC context — never raise


class ManagedResource(abc.ABC):
    """
    Boilerplate ABC for components that manage a logger and an fsspec filesystem
    with sync/async lifecycle helpers.

    Ownership rules: a logger/filesystem passed in by the caller is *external*
    and never shut down here; one created by this class (or via ``fs_factory``)
    is *owned* and released on close. ``close()``/``aclose()`` are idempotent
    and safe to race from multiple threads.
    """

    def __init__(
        self,
        *,
        verbose: bool = False,
        debug: bool = False,
        logger: Optional[Logger] = None,
        fs: Optional[fsspec.AbstractFileSystem] = None,
        fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None,
        **_: object,
    ) -> None:
        self.verbose = verbose
        self.debug = debug

        # --- Logger ownership ---
        if logger is None:
            self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
            self._owns_logger = True
            self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
        else:
            self.logger = logger
            self._owns_logger = False
            # Do NOT mutate external logger level

        # --- FS ownership ---
        self._owns_fs = fs is None
        if fs is not None:
            self.fs: Optional[fsspec.AbstractFileSystem] = fs
        elif fs_factory is not None:
            created = fs_factory()
            if not isinstance(created, fsspec.AbstractFileSystem):
                raise TypeError(
                    f"fs_factory() must return fsspec.AbstractFileSystem, got {type(created)!r}"
                )
            self.fs = created
        else:
            self.fs = None  # optional; subclasses may not need fs

        self._is_closed = False
        self._close_lock = threading.RLock()

        # Best-effort finalizer for owned resources only. The callback takes
        # the resources as arguments instead of a bound method so that it does
        # not hold a reference to ``self`` (see _silent_dispose). Subclass
        # _cleanup cannot run at GC time under this scheme — callers must use
        # close()/aclose() or the context-manager protocol for full cleanup.
        self._finalizer = weakref.finalize(
            self,
            _silent_dispose,
            self.fs if self._owns_fs else None,
            self.logger if self._owns_logger else None,
        )

        # Early debug
        self.logger.debug("Component %s initialized.", self.__class__.__name__)

    # ---------- Introspection ----------
    @property
    def is_closed(self) -> bool:
        return self._is_closed

    @property
    def closed(self) -> bool:  # alias
        return self._is_closed

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        logger_status = "own" if self._owns_logger else "external"
        fs_status = "none" if self.fs is None else ("own" if self._owns_fs else "external")
        return f"<{class_name} debug={self.debug} logger={logger_status} fs={fs_status}>"

    # ---------- Hooks for subclasses ----------
    def _cleanup(self) -> None:
        """Sync cleanup for resources created BY THE SUBCLASS."""
        return

    async def _acleanup(self) -> None:
        """Async cleanup for resources created BY THE SUBCLASS."""
        return

    # ---------- Owned resource shutdown ----------
    def _shutdown_logger(self) -> None:
        """Shut down the logger only when this instance owns it."""
        if not self._owns_logger:
            self.logger.debug("%s: skipping logger shutdown (not owned).", self.__class__.__name__)
            return
        self.logger.debug("%s: shutting down owned logger.", self.__class__.__name__)
        try:
            self.logger.shutdown()
        except Exception:  # keep shutdown robust
            pass

    def _shutdown_owned_resources(self) -> None:
        """Release the owned filesystem (if any), then the owned logger."""
        # fsspec FS usually has no close; if it does, call it.
        if self._owns_fs and self.fs is not None:
            self.logger.debug("%s: releasing owned fsspec filesystem.", self.__class__.__name__)
            close = getattr(self.fs, "close", None)
            try:
                if callable(close):
                    close()
            finally:
                self.fs = None
        else:
            self.logger.debug(
                "%s: skipping fs shutdown (not owned or none).", self.__class__.__name__
            )
        self._shutdown_logger()

    async def _ashutdown_owned_resources(self) -> None:
        """Async counterpart of _shutdown_owned_resources (fsspec has no async close)."""
        # No async close in fsspec by default, keep parity with sync
        if self._owns_fs and self.fs is not None:
            self.logger.debug("%s: releasing owned fsspec filesystem (async).", self.__class__.__name__)
            close = getattr(self.fs, "close", None)
            try:
                if callable(close):
                    close()
            finally:
                self.fs = None
        self._shutdown_logger()

    # ---------- Public lifecycle ----------
    def close(self) -> None:
        """Close the component: run subclass cleanup, then release owned resources.

        Idempotent and thread-safe; errors in subclass cleanup are logged and
        re-raised, but owned resources are still released.
        """
        with self._close_lock:
            if self._is_closed:
                return
            # Claim closure while the lock is held so a concurrent close()
            # or aclose() cannot also pass the check and run cleanup twice.
            self._is_closed = True
        self.logger.debug("Closing component %s...", self.__class__.__name__)
        try:
            self._cleanup()
        except Exception:
            # log and propagate — callers need to know
            self.logger.error(
                "Error during %s._cleanup()", self.__class__.__name__, exc_info=True
            )
            raise
        finally:
            self._shutdown_owned_resources()
            # Resources are released; the GC-time finalizer must not fire again.
            self._finalizer.detach()
            self.logger.debug("Component %s closed.", self.__class__.__name__)

    async def aclose(self) -> None:
        """Async close: run subclass async cleanup, then release owned resources."""
        with self._close_lock:
            if self._is_closed:
                return
            # Claim closure under the lock; checking here but setting the flag
            # only after cleanup would let two concurrent aclose() calls both
            # run the cleanup sequence (check-then-act race).
            self._is_closed = True
        self.logger.debug("Asynchronously closing component %s...", self.__class__.__name__)
        # run subclass async cleanup outside of the lock
        try:
            await self._acleanup()
        except Exception:
            self.logger.error(
                "Error during %s._acleanup()", self.__class__.__name__, exc_info=True
            )
            raise
        finally:
            await self._ashutdown_owned_resources()
            # Resources are released; the GC-time finalizer must not fire again.
            self._finalizer.detach()
            self.logger.debug("Async component %s closed.", self.__class__.__name__)

    # ---------- Context managers ----------
    def __enter__(self) -> Self:
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        self.close()
        return False  # propagate exceptions

    async def __aenter__(self) -> Self:
        return self

    async def __aexit__(self, exc_type, exc, tb) -> bool:
        await self.aclose()
        return False