sibi-dst 2025.1.13__py3-none-any.whl → 2025.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/__init__.py +7 -1
- sibi_dst/df_helper/__init__.py +3 -2
- sibi_dst/df_helper/_artifact_updater_async.py +238 -0
- sibi_dst/df_helper/_artifact_updater_threaded.py +195 -0
- sibi_dst/df_helper/_df_helper.py +418 -118
- sibi_dst/df_helper/_parquet_artifact.py +275 -283
- sibi_dst/df_helper/_parquet_reader.py +9 -10
- sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
- sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
- sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
- sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
- sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
- sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
- sibi_dst/osmnx_helper/route_path_builder.py +45 -46
- sibi_dst/utils/__init__.py +2 -0
- sibi_dst/utils/base.py +235 -100
- sibi_dst/utils/business_days.py +248 -0
- sibi_dst/utils/clickhouse_writer.py +472 -206
- sibi_dst/utils/data_utils.py +139 -186
- sibi_dst/utils/data_wrapper.py +392 -88
- sibi_dst/utils/date_utils.py +711 -393
- sibi_dst/utils/df_utils.py +193 -213
- sibi_dst/utils/file_age_checker.py +301 -0
- sibi_dst/utils/file_utils.py +3 -2
- sibi_dst/utils/filepath_generator.py +314 -152
- sibi_dst/utils/log_utils.py +581 -242
- sibi_dst/utils/manifest_manager.py +60 -76
- sibi_dst/utils/parquet_saver.py +33 -27
- sibi_dst/utils/periods.py +42 -0
- sibi_dst/utils/phone_formatter.py +88 -95
- sibi_dst/utils/update_planner.py +180 -178
- sibi_dst/utils/webdav_client.py +116 -166
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.2.dist-info}/METADATA +1 -1
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.2.dist-info}/RECORD +36 -30
- sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -422
- {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,192 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import hashlib
|
4
|
+
import threading
|
5
|
+
from typing import Dict, Optional, Tuple
|
6
|
+
|
7
|
+
from sqlalchemy import MetaData, Table
|
8
|
+
from sqlalchemy.engine import Engine
|
9
|
+
from sqlalchemy.orm import DeclarativeBase
|
10
|
+
|
11
|
+
|
12
|
+
class Base(DeclarativeBase):
    """Declarative base that every dynamically generated ORM model derives from.

    Because all generated classes share this base, they also share a single
    SQLAlchemy registry (``Base.registry``), which the model registry in this
    module inspects to detect already-mapped tables and class-name clashes.
    """
|
15
|
+
|
16
|
+
|
17
|
+
# Backward-compatible default module label for generated classes
|
18
|
+
apps_label = "datacubes.models"
|
19
|
+
|
20
|
+
|
21
|
+
class ModelRegistry:
    """
    Thread-safe registry that reflects tables once per (engine, schema) and
    returns a single mapped class per (engine, schema, table).

    Caches:
      * ``_metadata_cache`` -- one ``MetaData`` per (engine key, schema).
      * ``_model_cache``    -- one mapped class per (engine key, schema, table).
      * ``_md_locks``       -- one lock per (engine key, schema); it serializes
        reflection *and* model creation for that metadata object.

    ``_lock`` (re-entrant) only guards the dictionaries above and is never
    held while talking to the database.
    """

    def __init__(self) -> None:
        self._metadata_cache: Dict[Tuple[str, Optional[str]], MetaData] = {}
        self._model_cache: Dict[Tuple[str, Optional[str], str], type] = {}
        self._lock = threading.RLock()
        self._md_locks: Dict[Tuple[str, Optional[str]], threading.Lock] = {}

    # ---------- key helpers ----------
    @staticmethod
    def _engine_key(engine: Engine) -> str:
        """Return the cache key for an engine: the string form of its URL."""
        return str(engine.url)

    @staticmethod
    def _qualified_key(schema: Optional[str], table: str) -> str:
        """Return ``"schema.table"`` when a schema is given, else ``table``."""
        return f"{schema}.{table}" if schema else table

    @staticmethod
    def _split_schema_and_table(name: str) -> Tuple[Optional[str], str]:
        """Split ``"schema.table"`` on the first dot.

        Returns ``(schema, table)``; ``schema`` is ``None`` when *name* has no
        dot or when the part before the dot is empty.
        """
        if "." in name:
            s, t = name.split(".", 1)
            return (s or None), t
        return None, name

    # ---------- class name helpers ----------
    @staticmethod
    def _normalize_class_name(table_name: str) -> str:
        """Convert a snake_case table name to a CamelCase class name."""
        return "".join(part.capitalize() for part in table_name.split("_"))

    @staticmethod
    def _short_hash(*parts: str, length: int = 8) -> str:
        """Return the first *length* hex chars of the SHA-1 of ``"|".join(parts)``.

        Used only to derive a deterministic name suffix, not for security.
        """
        h = hashlib.sha1("|".join(parts).encode("utf-8")).hexdigest()
        return h[:length]

    def _is_class_name_taken(self, class_name: str, module_label: str) -> bool:
        """Return True if a class mapped on ``Base`` already has this name and module."""
        # Avoid SA private registries; inspect mappers instead (public)
        for mapper in list(Base.registry.mappers):
            try:
                cls = mapper.class_
                if (
                    getattr(cls, "__name__", None) == class_name
                    and getattr(cls, "__module__", None) == module_label
                ):
                    return True
            except Exception:
                # Best effort: a mapper that cannot be inspected is ignored.
                continue
        return False

    def _find_existing_model_for_table(self, tbl: Table) -> Optional[type]:
        """Return a class already mapped on ``Base`` for *tbl*, or ``None``.

        A class matches when its ``__table__`` is *tbl* itself or is a
        ``Table`` with the same schema and name.

        NOTE(review): the name-based match does not look at the engine, so a
        model mapped for a same-named table of another engine (or one mapped
        before a ``refresh``) is reused as well -- confirm this is intended.
        """
        for mapper in list(Base.registry.mappers):
            try:
                mapped_cls = mapper.class_
                mapped_tbl = getattr(mapped_cls, "__table__", None)
                if mapped_tbl is tbl:
                    return mapped_cls
                if isinstance(mapped_tbl, Table):
                    if (mapped_tbl.schema == tbl.schema) and (mapped_tbl.name == tbl.name):
                        return mapped_cls
            except Exception:
                # Best effort: a mapper that cannot be inspected is ignored.
                continue
        return None

    # ---------- metadata helpers ----------
    def _get_or_create_metadata(self, ekey: str, schema: Optional[str]) -> MetaData:
        """Return the cached ``MetaData`` for (engine key, schema), creating it if absent."""
        md_key = (ekey, schema)
        with self._lock:
            md = self._metadata_cache.get(md_key)
            if md is None:
                md = MetaData(schema=schema)
                self._metadata_cache[md_key] = md
            return md

    def _get_or_create_md_lock(self, md_key: Tuple[str, Optional[str]]) -> threading.Lock:
        """Return the lock associated with *md_key*, creating it if absent."""
        with self._lock:
            lock = self._md_locks.get(md_key)
            if lock is None:
                lock = threading.Lock()
                self._md_locks[md_key] = lock
            return lock

    def _build_model_class(
        self,
        tbl: Table,
        tname: str,
        ekey: str,
        schema: Optional[str],
        module_label: str,
    ) -> type:
        """Create a new class mapped on ``Base`` for the reflected table *tbl*.

        The class is named CamelCase(table); if that name is already mapped
        under *module_label*, a short hash of (engine key, schema, table) is
        appended so the name is unique yet deterministic.
        """
        final_name = self._normalize_class_name(tname)
        if self._is_class_name_taken(final_name, module_label):
            suffix = self._short_hash(ekey, schema or "", tname)
            final_name = f"{final_name}_{suffix}"

        attrs = {
            "__tablename__": tbl.name,
            "__table__": tbl,
            "__module__": module_label,
        }
        return type(final_name, (Base,), attrs)

    # ---------- public API ----------
    def get_model(
        self,
        engine: Engine,
        table_name: str,
        *,
        refresh: bool = False,
        schema: Optional[str] = None,
        module_label: Optional[str] = None,
        prefer_stable_names: bool = True,
    ) -> type:
        """Return the mapped class for *table_name* on *engine*.

        Args:
            engine: Engine used for reflection; its URL string is the cache key.
            table_name: Table name, optionally given as ``"schema.table"``.
            refresh: Drop the cached model and metadata for this table first,
                so the table is reflected again.
            schema: Explicit schema; takes precedence over a schema embedded
                in *table_name*.
            module_label: ``__module__`` assigned to a newly created class;
                falls back to the module-level ``apps_label``.
            prefer_stable_names: Kept for interface compatibility. It has no
                effect at present: on a name clash the class name is always
                suffixed with a short hash, whatever the flag's value.

        Returns:
            The cached, reused or newly created mapped class.

        Raises:
            ValueError: If the table is not present in the metadata after
                reflection.
        """
        parsed_schema, tname = self._split_schema_and_table(table_name)
        schema = schema if schema is not None else parsed_schema
        ekey = self._engine_key(engine)
        model_key = (ekey, schema, tname)
        md_key = (ekey, schema)
        module_label = module_label or apps_label

        if refresh:
            with self._lock:
                self._model_cache.pop(model_key, None)
                self._metadata_cache.pop(md_key, None)
                # The per-metadata lock is deliberately kept: discarding it
                # would let concurrent callers hold different lock objects for
                # the same key and so bypass the serialization below.

        # fast path: already cached model
        with self._lock:
            cached = self._model_cache.get(model_key)
            if cached is not None:
                return cached

        md_lock = self._get_or_create_md_lock(md_key)
        qname = self._qualified_key(schema, tname)

        # Reflection and model creation are serialized per (engine, schema) so
        # that concurrent callers cannot each map their own class for the table.
        with md_lock:
            # Re-check: another thread may have built the model while we waited.
            with self._lock:
                cached = self._model_cache.get(model_key)
            if cached is not None:
                return cached

            md = self._get_or_create_metadata(ekey, schema)
            tbl = md.tables.get(qname)
            if tbl is None:
                # `only` takes bare table names; the schema comes from the
                # MetaData itself, so the qualified key must not be passed.
                # reflect() may itself raise for an unknown table -- TODO confirm.
                md.reflect(bind=engine, only=[tname])
                tbl = md.tables.get(qname)

            if tbl is None:
                raise ValueError(f"Table '{qname}' does not exist in the database.")

            # If a mapped model for this Table already exists (anywhere), reuse it
            model_cls = self._find_existing_model_for_table(tbl)
            if model_cls is None:
                model_cls = self._build_model_class(tbl, tname, ekey, schema, module_label)

            with self._lock:
                self._model_cache[model_key] = model_cls
            return model_cls

    def clear(self) -> None:
        """Empty the metadata, model and lock caches.

        Classes already mapped on ``Base`` are not unmapped by this call.
        """
        with self._lock:
            self._metadata_cache.clear()
            self._model_cache.clear()
            self._md_locks.clear()
|
186
|
+
|
187
|
+
|
188
|
+
# Single registry instance created at import time and shared by every caller
# that goes through get_global_registry().
_global_registry = ModelRegistry()


def get_global_registry() -> ModelRegistry:
    """Return the module-level ``ModelRegistry`` instance.

    The same object is returned on every call, so its caches are shared by
    all callers within the process.
    """
    return _global_registry
|
@@ -1,104 +1,154 @@
|
|
1
|
-
import re
|
2
1
|
import keyword
|
2
|
+
import re
|
3
3
|
import threading
|
4
|
-
from sqlalchemy import
|
5
|
-
from sqlalchemy.orm import DeclarativeBase
|
6
|
-
|
4
|
+
from sqlalchemy.engine import Engine
|
7
5
|
|
8
|
-
|
9
|
-
"""Shared declarative base for all ORM models."""
|
10
|
-
pass
|
6
|
+
from ._model_registry import ModelRegistry, apps_label
|
11
7
|
|
12
8
|
|
13
|
-
|
9
|
+
# Global process-wide registry for backward compatibility
|
10
|
+
_global_model_registry = ModelRegistry()
|
14
11
|
|
15
12
|
|
16
13
|
class SqlAlchemyModelBuilder:
|
17
14
|
"""
|
18
15
|
Builds a single SQLAlchemy ORM model from a specific database table.
|
19
|
-
|
20
|
-
|
16
|
+
Thread-safe and uses a process-wide registry for reuse.
|
17
|
+
|
18
|
+
Backward compatibility:
|
19
|
+
- Keeps CamelCase(table) as preferred class name
|
20
|
+
- Publishes classes under `apps_label` unless overridden
|
21
|
+
- Public API unchanged
|
21
22
|
"""
|
23
|
+
|
22
24
|
_lock = threading.Lock()
|
23
|
-
_metadata_cache: dict[str, MetaData] = {}
|
24
25
|
|
25
26
|
def __init__(self, engine: Engine, table_name: str):
|
26
|
-
"""
|
27
|
-
Initializes the model builder for a specific table.
|
28
|
-
|
29
|
-
Args:
|
30
|
-
engine: The SQLAlchemy engine connected to the database.
|
31
|
-
table_name: The name of the table to generate the model for.
|
32
|
-
"""
|
33
27
|
self.engine = engine
|
34
28
|
self.table_name = table_name
|
35
|
-
self.class_name = self._normalize_class_name(self.table_name)
|
36
|
-
|
37
|
-
engine_key = str(engine.url)
|
38
|
-
|
39
|
-
# ✅ REFACTOR: Acquire lock to make cache access and creation atomic,
|
40
|
-
# preventing a race condition between multiple threads.
|
41
|
-
with self._lock:
|
42
|
-
if engine_key not in self._metadata_cache:
|
43
|
-
self._metadata_cache[engine_key] = MetaData()
|
44
|
-
self.metadata = self._metadata_cache[engine_key]
|
45
29
|
|
46
30
|
def build_model(self) -> type:
|
47
|
-
"""
|
48
|
-
Builds and returns a database model class for the specified table.
|
49
|
-
This process is atomic and thread-safe.
|
50
|
-
|
51
|
-
Raises:
|
52
|
-
ValueError: If the specified table does not exist in the database.
|
53
|
-
Returns:
|
54
|
-
The dynamically created ORM model class.
|
55
|
-
"""
|
56
31
|
with self._lock:
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
# Check if the table's schema is in our metadata cache
|
64
|
-
table = self.metadata.tables.get(self.table_name)
|
65
|
-
|
66
|
-
# If not cached, reflect it from the database
|
67
|
-
if table is None:
|
68
|
-
self.metadata.reflect(bind=self.engine, only=[self.table_name])
|
69
|
-
table = self.metadata.tables.get(self.table_name)
|
70
|
-
|
71
|
-
if table is None:
|
72
|
-
raise ValueError(
|
73
|
-
f"Table '{self.table_name}' does not exist in the database."
|
74
|
-
)
|
75
|
-
|
76
|
-
# Create the model class dynamically.
|
77
|
-
attrs = {
|
78
|
-
"__tablename__": table.name,
|
79
|
-
"__table__": table,
|
80
|
-
"__module__": apps_label,
|
81
|
-
}
|
82
|
-
model = type(self.class_name, (Base,), attrs)
|
83
|
-
|
84
|
-
return model
|
32
|
+
return _global_model_registry.get_model(
|
33
|
+
engine=self.engine,
|
34
|
+
table_name=self.table_name,
|
35
|
+
module_label=apps_label,
|
36
|
+
prefer_stable_names=True,
|
37
|
+
)
|
85
38
|
|
86
39
|
@staticmethod
|
87
40
|
def _normalize_class_name(table_name: str) -> str:
|
88
|
-
"""Converts a snake_case table_name to a CamelCase class name."""
|
89
41
|
return "".join(word.capitalize() for word in table_name.split("_"))
|
90
42
|
|
91
43
|
@staticmethod
|
92
44
|
def _normalize_column_name(column_name: str) -> str:
|
93
|
-
"""
|
94
|
-
Sanitizes a column name to be a valid Python identifier.
|
95
|
-
(Kept for utility, though not used in the final model creation).
|
96
|
-
"""
|
97
45
|
sane_name = re.sub(r"\W", "_", column_name)
|
98
46
|
sane_name = re.sub(r"^\d", r"_\g<0>", sane_name)
|
99
|
-
|
100
47
|
if keyword.iskeyword(sane_name):
|
101
48
|
return f"{sane_name}_field"
|
102
49
|
return sane_name
|
103
50
|
|
104
|
-
|
51
|
+
# import re
|
52
|
+
# import keyword
|
53
|
+
# import threading
|
54
|
+
# from sqlalchemy import MetaData, Engine
|
55
|
+
# from sqlalchemy.orm import DeclarativeBase
|
56
|
+
#
|
57
|
+
#
|
58
|
+
# class Base(DeclarativeBase):
|
59
|
+
# """Shared declarative base for all ORM models."""
|
60
|
+
# pass
|
61
|
+
#
|
62
|
+
#
|
63
|
+
# apps_label = "datacubes.models"
|
64
|
+
#
|
65
|
+
#
|
66
|
+
# class SqlAlchemyModelBuilder:
|
67
|
+
# """
|
68
|
+
# Builds a single SQLAlchemy ORM model from a specific database table.
|
69
|
+
# This class is thread-safe and caches reflected table metadata to
|
70
|
+
# improve performance across multiple instantiations.
|
71
|
+
# """
|
72
|
+
# _lock = threading.Lock()
|
73
|
+
# _metadata_cache: dict[str, MetaData] = {}
|
74
|
+
#
|
75
|
+
# def __init__(self, engine: Engine, table_name: str):
|
76
|
+
# """
|
77
|
+
# Initializes the model builder for a specific table.
|
78
|
+
#
|
79
|
+
# Args:
|
80
|
+
# engine: The SQLAlchemy engine connected to the database.
|
81
|
+
# table_name: The name of the table to generate the model for.
|
82
|
+
# """
|
83
|
+
# self.engine = engine
|
84
|
+
# self.table_name = table_name
|
85
|
+
# self.class_name = self._normalize_class_name(self.table_name)
|
86
|
+
#
|
87
|
+
# engine_key = str(engine.url)
|
88
|
+
#
|
89
|
+
# # ✅ REFACTOR: Acquire lock to make cache access and creation atomic,
|
90
|
+
# # preventing a race condition between multiple threads.
|
91
|
+
# with self._lock:
|
92
|
+
# if engine_key not in self._metadata_cache:
|
93
|
+
# self._metadata_cache[engine_key] = MetaData()
|
94
|
+
# self.metadata = self._metadata_cache[engine_key]
|
95
|
+
#
|
96
|
+
# def build_model(self) -> type:
|
97
|
+
# """
|
98
|
+
# Builds and returns a database model class for the specified table.
|
99
|
+
# This process is atomic and thread-safe.
|
100
|
+
#
|
101
|
+
# Raises:
|
102
|
+
# ValueError: If the specified table does not exist in the database.
|
103
|
+
# Returns:
|
104
|
+
# The dynamically created ORM model class.
|
105
|
+
# """
|
106
|
+
# with self._lock:
|
107
|
+
# # NOTE: Using a private SQLAlchemy API. This is a performance
|
108
|
+
# # optimization but may break in future versions of the library.
|
109
|
+
# registered_model = Base.registry._class_registry.get(self.class_name)
|
110
|
+
# if registered_model:
|
111
|
+
# return registered_model
|
112
|
+
#
|
113
|
+
# # Check if the table's schema is in our metadata cache
|
114
|
+
# table = self.metadata.tables.get(self.table_name)
|
115
|
+
#
|
116
|
+
# # If not cached, reflect it from the database
|
117
|
+
# if table is None:
|
118
|
+
# self.metadata.reflect(bind=self.engine, only=[self.table_name])
|
119
|
+
# table = self.metadata.tables.get(self.table_name)
|
120
|
+
#
|
121
|
+
# if table is None:
|
122
|
+
# raise ValueError(
|
123
|
+
# f"Table '{self.table_name}' does not exist in the database."
|
124
|
+
# )
|
125
|
+
#
|
126
|
+
# # Create the model class dynamically.
|
127
|
+
# attrs = {
|
128
|
+
# "__tablename__": table.name,
|
129
|
+
# "__table__": table,
|
130
|
+
# "__module__": apps_label,
|
131
|
+
# }
|
132
|
+
# model = type(self.class_name, (Base,), attrs)
|
133
|
+
#
|
134
|
+
# return model
|
135
|
+
#
|
136
|
+
# @staticmethod
|
137
|
+
# def _normalize_class_name(table_name: str) -> str:
|
138
|
+
# """Converts a snake_case table_name to a CamelCase class name."""
|
139
|
+
# return "".join(word.capitalize() for word in table_name.split("_"))
|
140
|
+
#
|
141
|
+
# @staticmethod
|
142
|
+
# def _normalize_column_name(column_name: str) -> str:
|
143
|
+
# """
|
144
|
+
# Sanitizes a column name to be a valid Python identifier.
|
145
|
+
# (Kept for utility, though not used in the final model creation).
|
146
|
+
# """
|
147
|
+
# sane_name = re.sub(r"\W", "_", column_name)
|
148
|
+
# sane_name = re.sub(r"^\d", r"_\g<0>", sane_name)
|
149
|
+
#
|
150
|
+
# if keyword.iskeyword(sane_name):
|
151
|
+
# return f"{sane_name}_field"
|
152
|
+
# return sane_name
|
153
|
+
#
|
154
|
+
#
|
@@ -2,48 +2,44 @@ import numpy as np
|
|
2
2
|
import pandas as pd
|
3
3
|
import networkx as nx
|
4
4
|
import osmnx as ox
|
5
|
-
from typing import List
|
5
|
+
from typing import List, Optional
|
6
|
+
from pydantic import BaseModel
|
7
|
+
|
8
|
+
class RoutePathBuilderConfig(BaseModel):
    """
    Pydantic model holding the validated settings for ``RoutePathBuilder``.
    """

    # Road network on which nearest nodes and shortest paths are computed.
    graph: nx.MultiDiGraph
    # Columns the input DataFrame is sorted by before legs are built (required).
    sort_key: List[str]
    # Optional column; when set, destinations are shifted within each group
    # instead of across the whole DataFrame.
    grouping_col: Optional[str] = None
    # Names of the latitude / longitude columns in the input DataFrame.
    lat_col: str = "latitude"
    lon_col: str = "longitude"

    class Config:
        # Needed so the non-pydantic ``nx.MultiDiGraph`` type is accepted as a field.
        arbitrary_types_allowed = True
|
6
20
|
|
7
21
|
class RoutePathBuilder:
|
8
22
|
"""
|
9
|
-
Builds shortest paths for consecutive GPS points
|
23
|
+
Builds shortest paths (Dijkstra Algorithm) for consecutive GPS points.
|
24
|
+
This version requires an explicit sort_key for correctness.
|
10
25
|
"""
|
11
26
|
|
12
|
-
def __init__(
|
13
|
-
self,
|
14
|
-
graph: nx.MultiDiGraph,
|
15
|
-
lat_col: str = "latitude",
|
16
|
-
lon_col: str = "longitude",
|
17
|
-
grouping_col: str = "associate_id",
|
18
|
-
sort_key=None # Default sort key for DataFrame
|
19
|
-
):
|
27
|
+
def __init__(self, config: RoutePathBuilderConfig):
|
20
28
|
"""
|
21
|
-
|
22
|
-
:param lat_col: Column name for latitude.
|
23
|
-
:param lon_col: Column name for longitude.
|
24
|
-
:param associate_col: Column name for associate/grouping key.
|
29
|
+
Initializes the builder with a validated configuration object.
|
25
30
|
"""
|
26
|
-
|
27
|
-
sort_key = ["associate_id", "date_time"]
|
28
|
-
self.graph = graph
|
29
|
-
self.lat_col = lat_col
|
30
|
-
self.lon_col = lon_col
|
31
|
-
self.grouping_col = grouping_col
|
32
|
-
self.sort_key = sort_key
|
33
|
-
if self.sort_key is None:
|
34
|
-
self.sort_key = [self.grouping_col, "date_time"]
|
31
|
+
self.config = config
|
35
32
|
|
33
|
+
# Static methods _get_shortest_path and _path_length_from_nodes remain unchanged...
|
36
34
|
@staticmethod
|
37
35
|
def _get_shortest_path(u: int, v: int, graph: nx.MultiDiGraph) -> List[int]:
|
38
|
-
"""Return the node sequence for the shortest path from u to v, or [] if none."""
|
39
36
|
try:
|
40
|
-
return nx.shortest_path(graph, u, v, weight="length")
|
37
|
+
return nx.shortest_path(graph, u, v, weight="length", method="dijkstra")
|
41
38
|
except nx.NetworkXNoPath:
|
42
39
|
return []
|
43
40
|
|
44
41
|
@staticmethod
|
45
42
|
def _path_length_from_nodes(node_list: List[int], graph: nx.MultiDiGraph) -> float:
|
46
|
-
"""Sum up the 'length' attribute along consecutive node pairs."""
|
47
43
|
if len(node_list) < 2:
|
48
44
|
return np.nan
|
49
45
|
total = 0.0
|
@@ -53,46 +49,49 @@ class RoutePathBuilder:
|
|
53
49
|
total += min(lengths) if lengths else 0
|
54
50
|
return total
|
55
51
|
|
52
|
+
|
56
53
|
def build_routes(self, df: pd.DataFrame) -> pd.DataFrame:
|
57
54
|
"""
|
58
|
-
|
59
|
-
|
60
|
-
:param df: Input DataFrame containing grouping_col, latitude, and longitude columns.
|
61
|
-
:return: DataFrame with added columns:
|
62
|
-
['dest_lat', 'dest_lon', 'origin_node', 'dest_node', 'path_nodes', 'path_coords', 'distance_m']
|
55
|
+
Generates routes from a DataFrame of GPS points.
|
63
56
|
"""
|
64
|
-
# 1) Build destination coordinates by shifting per grouping column
|
65
57
|
df = df.copy()
|
66
|
-
df["dest_lat"] = df.groupby(self.grouping_col)[self.lat_col].shift(-1)
|
67
|
-
df["dest_lon"] = df.groupby(self.grouping_col)[self.lon_col].shift(-1)
|
68
58
|
|
69
|
-
|
59
|
+
df = df.sort_values(by=self.config.sort_key).reset_index(drop=True)
|
60
|
+
|
61
|
+
# 2. Create destination columns by shifting within each group or across the df
|
62
|
+
if self.config.grouping_col:
|
63
|
+
df["dest_lat"] = df.groupby(by=self.config.grouping_col)[self.config.lat_col].shift(-1)
|
64
|
+
df["dest_lon"] = df.groupby(by=self.config.grouping_col)[self.config.lon_col].shift(-1)
|
65
|
+
else:
|
66
|
+
df["dest_lat"] = df[self.config.lat_col].shift(-1)
|
67
|
+
df["dest_lon"] = df[self.config.lon_col].shift(-1)
|
68
|
+
|
70
69
|
df = df.dropna(subset=["dest_lat", "dest_lon"]).reset_index(drop=True)
|
71
70
|
|
72
|
-
#
|
71
|
+
# 3. Snap origin & destination coordinates to the nearest graph nodes
|
73
72
|
df["origin_node"] = ox.nearest_nodes(
|
74
|
-
self.graph, X=df[self.lon_col].values, Y=df[self.lat_col].values
|
73
|
+
self.config.graph, X=df[self.config.lon_col].values, Y=df[self.config.lat_col].values
|
75
74
|
)
|
76
75
|
df["dest_node"] = ox.nearest_nodes(
|
77
|
-
self.graph, X=df["dest_lon"].values, Y=df["dest_lat"].values
|
76
|
+
self.config.graph, X=df["dest_lon"].values, Y=df["dest_lat"].values
|
78
77
|
)
|
79
78
|
|
80
|
-
#
|
79
|
+
# 4. Calculate paths, coordinates, and distances
|
81
80
|
df["path_nodes"] = [
|
82
|
-
self._get_shortest_path(u, v, self.graph)
|
81
|
+
self._get_shortest_path(u, v, self.config.graph)
|
83
82
|
for u, v in zip(df["origin_node"], df["dest_node"])
|
84
83
|
]
|
85
84
|
|
85
|
+
df = df[df["path_nodes"].str.len() > 0].reset_index(drop=True)
|
86
|
+
|
86
87
|
df["path_coords"] = df["path_nodes"].apply(
|
87
|
-
lambda nl: [(self.graph.nodes[n]["y"], self.graph.nodes[n]["x"]) for n in nl]
|
88
|
+
lambda nl: [(self.config.graph.nodes[n]["y"], self.config.graph.nodes[n]["x"]) for n in nl]
|
88
89
|
)
|
89
90
|
|
90
91
|
df["distance_m"] = df["path_nodes"].apply(
|
91
|
-
lambda nl: self._path_length_from_nodes(nl, self.graph)
|
92
|
+
lambda nl: self._path_length_from_nodes(nl, self.config.graph)
|
92
93
|
)
|
93
|
-
# Ensure NaN distances become 0
|
94
94
|
df["distance_m"] = df["distance_m"].fillna(0)
|
95
|
-
# Remove any legs with no path
|
96
|
-
df = df[df["path_nodes"].str.len() > 0].reset_index(drop=True)
|
97
95
|
|
98
|
-
|
96
|
+
# The final sort is no longer needed, as it was done at the beginning
|
97
|
+
return df
|
sibi_dst/utils/__init__.py
CHANGED
@@ -3,6 +3,8 @@ from __future__ import annotations
|
|
3
3
|
from .log_utils import Logger
|
4
4
|
from .base import ManagedResource
|
5
5
|
from .date_utils import *
|
6
|
+
from .file_age_checker import FileAgeChecker
|
7
|
+
from .business_days import BusinessDays
|
6
8
|
from .data_utils import DataUtils
|
7
9
|
from .file_utils import FileUtils
|
8
10
|
from .phone_formatter import PhoneNumberFormatter
|