sibi-dst 2025.1.11.tar.gz → 2025.1.13.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/PKG-INFO +1 -1
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/pyproject.toml +3 -1
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/osmnx_helper/__init__.py +1 -0
- sibi_dst-2025.1.13/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +203 -0
- sibi_dst-2025.1.13/sibi_dst/osmnx_helper/route_path_builder.py +98 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/osmnx_helper/utils.py +2 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/date_utils.py +39 -38
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/README.md +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/_parquet_reader.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/parquet/_filter_handler.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/core/_defaults.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/df_helper/data_cleaner.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/geopy_helper/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/geopy_helper/utils.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/tests/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/base.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/clickhouse_writer.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/credentials.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/data_from_http_source.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/data_utils.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/data_wrapper.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/df_utils.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/file_utils.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/filepath_generator.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/log_utils.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/manifest_manager.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/parquet_saver.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/phone_formatter.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/storage_config.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/storage_manager.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/update_planner.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/webdav_client.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/utils/__init__.py +0 -0
- {sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/v2/utils/log_utils.py +0 -0
{sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sibi-dst"
-version = "2025.1.
+version = "2025.1.13"
 description = "Data Science Toolkit"
 authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
 readme = "README.md"
@@ -48,6 +48,8 @@ osmnx = "^2.0.5"
 geopy = "^2.4.1"
 folium = "^0.20.0"
 geopandas = "^1.1.1"
+networkx = "^3.5"
+scikit-learn = "^1.7.1"
 
 
 [tool.poetry.group.loggers.dependencies]
sibi_dst-2025.1.13/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py
ADDED

@@ -0,0 +1,203 @@
+import folium
+import math
+import networkx as nx
+import pandas as pd
+from datetime import datetime
+from folium.plugins import AntPath, PolyLineTextPath
+from geopy.distance import geodesic
+from pydantic import BaseModel, Field
+from typing import Optional, Dict, List, Tuple
+
+def get_default_action_colors() -> Dict[str, str]:
+    """Provides a default dictionary for action colors."""
+    return {
+        "Reporte Posición": "blue", "Acceso o Salida": "lightblue",
+        "Producto para Reintentar": "orange", "Formalización iniciada": "purple",
+        "Producto Formalizado": "green", "Esfuerzo Registrado": "darkblue",
+        "Abandona Formalización": "lightred", "Producto Rechazado": "red",
+        "Producto Devuelto": "darkred",
+    }
+
+class RouteMapPlotterSettings(BaseModel):
+    """Manages all configuration settings for the RouteMapPlotter."""
+    use_antpath: bool = Field(True, description="If True, use animated AntPath; otherwise, use a static PolyLine.")
+    zoom_start: int = Field(14, description="Initial zoom level for the map.")
+    forward_color: str = Field("blue", description="Color for the outbound portion of the route.")
+    return_color: str = Field("red", description="Color for the return portion of the route.")
+    return_offset_m: float = Field(2.5, description="Meters to offset the return path for visibility.")
+    antpath_delay: int = Field(800, description="Delay in ms for the AntPath animation.")
+    antpath_weight: int = Field(5, description="Weight of the AntPath line.")
+    antpath_dash_array: List[int] = Field(default=[10, 20], description="Dash pattern for the AntPath.")
+    antpath_pulse_color: str = Field("white", description="Pulse color for the AntPath animation.")
+    marker_origin_color: str = Field("green", description="Color for the start marker.")
+    marker_end_color: str = Field("red", description="Color for the finish marker.")
+    furthest_marker_color: str = Field("orange", description="Color for the furthest point marker.")
+    arrow_color: str = Field("black", description="Color for direction arrows on static PolyLines.")
+    marker_radius: int = Field(6, description="Radius of the event circle markers.")
+    default_tile: str = Field("OpenStreetMap", description="The default map tile layer to display.")
+    arrow_spacing: int = Field(75, description="Spacing in pixels between arrows on static PolyLines.")
+    action_colors: Dict[str, str] = Field(default_factory=get_default_action_colors)
+    date_field: str = Field("date_time", description="Name of the date/time field in the DataFrame.")
+    lat_col: str = Field("latitude", description="Name of the latitude field in the DataFrame.")
+    lon_col: str = Field("longitude", description="Name of the longitude field in the DataFrame.")
+
+class RouteDataPoint(BaseModel):
+    """Defines the data schema for a single row in the input DataFrame."""
+    latitude: float
+    longitude: float
+    date_time: datetime
+    origin_node: int
+    dest_node: int
+    path_nodes: List[int]
+    action: Optional[str] = None
+
+class RouteMapPlotter:
+    def __init__(
+        self,
+        graph: nx.Graph,
+        settings: Optional[RouteMapPlotterSettings] = None,
+    ):
+        if not isinstance(graph, nx.Graph) or not graph.nodes:
+            raise ValueError("A valid NetworkX graph with nodes is required.")
+        self.graph = graph
+        self.settings = settings or RouteMapPlotterSettings()
+        self.tile_layers = {
+            "OpenStreetMap": "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png",
+            #"CartoDB Positron": "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png",
+            #"CartoDB Dark Matter": "https://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}{r}.png",
+        }
+
+    def _compute_distance_time_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = df.copy().sort_values(self.settings.date_field).reset_index(drop=True)
+        df[self.settings.date_field] = pd.to_datetime(df[self.settings.date_field])
+        df["prev_lat"] = df[self.settings.lat_col].shift(1)
+        df["prev_lon"] = df[self.settings.lon_col].shift(1)
+        df["prev_time"] = df[self.settings.date_field].shift(1)
+
+        valid_points = df["prev_lat"].notna()
+        df.loc[valid_points, "distance_to_prev"] = df[valid_points].apply(
+            lambda r: geodesic((r["prev_lat"], r["prev_lon"]), (r["latitude"], r["longitude"])).meters, axis=1
+        )
+        df["time_elapsed"] = df[self.settings.date_field] - df["prev_time"]
+        df["cumulative_time"] = df["time_elapsed"].cumsum()
+        df.fillna({"distance_to_prev": 0.0, "time_elapsed": pd.Timedelta(0)}, inplace=True)
+        return df.drop(columns=["prev_lat", "prev_lon", "prev_time"])
+
+    def _format_timedelta(self, td: pd.Timedelta) -> str:
+        total_seconds = int(td.total_seconds())
+        hours, remainder = divmod(total_seconds, 3600)
+        minutes, seconds = divmod(remainder, 60)
+        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+
+    def _get_midpoint(self, df: pd.DataFrame) -> Tuple[float, float]:
+        all_nodes = [node for path in df["path_nodes"] if path for node in path]
+        if not all_nodes:
+            return (df[self.settings.lat_col].mean(), df[self.settings.lon_col].mean())
+        avg_lat = sum(self.graph.nodes[n]["y"] for n in all_nodes) / len(all_nodes)
+        avg_lon = sum(self.graph.nodes[n]["x"] for n in all_nodes) / len(all_nodes)
+        return (avg_lat, avg_lon)
+
+    def _offset_coordinates(self, coords: List[Tuple[float, float]], offset_m: float) -> List[Tuple[float, float]]:
+        if len(coords) < 2 or offset_m == 0: return coords
+        offset_coords = []
+        for i, (lat, lon) in enumerate(coords):
+            if i == 0: p_prev, p_next = coords[i], coords[i + 1]
+            elif i == len(coords) - 1: p_prev, p_next = coords[i - 1], coords[i]
+            else: p_prev, p_next = coords[i - 1], coords[i + 1]
+            normal_angle = math.atan2(p_next[0] - p_prev[0], p_next[1] - p_prev[1]) + math.pi / 2
+            offset_lat = offset_m / 111111
+            offset_lon = offset_m / (111111 * math.cos(math.radians(lat)))
+            new_lat = lat + offset_lat * math.sin(normal_angle)
+            new_lon = lon + offset_lon * math.cos(normal_angle)
+            offset_coords.append((new_lat, new_lon))
+        return offset_coords
+
+    def _find_furthest_point(self, df: pd.DataFrame) -> Tuple[Optional[int], Optional[int], Optional[pd.Series]]:
+        if df["origin_node"].isnull().all(): return None, None, None
+        start_node = df["origin_node"].iloc[0]
+        start_lat, start_lon = self.graph.nodes[start_node]["y"], self.graph.nodes[start_node]["x"]
+        max_dist, furthest_node, furthest_idx, furthest_row = -1, None, None, None
+        for idx, row in df.iterrows():
+            if not row["path_nodes"]: continue
+            for n in row["path_nodes"]:
+                lat, lon = self.graph.nodes[n]["y"], self.graph.nodes[n]["x"]
+                dist = geodesic((start_lat, start_lon), (lat, lon)).meters
+                if dist > max_dist:
+                    max_dist, furthest_node, furthest_idx, furthest_row = dist, n, idx, row
+        return furthest_node, furthest_idx, furthest_row
+
+    def _plot_path(self, m: folium.Map, coords: List[Tuple[float, float]], color: str):
+        if self.settings.use_antpath:
+            AntPath(locations=coords, color=color, weight=self.settings.antpath_weight,
+                    delay=self.settings.antpath_delay, dash_array=self.settings.antpath_dash_array,
+                    pulse_color=self.settings.antpath_pulse_color, opacity=0.8).add_to(m)
+        else:
+            polyline = folium.PolyLine(coords, color=color, weight=4, opacity=0.6).add_to(m)
+            PolyLineTextPath(polyline, "▶", repeat=True, offset=8, spacing=self.settings.arrow_spacing,
+                             attributes={"fill": self.settings.arrow_color, "font-weight": "bold"}).add_to(m)
+
+    def _add_flag_marker(self, m: folium.Map, lat: float, lon: float, color: str, tooltip: str):
+        icon = folium.Icon(color=color, icon="flag", prefix="fa")
+        folium.Marker(location=(lat, lon), icon=icon, tooltip=tooltip).add_to(m)
+
+    def _add_point_markers(self, m: folium.Map, df: pd.DataFrame):
+        action_colors = self.settings.action_colors
+        action_layers = {action: folium.FeatureGroup(name=action) for action in action_colors}
+        action_layers["Others"] = folium.FeatureGroup(name="Others")
+        for idx, row in df.iterrows():
+            action = row.get("action", "Unknown")
+            tooltip = (f"<b>#{idx + 1} {action}</b><br>"
+                       f"Time: {row['date_time'].strftime('%H:%M:%S')}<br>"
+                       f"Dist from prev: {row['distance_to_prev']:.1f} m<br>"
+                       f"Time since prev: {self._format_timedelta(row['time_elapsed'])}")
+            color = action_colors.get(action, "gray")
+            target_layer = action_layers.get(action, action_layers["Others"])
+            folium.CircleMarker(location=(row[self.settings.lat_col], row[self.settings.lon_col]), radius=self.settings.marker_radius,
+                                color=color, fill=True, fill_opacity=0.9, tooltip=tooltip).add_to(target_layer)
+        for layer in action_layers.values(): layer.add_to(m)
+
+    # --- MAIN PLOT METHOD ---
+
+    def plot(self, df: pd.DataFrame) -> folium.Map:
+        try:
+            df_dict = df.iloc[0].to_dict()
+            df_dict[self.settings.date_field] = pd.to_datetime(df_dict[self.settings.date_field])
+            RouteDataPoint.model_validate(df_dict)
+        except Exception as e:
+            raise ValueError(f"DataFrame does not match required schema. Error: {e}")
+
+        processed_df = self._compute_distance_time_metrics(df)
+        midpoint = self._get_midpoint(processed_df)
+
+        m = folium.Map(location=midpoint, zoom_start=self.settings.zoom_start,
+                       tiles=self.settings.default_tile)
+        for name, url in self.tile_layers.items():
+            if name != self.settings.default_tile:
+                folium.TileLayer(tiles=url, name=name, attr=name).add_to(m)
+
+        furthest_node, furthest_idx, furthest_row = self._find_furthest_point(processed_df)
+
+        if furthest_idx is not None:
+            for idx, row in processed_df.iterrows():
+                if not row["path_nodes"]: continue
+                coords = [(self.graph.nodes[n]["y"], self.graph.nodes[n]["x"]) for n in row["path_nodes"]]
+                is_forward = idx <= furthest_idx
+                path_color = self.settings.forward_color if is_forward else self.settings.return_color
+                path_coords = coords if is_forward else self._offset_coordinates(coords, self.settings.return_offset_m)
+                self._plot_path(m, path_coords, path_color)
+
+        start_node = processed_df["origin_node"].iloc[0]
+        self._add_flag_marker(m, self.graph.nodes[start_node]["y"], self.graph.nodes[start_node]["x"],
+                              self.settings.marker_origin_color, f"Start<br>{processed_df[self.settings.date_field].iloc[0].strftime('%H:%M:%S')}")
+
+        end_node = processed_df["dest_node"].iloc[-1]
+        self._add_flag_marker(m, self.graph.nodes[end_node]["y"], self.graph.nodes[end_node]["x"],
+                              self.settings.marker_end_color, f"Finish<br>{processed_df[self.settings.date_field].iloc[-1].strftime('%H:%M:%S')}")
+
+        if furthest_node:
+            self._add_flag_marker(m, self.graph.nodes[furthest_node]["y"], self.graph.nodes[furthest_node]["x"],
+                                  self.settings.furthest_marker_color, f"Furthest Point<br>{furthest_row[self.settings.date_field].strftime('%H:%M:%S')}")
+
+        self._add_point_markers(m, processed_df)
+        folium.LayerControl(collapsed=False).add_to(m)
+        return m
sibi_dst-2025.1.13/sibi_dst/osmnx_helper/route_path_builder.py
ADDED

@@ -0,0 +1,98 @@
+import numpy as np
+import pandas as pd
+import networkx as nx
+import osmnx as ox
+from typing import List
+
+class RoutePathBuilder:
+    """
+    Builds shortest paths for consecutive GPS points (origins & destinations) within each associate's track.
+    """
+
+    def __init__(
+        self,
+        graph: nx.MultiDiGraph,
+        lat_col: str = "latitude",
+        lon_col: str = "longitude",
+        grouping_col: str = "associate_id",
+        sort_key=None  # Default sort key for DataFrame
+    ):
+        """
+        :param graph: The OSMnx MultiDiGraph.
+        :param lat_col: Column name for latitude.
+        :param lon_col: Column name for longitude.
+        :param associate_col: Column name for associate/grouping key.
+        """
+        if sort_key is None:
+            sort_key = ["associate_id", "date_time"]
+        self.graph = graph
+        self.lat_col = lat_col
+        self.lon_col = lon_col
+        self.grouping_col = grouping_col
+        self.sort_key = sort_key
+        if self.sort_key is None:
+            self.sort_key = [self.grouping_col, "date_time"]
+
+    @staticmethod
+    def _get_shortest_path(u: int, v: int, graph: nx.MultiDiGraph) -> List[int]:
+        """Return the node sequence for the shortest path from u to v, or [] if none."""
+        try:
+            return nx.shortest_path(graph, u, v, weight="length")
+        except nx.NetworkXNoPath:
+            return []
+
+    @staticmethod
+    def _path_length_from_nodes(node_list: List[int], graph: nx.MultiDiGraph) -> float:
+        """Sum up the 'length' attribute along consecutive node pairs."""
+        if len(node_list) < 2:
+            return np.nan
+        total = 0.0
+        for u, v in zip(node_list[:-1], node_list[1:]):
+            edge_data = graph.get_edge_data(u, v)
+            lengths = [edata.get("length", 0) for edata in edge_data.values()]
+            total += min(lengths) if lengths else 0
+        return total
+
+    def build_routes(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Generate destination coordinates, snap to graph nodes, and compute shortest paths.
+
+        :param df: Input DataFrame containing grouping_col, latitude, and longitude columns.
+        :return: DataFrame with added columns:
+                 ['dest_lat', 'dest_lon', 'origin_node', 'dest_node', 'path_nodes', 'path_coords', 'distance_m']
+        """
+        # 1) Build destination coordinates by shifting per grouping column
+        df = df.copy()
+        df["dest_lat"] = df.groupby(self.grouping_col)[self.lat_col].shift(-1)
+        df["dest_lon"] = df.groupby(self.grouping_col)[self.lon_col].shift(-1)
+
+        # Drop tail rows without next point
+        df = df.dropna(subset=["dest_lat", "dest_lon"]).reset_index(drop=True)
+
+        # 2) Snap origin & destination points to graph nodes
+        df["origin_node"] = ox.nearest_nodes(
+            self.graph, X=df[self.lon_col].values, Y=df[self.lat_col].values
+        )
+        df["dest_node"] = ox.nearest_nodes(
+            self.graph, X=df["dest_lon"].values, Y=df["dest_lat"].values
+        )
+
+        # 3) Compute paths, coordinates, and distances
+        df["path_nodes"] = [
+            self._get_shortest_path(u, v, self.graph)
+            for u, v in zip(df["origin_node"], df["dest_node"])
+        ]
+
+        df["path_coords"] = df["path_nodes"].apply(
+            lambda nl: [(self.graph.nodes[n]["y"], self.graph.nodes[n]["x"]) for n in nl]
+        )
+
+        df["distance_m"] = df["path_nodes"].apply(
+            lambda nl: self._path_length_from_nodes(nl, self.graph)
+        )
+        # Ensure NaN distances become 0
+        df["distance_m"] = df["distance_m"].fillna(0)
+        # Remove any legs with no path
+        df = df[df["path_nodes"].str.len() > 0].reset_index(drop=True)
+
+        return df.sort_values(self.sort_key).reset_index(drop=True)
{sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/osmnx_helper/utils.py

@@ -442,6 +442,8 @@ def get_graph(**options):
     - The list or collection of edges that describe relationships
       between nodes in the graph
     """
+    if not options:
+        raise ValueError("No options provided to PBFHandler for graph creation.")
    handler = PBFHandler(**options)
    handler.load()
    return handler.graph, handler.nodes, handler.edges
{sibi_dst-2025.1.11 → sibi_dst-2025.1.13}/sibi_dst/utils/date_utils.py

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import datetime
 from typing import Union, Tuple, Callable, Dict, Optional
 
@@ -306,64 +308,62 @@ class FileAgeChecker:
 
 
 # --- Vectorized Helper Functions ---
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
-
-
-
-    # Create a result series filled with NaN to handle rows with invalid dates
-    result = pd.Series(np.nan, index=partition.index, dtype=float)
-
-    # Create a boolean mask for valid, non-NaT date pairs
+
+def _vectorized_busday_count(partition, begin_col, end_col, holidays):
+    """
+    Calculates the number of business days between a start and end date.
+    """
+    # Extract the raw columns
+    start_dates_raw = partition[begin_col]
+    end_dates_raw = partition[end_col]
+
+
+    start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
+    end_dates = pd.to_datetime(end_dates_raw, errors='coerce')
+
+    # Initialize the result Series with NaN, as the output is a number
+    result = pd.Series(np.nan, index=partition.index)
+
+    # Create a mask for rows where both start and end dates are valid
     valid_mask = pd.notna(start_dates) & pd.notna(end_dates)
 
-    # Perform the vectorized calculation only on the valid subset
+    # Perform the vectorized calculation only on the valid subset
+    # Convert to NumPy arrays of date type for the calculation
     result.loc[valid_mask] = np.busday_count(
-        start_dates[valid_mask],
-        end_dates[valid_mask],
+        start_dates[valid_mask].values.astype('datetime64[D]'),
+        end_dates[valid_mask].values.astype('datetime64[D]'),
         holidays=holidays
     )
-    return result
 
+    return result
 
-def _vectorized_sla_end_date(
-    partition: pd.DataFrame,
-    start_col: str,
-    n_days_col: str,
-    holidays: list
-) -> pd.Series:
-    """Vectorized function to calculate the SLA end date on a DataFrame partition."""
-    if partition.empty:
-        return pd.Series([], dtype='datetime64[ns]')
 
-
+def _vectorized_sla_end_date(partition, start_col, n_days_col, holidays):
+    """
+    Calculates the end date of an SLA, skipping weekends and holidays.
+    """
+    # Extract the relevant columns as pandas Series
+    start_dates_raw = partition[start_col]
    sla_days = partition[n_days_col]
 
-
+
+    start_dates = pd.to_datetime(start_dates_raw, errors='coerce')
+
+    # Initialize the result Series with NaT (Not a Time)
    result = pd.Series(pd.NaT, index=partition.index, dtype='datetime64[ns]')
 
-    # Create a
+    # Create a mask for rows that have valid start dates and SLA days
    valid_mask = pd.notna(start_dates) & pd.notna(sla_days)
 
    # Perform the vectorized calculation only on the valid subset
+    # Note: np.busday_offset requires a NumPy array, so we use .values
    result.loc[valid_mask] = np.busday_offset(
-        start_dates[valid_mask],
+        start_dates[valid_mask].values.astype('datetime64[D]'),  # Convert to numpy array of dates
        sla_days[valid_mask].astype(int),  # Ensure days are integers
        roll='forward',
        holidays=holidays
    )
+
    return result
 
 
@@ -452,6 +452,7 @@ class BusinessDays:
             meta=(result_col, 'datetime64[ns]')
         )}
     )
+
 # Class enhancements
 # DateUtils.register_period('next_week', lambda: (datetime.date.today() + datetime.timedelta(days=7),
 #                                                 datetime.date.today() + datetime.timedelta(days=13)))