sibi-dst 0.3.26 → 0.3.28 (py3-none-any.whl)
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- sibi_dst/df_helper/__init__.py +2 -0
- sibi_dst/df_helper/_df_helper.py +180 -12
- sibi_dst/df_helper/core/_filter_handler.py +37 -0
- sibi_dst/df_helper/core/_params_config.py +2 -2
- sibi_dst/df_helper/data_cleaner.py +132 -0
- sibi_dst/osmnx_helper/__init__.py +9 -0
- sibi_dst/osmnx_helper/base_osm_map.py +165 -0
- sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- sibi_dst/osmnx_helper/basemaps/calendar_html.py +122 -0
- sibi_dst/osmnx_helper/basemaps/router_plotter.py +186 -0
- sibi_dst/osmnx_helper/utils.py +267 -0
- sibi_dst/tests/__init__.py +0 -0
- sibi_dst/tests/test_data_wrapper_class.py +78 -0
- sibi_dst/utils/clickhouse_writer.py +1 -1
- sibi_dst/utils/data_utils.py +1 -1
- sibi_dst/utils/data_wrapper.py +0 -26
- {sibi_dst-0.3.26.dist-info → sibi_dst-0.3.28.dist-info}/METADATA +6 -1
- {sibi_dst-0.3.26.dist-info → sibi_dst-0.3.28.dist-info}/RECORD +19 -10
- {sibi_dst-0.3.26.dist-info → sibi_dst-0.3.28.dist-info}/WHEEL +1 -1
sibi_dst/osmnx_helper/basemaps/calendar_html.py (new file)
@@ -0,0 +1,122 @@

```python
# HTML and CSS for the calendar button and popup
calendar_html = """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/flatpickr/dist/flatpickr.min.css">
<script src="https://cdn.jsdelivr.net/npm/flatpickr"></script>

<style>
    /* Style for the calendar button */
    .calendar-btn {
        background-color: white;
        border: 1px solid gray;
        border-radius: 3px;
        padding: 5px;
        font-size: 16px;
        cursor: pointer;
        position: fixed; /* Changed from absolute to fixed */
        bottom: 50px; /* Adjust position relative to the viewport */
        left: 10px; /* Adjust position relative to the viewport */
        z-index: 10000; /* Ensure it stays on top of other elements */
    }

    /* Calendar popup with sufficient size */
    .calendar-popup {
        display: none;
        position: fixed; /* Keep the popup fixed so it stays in view */
        bottom: 100px;
        left: 10px;
        background-color: white;
        padding: 10px;
        border: 1px solid gray;
        border-radius: 3px;
        z-index: 10000; /* Ensure it stays on top of other elements */
        width: 250px;
        height: 300px;
    }

    /* Ensure the calendar fits properly */
    #calendar {
        width: 100%;
        height: auto;
    }
</style>

<!-- Calendar Button -->
<div class="calendar-btn">📅 Select Date</div>

<!-- Calendar Popup -->
<div class="calendar-popup" id="calendar-popup">
    <div id="calendar"></div>
</div>

<script>
    // Initialize Flatpickr calendar
    const today = new Date().toISOString().split('T')[0];

    // Function to show the "Please wait" message
    function showLoadingMessage() {
        let loadingMessage = document.createElement("div");
        loadingMessage.id = "loading-message";
        loadingMessage.style.position = "fixed";
        loadingMessage.style.top = "50%";
        loadingMessage.style.left = "50%";
        loadingMessage.style.transform = "translate(-50%, -50%)";
        loadingMessage.style.backgroundColor = "rgba(0, 0, 0, 0.8)";
        loadingMessage.style.color = "white";
        loadingMessage.style.padding = "20px";
        loadingMessage.style.borderRadius = "5px";
        loadingMessage.style.zIndex = "9999";
        loadingMessage.innerText = "Please wait...";
        document.body.appendChild(loadingMessage);
    }

    // Function to remove the "Please wait" message
    function removeLoadingMessage() {
        let loadingMessage = document.getElementById("loading-message");
        if (loadingMessage) {
            loadingMessage.remove();
        }
    }

    flatpickr("#calendar", {
        inline: true, // Render the calendar inline within the container
        maxDate: today, // Disable future dates
        onChange: function(selectedDates, dateStr, instance) {
            console.log("Selected date: " + dateStr); // Debugging: Log the selected date
            // Get the current URL and create a URL object to manipulate the query parameters
            // Get the current URL from the parent window
            showLoadingMessage();
            let currentUrl = window.parent.location.href;

            // If the URL contains "srcdoc", remove it and use the correct base path
            if (currentUrl.includes("srcdoc")) {
                currentUrl = currentUrl.replace("srcdoc", "");
            }

            const url = new URL(currentUrl);

            // Set or update the 'date' parameter while preserving existing parameters
            url.searchParams.set('date', dateStr);

            console.log("Updated URL: " + url.toString()); // Debugging: Log the updated URL

            // Update the parent window's location with the new URL
            window.parent.location.href = url.toString();
        }
    });

    // Remove the "Please wait" message once the page has finished loading
    window.addEventListener("load", function() {
        removeLoadingMessage();
    });

    // Toggle the calendar popup when the button is clicked
    document.querySelector(".calendar-btn").addEventListener("click", function() {
        var popup = document.getElementById("calendar-popup");
        if (popup.style.display === "none" || popup.style.display === "") {
            popup.style.display = "block";
        } else {
            popup.style.display = "none";
        }
    });
</script>
"""
```
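The snippet is injected into a folium map as root-level HTML (see `_add_calendar` in `router_plotter.py` below). A minimal standalone sketch of that injection, using an arbitrary map location chosen for illustration:

```python
import folium

from sibi_dst.osmnx_helper.basemaps.calendar_html import calendar_html

# Build any folium map and attach the calendar button/popup to its root HTML.
m = folium.Map(location=[9.93, -84.08], zoom_start=12)
m.get_root().html.add_child(folium.Element(calendar_html))

# The saved page shows the 📅 button; picking a date reloads the parent
# page with a ?date=YYYY-MM-DD query parameter.
m.save("map_with_calendar.html")
```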
sibi_dst/osmnx_helper/basemaps/router_plotter.py (new file)
@@ -0,0 +1,186 @@

```python
from __future__ import annotations
from sibi_dst.osmnx_helper.utils import get_distance_between_points, add_arrows
from collections import defaultdict
import folium
from folium.plugins import AntPath
import networkx as nx

from sibi_dst.osmnx_helper import BaseOsmMap
from sibi_dst.osmnx_helper.basemaps.calendar_html import calendar_html


class RoutePlotter(BaseOsmMap):
    def __init__(self, osmnx_graph, df, **kwargs):
        self.action_field = kwargs.pop('action_field', '')
        self.action_groups = kwargs.pop('action_groups', {})
        self.action_styles = kwargs.pop('action_styles', {})
        self.use_ant_path = kwargs.pop('use_ant_path', True)
        self.show_calendar = kwargs.pop('show_calendar', True)
        self.show_map_title = kwargs.pop('show_map_title', True)
        self.sort_keys = kwargs.pop('sort_keys', None)
        self.main_route_layer = folium.FeatureGroup(name="Main Route")
        self.feature_groups = {}
        self.feature_group_counts = {}
        self.total_distance = 0.0
        self.actions = []
        self.action_group_counts = {action_group: 0 for action_group in self.action_groups}
        self.marker_count = 1
        kwargs.update({'calc_nearest_nodes': True})
        kwargs['dt_field'] = 'date_time'
        super().__init__(osmnx_graph, df, **kwargs)

    def pre_process_map(self):
        super().pre_process_map()
        self.actions = self.df[self.action_field].tolist()

    def process_map(self):
        self._calculate_routes()
        self._plot_routes()
        self._add_markers()
        self.main_route_layer.add_to(self.osm_map)
        if self.show_calendar:
            self._add_calendar()

    def _calculate_routes(self):
        if self.verbose:
            print("Calculating routes and markers...")
        distances = [
            get_distance_between_points(tuple(self.gps_points[0]), tuple(coord), 'm')
            for coord in self.gps_points
        ]
        self.max_distance_index = distances.index(max(distances))
        self.max_time_index = self.dt.index(max(self.dt))
        self.route_polylines = []
        self.markers = defaultdict(list)  # Store markers for action groups
        for i in range(len(self.gps_points) - 1):
            polyline, color, markers = self._calculate_route(i)
            if polyline:
                self.route_polylines.append((polyline, color))
                for action_group, action_markers in markers.items():
                    self.markers[action_group].extend(action_markers)
                    self.action_group_counts[action_group] += 1
                self.marker_count += 1
        if self.verbose:
            print("Route and marker calculation complete.")

        for action_group in self.action_groups:
            count = self.action_group_counts[action_group]
            self.feature_groups[action_group] = folium.FeatureGroup(name=f"{action_group} ({count})").add_to(
                self.osm_map)
            self.osm_map.add_child(self.feature_groups[action_group])

    def _calculate_route(self, i):
        if self.verbose:
            print(f"Calculating for item:{i}")
        orig = self.nearest_nodes[i]
        dest = self.nearest_nodes[i + 1]
        try:
            route = nx.shortest_path(self.G, orig, dest, weight='length')
            route_length = sum(self.G[u][v][0]['length'] for u, v in zip(route[:-1], route[1:]))
            self.total_distance += route_length
            offset = 0 if i < self.max_distance_index else 0.0005
            lats, lons = zip(*[(self.G.nodes[node]['y'] + offset, self.G.nodes[node]['x']) for node in route])
            color = 'blue' if i < self.max_distance_index else 'red'
            polyline = list(zip(lats, lons))
            markers = self._calculate_markers(i)
            return polyline, color, markers
        except nx.NetworkXNoPath:
            if self.verbose:
                print(f"Item:{i}-No path found for {orig} to {dest}")
            return None, None, {}
        except nx.NodeNotFound:
            if self.verbose:
                print(f"Item:{i}-No path found for {orig} to {dest}")
            return None, None, {}

    def _calculate_markers(self, i):
        # Calculate markers for action groups
        markers = defaultdict(list)
        for action_group in self.action_groups:
            action_indices = [idx for idx, action in enumerate(self.actions) if action == action_group]
            for idx in action_indices:
                if idx == i:
                    location = self.gps_points[i]
                    tooltip = f"Result {self.marker_count}: {action_group}<br>Date/time:{self.dt[i]}"
                    popup_data = self._get_data(i)
                    action_style = self.action_styles.get(action_group,
                                                          {'color': 'blue', 'icon': 'marker', 'prefix': 'fa'})
                    markers[action_group].append((location, tooltip, popup_data, action_style))
        return markers

    def _plot_routes(self):
        if self.verbose:
            print("Plotting routes and markers...")
        # self.action_group_counts = {action_group: 0 for action_group in self.feature_groups.keys()}
        for polyline, color in self.route_polylines:
            if self.use_ant_path:
                AntPath(
                    locations=polyline,
                    color=color,
                    weight=3,  # Increase line thickness
                    opacity=10,  # Increase opacity
                    # pulse_color=color,
                    delay=1000,  # Slower animation to reduce flickering
                    # dash_array=[20, 30]  # Adjust dash pattern if needed
                ).add_to(self.main_route_layer)
            else:
                folium.PolyLine(locations=polyline, color=color).add_to(self.main_route_layer)
                self.osm_map = add_arrows(self.osm_map, polyline, color, n_arrows=3)
        # Plot markers for action groups
        for action_group, action_markers in self.markers.items():
            for location, tooltip, popup_data, action_style in action_markers:
                folium.Marker(
                    location=location,
                    popup=folium.Popup(popup_data, max_width=600),
                    tooltip=tooltip,
                    icon=folium.Icon(
                        icon=action_style.get("icon"),
                        color=action_style.get("color"),
                        prefix=action_style.get("prefix")
                    )
                ).add_to(self.feature_groups[action_group])

        if self.verbose:
            print("Route and marker plotting complete.")

    def _add_markers(self):
        if self.verbose:
            print("Adding markers...")
        # Add start marker
        start_popup = folium.Popup(f"Start of route at {self.dt[0]}", max_width=300)
        folium.Marker(location=self.gps_points[0], popup=start_popup,
                      icon=folium.Icon(icon='flag-checkered', prefix='fa')).add_to(self.osm_map)
        # Add total distance marker at the end
        folium.Marker(
            self.gps_points[-1],
            popup=f"End of Route at {self.dt[self.max_time_index]}. Total Distance Travelled: {self.total_distance / 1000:.2f} km",
            icon=folium.Icon(color="red", icon="flag-checkered", prefix="fa")
        ).add_to(self.osm_map)
        if self.verbose:
            print("Marker addition complete.")

    def _add_calendar(self):
        calendar_element = folium.Element(calendar_html)
        self.osm_map.get_root().html.add_child(calendar_element)

    def _add_map_title(self):
        if self.map_html_title and self.show_map_title:
            title_html = f'''
                <div style="position: fixed;
                            top: 10px;
                            left: 50%;
                            transform: translate(-50%, 0%);
                            z-index: 9999;
                            font-size: 24px;
                            font-weight: bold;
                            background-color: white;
                            padding: 10px;
                            border: 2px solid black;
                            border-radius: 5px;">
                    {self.map_html_title}
                </div>
            '''
            self.osm_map.get_root().html.add_child(folium.Element(title_html))

    def _get_data(self, index):
        # implement in subclass to populate popups
        ...
```
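For orientation, a sketch of how `RoutePlotter` might be wired up. The keyword names come from `__init__` above; everything else — the latitude/longitude column names, the meaning of the `action_groups` values (only the keys are used in the code shown), and the `BaseOsmMap` entry point that eventually calls `pre_process_map()`/`process_map()` — belongs to `BaseOsmMap`, which is not part of this diff, so treat this as an assumption-laden illustration rather than the package's documented API:

```python
import pandas as pd

from sibi_dst.osmnx_helper.utils import get_graph
from sibi_dst.osmnx_helper.basemaps.router_plotter import RoutePlotter

# Road network cached/loaded by PBFHandler (see utils.py in this diff).
graph, nodes, edges = get_graph(ox_files_save_path="gis_data/", place="Costa Rica")

# Hypothetical tracking data: one GPS fix per row, with the 'date_time'
# column that RoutePlotter forces via kwargs['dt_field'] = 'date_time'.
df = pd.DataFrame({
    "latitude": [9.9281, 9.9350, 9.9412],        # column names assumed by BaseOsmMap
    "longitude": [-84.0907, -84.0820, -84.0755],
    "date_time": pd.to_datetime(["2024-01-01 08:00", "2024-01-01 08:10", "2024-01-01 08:20"]),
    "action": ["pickup", "transit", "delivery"],
})

plotter = RoutePlotter(
    graph,
    df,
    action_field="action",
    action_groups={"pickup": "Pickups", "delivery": "Deliveries"},  # keys matched against df['action']
    action_styles={"pickup": {"color": "green", "icon": "play", "prefix": "fa"},
                   "delivery": {"color": "red", "icon": "stop", "prefix": "fa"}},
    use_ant_path=True,
    show_calendar=True,
)
# The BaseOsmMap workflow (not shown in this diff) drives pre_process_map()
# and process_map() and exposes the resulting folium map.
```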
sibi_dst/osmnx_helper/utils.py (new file)
@@ -0,0 +1,267 @@

```python
import math
import os
import pickle
from urllib.parse import urlencode, urlsplit, urlunsplit

import folium
import geopandas as gpd
import numpy as np
import osmnx as ox
from geopy.distance import geodesic


#
# options = {
#     'ox_files_save_path': ox_files_save_path,
#     'network_type': 'drive',
#     'place': 'Costa Rica',
#     'files_prefix': 'costa-rica-',
# }
# Usage example
# handler = PBFHandler(**options)
# handler.load()


class PBFHandler:
    def __init__(self, **kwargs):
        self.graph = None
        self.nodes = None
        self.edges = None
        self.rebuild = kwargs.setdefault("rebuild", False)
        self.verbose = kwargs.setdefault("verbose", False)
        self.place = kwargs.setdefault('place', 'Costa Rica')
        self.filepath = kwargs.setdefault('ox_files_save_path', "gis_data/")
        self.file_prefix = kwargs.setdefault('file_prefix', 'costa-rica-')
        self.network_type = kwargs.setdefault('network_type', 'all')
        self.graph_file = f"{self.filepath}{self.file_prefix}graph.pkl"
        self.node_file = f"{self.filepath}{self.file_prefix}nodes.pkl"
        self.edge_file = f"{self.filepath}{self.file_prefix}edges.pkl"

    def load(self):
        if self.verbose:
            print("Loading data...")

        files_to_check = [self.graph_file, self.node_file, self.edge_file]

        if self.rebuild:
            for file in files_to_check:
                if os.path.exists(file):
                    os.remove(file)
        if not os.path.exists(self.filepath):
            os.makedirs(self.filepath, exist_ok=True)
        # self.process_pbf()
        # self.save_to_pickle()
        if not all(os.path.exists(f) for f in files_to_check):
            self.process_pbf()
            self.save_to_pickle()
        else:
            self.load_from_pickle()

        if self.verbose:
            print("Data loaded successfully.")

    def process_pbf(self):
        """
        Load a PBF file and create a graph.
        """
        try:
            if self.verbose:
                print(f"Processing PBF for {self.place}...")

            self.graph = ox.graph_from_place(self.place, network_type=self.network_type)
            self.nodes, self.edges = ox.graph_to_gdfs(self.graph)

            if self.verbose:
                print("PBF processed successfully.")
        except Exception as e:
            print(f"Error processing PBF: {e}")
            raise

    def save_to_pickle(self):
        """
        Save the graph, nodes, and edges to pickle files.
        """
        try:
            if self.verbose:
                print("Saving data to pickle files...")

            data_to_save = {
                self.graph_file: self.graph,
                self.node_file: self.nodes,
                self.edge_file: self.edges
            }

            for file, data in data_to_save.items():
                if data is not None:
                    with open(file, 'wb') as f:
                        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

            if self.verbose:
                print("Data saved to pickle files successfully.")
        except Exception as e:
            print(f"Error saving to pickle: {e}")
            raise

    def load_from_pickle(self):
        """
        Load the graph, nodes, and edges from pickle files.
        """
        try:
            if self.verbose:
                print("Loading data from pickle files...")

            files_to_load = {
                self.graph_file: 'graph',
                self.node_file: 'nodes',
                self.edge_file: 'edges'
            }

            for file, attr in files_to_load.items():
                with open(file, 'rb') as f:
                    setattr(self, attr, pickle.load(f))

            if self.verbose:
                print("Data loaded from pickle files successfully.")
        except Exception as e:
            print(f"Error loading from pickle: {e}")
            raise

    def plot_graph(self):
        """
        Plot the graph.
        """
        try:
            if self.graph is not None:
                if self.verbose:
                    print("Plotting the graph...")
                ox.plot_graph(self.graph)
                if self.verbose:
                    print("Graph plotted successfully.")
            else:
                print("Graph is not loaded. Please load a PBF file first.")
        except Exception as e:
            print(f"Error plotting the graph: {e}")
            raise


def get_bounding_box_from_points(gps_points, margin=0.001):
    latitudes = [point[0] for point in gps_points]
    longitudes = [point[1] for point in gps_points]

    north = max(latitudes) + margin
    south = min(latitudes) - margin
    east = max(longitudes) + margin
    west = min(longitudes) - margin

    return north, south, east, west


def add_arrows(map_object, locations, color, n_arrows):
    # Get the number of locations
    n = len(locations)

    # If there are more than two points...
    if n > 2:
        # Add arrows along the path
        for i in range(0, n - 1, n // n_arrows):
            # Get the start and end point for this segment
            start, end = locations[i], locations[i + 1]

            # Calculate the direction in which to place the arrow
            rotation = -np.arctan2((end[1] - start[1]), (end[0] - start[0])) * 180 / np.pi

            folium.RegularPolygonMarker(location=end,
                                        fill_color=color,
                                        number_of_sides=2,
                                        radius=6,
                                        rotation=rotation).add_to(map_object)
    return map_object


def extract_subgraph(G, north, south, east, west):
    # Create a bounding box polygon
    # from osmnx v2 this is how it is done
    if ox.__version__ >= '2.0':
        bbox_poly = gpd.GeoSeries([ox.utils_geo.bbox_to_poly(bbox=(west, south, east, north))])
    else:
        bbox_poly = gpd.GeoSeries([ox.utils_geo.bbox_to_poly(north, south, east, west)])

    # Get nodes GeoDataFrame
    nodes_gdf = ox.graph_to_gdfs(G, nodes=True, edges=False)

    # Find nodes within the bounding box
    nodes_within_bbox = nodes_gdf[nodes_gdf.geometry.within(bbox_poly.geometry.unary_union)]

    # Create subgraph
    subgraph = G.subgraph(nodes_within_bbox.index)

    return subgraph


def get_distance_between_points(point_a, point_b, unit='km'):
    if not isinstance(point_a, tuple) or len(point_a) != 2:
        return 0
    if not all(isinstance(x, float) and not math.isnan(x) for x in point_a):
        return 0
    if not isinstance(point_b, tuple) or len(point_b) != 2:
        return 0
    if not all(isinstance(x, float) and not math.isnan(x) for x in point_b):
        return 0
    distance = geodesic(point_a, point_b)
    if unit == 'km':
        return distance.kilometers
    elif unit == 'm':
        return distance.meters
    elif unit == 'mi':
        return distance.miles
    else:
        return 0


tile_options = {
    "OpenStreetMap": "OpenStreetMap",
    "CartoDB": "cartodbpositron",
    "CartoDB Voyager": "cartodbvoyager"
}


def attach_supported_tiles(map_object, default_tile="OpenStreetMap"):
    # Normalize the default tile name to lowercase for comparison
    normalized_default_tile = default_tile.lower()

    # Filter out the default tile layer from the options to avoid duplication
    tile_options_filtered = {k: v for k, v in tile_options.items() if v.lower() != normalized_default_tile}

    for tile, description in tile_options_filtered.items():
        folium.TileLayer(name=tile, tiles=description, show=False).add_to(map_object)


def get_graph(**options):
    handler = PBFHandler(**options)
    handler.load()
    return handler.graph, handler.nodes, handler.edges


def add_query_params(url, params):
    # Parse the original URL
    url_components = urlsplit(url)

    # Parse original query parameters and update with new params
    original_params = dict([tuple(pair.split('=')) for pair in url_components.query.split('&') if pair])
    original_params.update(params)

    # Construct the new query string
    new_query_string = urlencode(original_params)

    # Construct the new URL
    new_url = urlunsplit((
        url_components.scheme,
        url_components.netloc,
        url_components.path,
        new_query_string,
        url_components.fragment
    ))

    return new_url
```
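The commented-out usage block at the top of the module translates directly into the sketch below (note that the comment uses `files_prefix` while the constructor actually reads `file_prefix`); the paths and place name are just the defaults shown above:

```python
from sibi_dst.osmnx_helper.utils import (
    get_graph,
    get_bounding_box_from_points,
    extract_subgraph,
)

options = {
    "ox_files_save_path": "gis_data/",  # where graph/nodes/edges pickles are cached
    "network_type": "drive",
    "place": "Costa Rica",
    "file_prefix": "costa-rica-",
    "rebuild": False,                   # set True to discard the cached pickles
    "verbose": True,
}

# get_graph() wraps PBFHandler: download (or load from cache) and return
# the graph plus its node/edge GeoDataFrames.
graph, nodes, edges = get_graph(**options)

# Clip the graph to the area covered by a set of GPS points.
gps_points = [(9.9281, -84.0907), (9.9350, -84.0820)]
north, south, east, west = get_bounding_box_from_points(gps_points, margin=0.001)
subgraph = extract_subgraph(graph, north, south, east, west)
```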
sibi_dst/tests/__init__.py (new file, no content)
sibi_dst/tests/test_data_wrapper_class.py (new file)
@@ -0,0 +1,78 @@

```python
import unittest
from unittest.mock import patch, MagicMock
import datetime
import pandas as pd
from sibi_dst.utils import Logger, ParquetSaver
from sibi_dst.utils.data_wrapper import DataWrapper


class TestDataWrapper(unittest.TestCase):

    def setUp(self):
        self.dataclass = MagicMock()
        self.date_field = "created_at"
        self.data_path = "/path/to/data"
        # self.data_path = "s3://your-bucket-name/path/to/data"
        self.parquet_filename = "data.parquet"
        self.start_date = "2022-01-01"
        self.end_date = "2022-12-31"
        self.filesystem_type = "file"
        self.filesystem_options = {
            # "key": "your_aws_access_key",
            # "secret": "your_aws_secret_key",
            # "client_kwargs": {"endpoint_url": "https://s3.amazonaws.com"}
        }
        self.logger = Logger.default_logger(logger_name="TestLogger")

    def test_initialization(self):
        wrapper = DataWrapper(
            dataclass=self.dataclass,
            date_field=self.date_field,
            data_path=self.data_path,
            parquet_filename=self.parquet_filename,
            start_date=self.start_date,
            end_date=self.end_date,
            filesystem_type=self.filesystem_type,
            filesystem_options=self.filesystem_options,
            logger=self.logger
        )
        self.assertEqual(wrapper.dataclass, self.dataclass)
        self.assertEqual(wrapper.date_field, self.date_field)
        self.assertEqual(wrapper.data_path, "/path/to/data/")
        self.assertEqual(wrapper.parquet_filename, self.parquet_filename)
        self.assertEqual(wrapper.start_date, datetime.date(2022, 1, 1))
        self.assertEqual(wrapper.end_date, datetime.date(2022, 12, 31))
        self.assertEqual(wrapper.filesystem_type, self.filesystem_type)
        self.assertEqual(wrapper.filesystem_options, self.filesystem_options)
        self.assertEqual(wrapper.logger, self.logger)

    def test_convert_to_date(self):
        self.assertEqual(DataWrapper.convert_to_date("2022-01-01"), datetime.date(2022, 1, 1))
        self.assertEqual(DataWrapper.convert_to_date(datetime.date(2022, 1, 1)), datetime.date(2022, 1, 1))
        with self.assertRaises(ValueError):
            DataWrapper.convert_to_date("invalid-date")

    @patch('fsspec.filesystem')
    def test_is_file_older_than(self, mock_filesystem):
        mock_fs = mock_filesystem.return_value
        mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1500)).timestamp()}

        wrapper = DataWrapper(
            dataclass=self.dataclass,
            date_field=self.date_field,
            data_path=self.data_path,
            parquet_filename=self.parquet_filename,
            start_date=self.start_date,
            end_date=self.end_date,
            filesystem_type=self.filesystem_type,
            filesystem_options=self.filesystem_options,
            logger=self.logger
        )

        self.assertTrue(wrapper.is_file_older_than("some/file/path"))
        mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1000)).timestamp()}
        self.assertFalse(wrapper.is_file_older_than("some/file/path"))


if __name__ == '__main__':
    unittest.main()
```
sibi_dst/utils/clickhouse_writer.py (changed)

```diff
@@ -125,7 +125,7 @@ class ClickHouseWriter:
         """
         Writes the Dask DataFrame to a ClickHouse table partition by partition.
         """
-        if len(self.df.
+        if len(self.df.index) == 0:
             self.logger.debug("No data found. Nothing written.")
             return
```
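The `-` line is truncated by the diff renderer; the point of the change is the emptiness check itself. `len(df.index)` works for both pandas and Dask DataFrames (for Dask it computes the total row count across partitions, since truthiness of a lazy frame is not defined). A small illustration with throwaway data, not taken from the package:

```python
import dask.dataframe as dd
import pandas as pd

ddf = dd.from_pandas(pd.DataFrame({"a": []}), npartitions=1)

# len() of the index materialises the row count for Dask frames.
if len(ddf.index) == 0:
    print("No data found. Nothing written.")
```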
sibi_dst/utils/data_utils.py (changed)

```diff
@@ -95,7 +95,7 @@ class DataUtils:
         # Get unique IDs from source column
         ids = df[source_col].dropna().unique()
         # Compute if it's a Dask Series
-        if isinstance(ids, dd.
+        if isinstance(ids, dd.Series):
             ids = ids.compute()

         # Check if any IDs are found
```
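Again the `-` line is truncated by the renderer; the fix completes the type guard. `.dropna().unique()` returns a NumPy array for a pandas column but a lazy `dd.Series` for a Dask column, so only the Dask result needs an explicit `compute()`. A hedged sketch of the pattern with illustrative data:

```python
import dask.dataframe as dd
import pandas as pd

df = dd.from_pandas(pd.DataFrame({"source_id": [1, 2, 2, None]}), npartitions=2)

ids = df["source_id"].dropna().unique()
# Materialise only when the result is still a lazy Dask Series.
if isinstance(ids, dd.Series):
    ids = ids.compute()
print(list(ids))
```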