sibi-dst 0.3.26__py3-none-any.whl → 0.3.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+
2
+ # HTML, CSS and JavaScript (Flatpickr) for the calendar button and popup
3
+ calendar_html = """
4
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/flatpickr/dist/flatpickr.min.css">
5
+ <script src="https://cdn.jsdelivr.net/npm/flatpickr"></script>
6
+
7
+ <style>
8
+ /* Style for the calendar button */
9
+ .calendar-btn {
10
+ background-color: white;
11
+ border: 1px solid gray;
12
+ border-radius: 3px;
13
+ padding: 5px;
14
+ font-size: 16px;
15
+ cursor: pointer;
16
+ position: fixed; /* Changed from absolute to fixed */
17
+ bottom: 50px; /* Adjust position relative to the viewport */
18
+ left: 10px; /* Adjust position relative to the viewport */
19
+ z-index: 10000; /* Ensure it stays on top of other elements */
20
+ }
21
+
22
+ /* Calendar popup with sufficient size */
23
+ .calendar-popup {
24
+ display: none;
25
+ position: fixed; /* Keep the popup fixed so it stays in view */
26
+ bottom: 100px;
27
+ left: 10px;
28
+ background-color: white;
29
+ padding: 10px;
30
+ border: 1px solid gray;
31
+ border-radius: 3px;
32
+ z-index: 10000; /* Ensure it stays on top of other elements */
33
+ width: 250px;
34
+ height: 300px;
35
+ }
36
+
37
+ /* Ensure the calendar fits properly */
38
+ #calendar {
39
+ width: 100%;
40
+ height: auto;
41
+ }
42
+ </style>
43
+
44
+ <!-- Calendar Button -->
45
+ <div class="calendar-btn">📅 Select Date</div>
46
+
47
+ <!-- Calendar Popup -->
48
+ <div class="calendar-popup" id="calendar-popup">
49
+ <div id="calendar"></div>
50
+ </div>
51
+
52
+ <script>
53
+ // Initialize Flatpickr calendar
54
+ const today = new Date().toISOString().split('T')[0];
55
+ // Function to show the "Please wait" message
56
+ function showLoadingMessage() {
57
+ let loadingMessage = document.createElement("div");
58
+ loadingMessage.id = "loading-message";
59
+ loadingMessage.style.position = "fixed";
60
+ loadingMessage.style.top = "50%";
61
+ loadingMessage.style.left = "50%";
62
+ loadingMessage.style.transform = "translate(-50%, -50%)";
63
+ loadingMessage.style.backgroundColor = "rgba(0, 0, 0, 0.8)";
64
+ loadingMessage.style.color = "white";
65
+ loadingMessage.style.padding = "20px";
66
+ loadingMessage.style.borderRadius = "5px";
67
+ loadingMessage.style.zIndex = "9999";
68
+ loadingMessage.innerText = "Please wait...";
69
+ document.body.appendChild(loadingMessage);
70
+ }
71
+
72
+ // Function to remove the "Please wait" message
73
+ function removeLoadingMessage() {
74
+ let loadingMessage = document.getElementById("loading-message");
75
+ if (loadingMessage) {
76
+ loadingMessage.remove();
77
+ }
78
+ }
79
+
80
+
81
+ flatpickr("#calendar", {
82
+ inline: true, // Render the calendar inline within the container
83
+ maxDate: today, // Disable future dates
84
+ onChange: function(selectedDates, dateStr, instance) {
85
+ console.log("Selected date: " + dateStr); // Debugging: Log the selected date
86
+ // Get the current URL and create a URL object to manipulate the query parameters
87
+ // Get the current URL from the parent window
88
+ showLoadingMessage();
89
+ let currentUrl = window.parent.location.href;
90
+
91
+ // If the URL contains "srcdoc", remove it and use the correct base path
92
+ if (currentUrl.includes("srcdoc")) {
93
+ currentUrl = currentUrl.replace("srcdoc", "");
94
+ }
95
+
96
+ const url = new URL(currentUrl);
97
+
98
+ // Set or update the 'date' parameter while preserving existing parameters
99
+ url.searchParams.set('date', dateStr);
100
+
101
+ console.log("Updated URL: " + url.toString()); // Debugging: Log the updated URL
102
+
103
+ // Update the parent window's location with the new URL
104
+ window.parent.location.href = url.toString();
105
+ }
106
+ });
107
+ // Remove the "Please wait" message once the page has finished loading
108
+ window.addEventListener("load", function() {
109
+ removeLoadingMessage();
110
+ });
111
+
112
+ // Toggle the calendar popup when the button is clicked
113
+ document.querySelector(".calendar-btn").addEventListener("click", function() {
114
+ var popup = document.getElementById("calendar-popup");
115
+ if (popup.style.display === "none" || popup.style.display === "") {
116
+ popup.style.display = "block";
117
+ } else {
118
+ popup.style.display = "none";
119
+ }
120
+ });
121
+ </script>
122
+ """
@@ -0,0 +1,186 @@
1
+ from __future__ import annotations
2
+ from sibi_dst.osmnx_helper.utils import get_distance_between_points, add_arrows
3
+ from collections import defaultdict
4
+ import folium
5
+ from folium.plugins import AntPath
6
+ import networkx as nx
7
+
8
+ from sibi_dst.osmnx_helper import BaseOsmMap
9
+ from sibi_dst.osmnx_helper.basemaps.calendar_html import calendar_html
10
+
11
class RoutePlotter(BaseOsmMap):
    """Draw a sequence of GPS points as road-network routes with action markers.

    Consecutive points are joined by the shortest path (by edge ``length``)
    through the OSMnx graph.  Segments that start before the point farthest
    from the origin are drawn blue; the remaining ones are drawn red and
    shifted slightly north so both directions stay visible.

    Keyword arguments consumed here (all others are forwarded to
    ``BaseOsmMap``):
        action_field: name of the DataFrame column holding each row's action.
        action_groups: iterable of action values that get their own marker
            layer.
        action_styles: mapping of action value -> dict with ``icon``,
            ``color`` and ``prefix`` keys for ``folium.Icon``.
        use_ant_path: draw animated ``AntPath`` lines instead of plain
            polylines with arrows (default ``True``).
        show_calendar: inject the date-picker widget (default ``True``).
        show_map_title: allow ``_add_map_title`` to render the title
            (default ``True``).
        sort_keys: stored on the instance; not used by this class itself.

    NOTE(review): ``self.G``, ``self.df``, ``self.dt``, ``self.gps_points``,
    ``self.nearest_nodes``, ``self.osm_map``, ``self.verbose`` and
    ``self.map_html_title`` are assumed to be provided by ``BaseOsmMap`` --
    confirm against the base class.
    """

    def __init__(self, osmnx_graph, df, **kwargs):
        self.action_field = kwargs.pop('action_field', '')
        self.action_groups = kwargs.pop('action_groups', {})
        self.action_styles = kwargs.pop('action_styles', {})
        self.use_ant_path = kwargs.pop('use_ant_path', True)
        self.show_calendar = kwargs.pop('show_calendar', True)
        self.show_map_title = kwargs.pop('show_map_title', True)
        self.sort_keys = kwargs.pop('sort_keys', None)
        self.main_route_layer = folium.FeatureGroup(name="Main Route")
        self.feature_groups = {}
        self.feature_group_counts = {}
        self.total_distance = 0.0
        self.actions = []
        self.action_group_counts = {action_group: 0 for action_group in self.action_groups}
        self.marker_count = 1
        # Routing needs the nearest graph node of every GPS point, and the
        # timestamp column name is fixed for this plotter.
        kwargs.update({'calc_nearest_nodes': True})
        kwargs['dt_field'] = 'date_time'
        super().__init__(osmnx_graph, df, **kwargs)

    def pre_process_map(self):
        """Run the base pre-processing, then cache the per-row action values."""
        super().pre_process_map()
        # The default action_field is '' which is not a real column; treat it
        # as "no actions" instead of failing with a KeyError.
        if self.action_field:
            self.actions = self.df[self.action_field].tolist()
        else:
            self.actions = []

    def process_map(self):
        """Compute routes and markers and attach every layer to the map."""
        self._calculate_routes()
        self._plot_routes()
        self._add_markers()
        self.main_route_layer.add_to(self.osm_map)
        if self.show_calendar:
            self._add_calendar()

    def _calculate_routes(self):
        """Compute one polyline per consecutive point pair plus its markers."""
        if self.verbose:
            print("Calculating routes and markers...")
        origin = tuple(self.gps_points[0])
        distances = [
            get_distance_between_points(origin, tuple(coord), 'm')
            for coord in self.gps_points
        ]
        # Index of the point farthest from the start: used to tell the
        # outbound part of the trip from the return part.
        self.max_distance_index = distances.index(max(distances))
        self.max_time_index = self.dt.index(max(self.dt))
        self.route_polylines = []
        self.markers = defaultdict(list)  # Store markers for action groups
        for i in range(len(self.gps_points) - 1):
            polyline, color, markers = self._calculate_route(i)
            if polyline:
                self.route_polylines.append((polyline, color))
            for action_group, action_markers in markers.items():
                self.markers[action_group].extend(action_markers)
                self.action_group_counts[action_group] += 1
                self.marker_count += 1
        if self.verbose:
            print("Route and marker calculation complete.")

        for action_group in self.action_groups:
            count = self.action_group_counts[action_group]
            # add_to() already registers the group on the map and returns it.
            self.feature_groups[action_group] = folium.FeatureGroup(
                name=f"{action_group} ({count})"
            ).add_to(self.osm_map)

    def _calculate_route(self, i):
        """Route from point ``i`` to point ``i + 1``.

        Returns:
            ``(polyline, color, markers)`` where ``polyline`` is a list of
            ``(lat, lon)`` tuples, or ``(None, None, {})`` when the graph has
            no path between the two nearest nodes.
        """
        if self.verbose:
            print(f"Calculating for item:{i}")
        orig = self.nearest_nodes[i]
        dest = self.nearest_nodes[i + 1]
        try:
            route = nx.shortest_path(self.G, orig, dest, weight='length')
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            if self.verbose:
                print(f"Item:{i}-No path found for {orig} to {dest}")
            return None, None, {}

        # Between two nodes of a multigraph there may be several parallel
        # edges and key 0 is not guaranteed to exist (or to be the shortest).
        # The weighted shortest path uses the lightest one, so measure that.
        route_length = sum(
            min(attrs['length'] for attrs in self.G[u][v].values())
            for u, v in zip(route[:-1], route[1:])
        )
        self.total_distance += route_length

        is_return_leg = i >= self.max_distance_index
        # Return-leg segments are nudged north so they do not hide the
        # outbound line drawn on the same streets.
        offset = 0.0005 if is_return_leg else 0
        color = 'red' if is_return_leg else 'blue'
        polyline = [
            (self.G.nodes[node]['y'] + offset, self.G.nodes[node]['x'])
            for node in route
        ]
        markers = self._calculate_markers(i)
        return polyline, color, markers

    def _calculate_markers(self, i):
        """Return the marker descriptions for point ``i``, keyed by action group.

        Each marker is a ``(location, tooltip, popup_data, action_style)``
        tuple.  A group only appears in the result when the action recorded
        for row ``i`` equals that group.
        """
        markers = defaultdict(list)
        if i >= len(self.actions):
            return markers
        action = self.actions[i]
        for action_group in self.action_groups:
            if action != action_group:
                continue
            location = self.gps_points[i]
            tooltip = f"Result {self.marker_count}: {action_group}<br>Date/time:{self.dt[i]}"
            popup_data = self._get_data(i)
            action_style = self.action_styles.get(
                action_group,
                {'color': 'blue', 'icon': 'marker', 'prefix': 'fa'},
            )
            markers[action_group].append((location, tooltip, popup_data, action_style))
        return markers

    def _plot_routes(self):
        """Draw the computed polylines and the action-group markers."""
        if self.verbose:
            print("Plotting routes and markers...")
        for polyline, color in self.route_polylines:
            if self.use_ant_path:
                AntPath(
                    locations=polyline,
                    color=color,
                    weight=3,  # Line thickness
                    opacity=1.0,  # Fully opaque; valid range is 0..1
                    delay=1000,  # Slower animation to reduce flickering
                ).add_to(self.main_route_layer)
            else:
                folium.PolyLine(locations=polyline, color=color).add_to(self.main_route_layer)
                self.osm_map = add_arrows(self.osm_map, polyline, color, n_arrows=3)
        # Plot markers for action groups
        for action_group, action_markers in self.markers.items():
            for location, tooltip, popup_data, action_style in action_markers:
                folium.Marker(
                    location=location,
                    popup=folium.Popup(popup_data, max_width=600),
                    tooltip=tooltip,
                    icon=folium.Icon(
                        icon=action_style.get("icon"),
                        color=action_style.get("color"),
                        prefix=action_style.get("prefix"),
                    ),
                ).add_to(self.feature_groups[action_group])

        if self.verbose:
            print("Route and marker plotting complete.")

    def _add_markers(self):
        """Add the start flag and the end flag (with total distance) to the map."""
        if self.verbose:
            print("Adding markers...")
        # Add start marker
        start_popup = folium.Popup(f"Start of route at {self.dt[0]}", max_width=300)
        folium.Marker(
            location=self.gps_points[0],
            popup=start_popup,
            icon=folium.Icon(icon='flag-checkered', prefix='fa'),
        ).add_to(self.osm_map)
        # Add total distance marker at the end
        folium.Marker(
            self.gps_points[-1],
            popup=f"End of Route at {self.dt[self.max_time_index]}. Total Distance Travelled: {self.total_distance / 1000:.2f} km",
            icon=folium.Icon(color="red", icon="flag-checkered", prefix="fa"),
        ).add_to(self.osm_map)
        if self.verbose:
            print("Marker addition complete.")

    def _add_calendar(self):
        """Inject the date-picker HTML/JS snippet into the map's root document."""
        calendar_element = folium.Element(calendar_html)
        self.osm_map.get_root().html.add_child(calendar_element)

    def _add_map_title(self):
        """Render ``self.map_html_title`` as a fixed banner at the top of the map.

        Nothing is added when the title is empty or ``show_map_title`` is
        false.  The title is inserted into the HTML as-is (not escaped).
        """
        if self.map_html_title and self.show_map_title:
            title_html = f'''
                <div style="position: fixed;
                            top: 10px;
                            left: 50%;
                            transform: translate(-50%, 0%);
                            z-index: 9999;
                            font-size: 24px;
                            font-weight: bold;
                            background-color: white;
                            padding: 10px;
                            border: 2px solid black;
                            border-radius: 5px;">
                    {self.map_html_title}
                </div>
            '''
            self.osm_map.get_root().html.add_child(folium.Element(title_html))

    def _get_data(self, index):
        """Hook: return the popup content for row ``index``.

        The base implementation returns ``None``; subclasses override it to
        populate marker popups.
        """
        # implement in subclass to populate popups
        ...
@@ -0,0 +1,267 @@
1
+ import math
2
+ import os
3
+ import pickle
4
+ from urllib.parse import urlencode, urlsplit, urlunsplit
5
+
6
+ import folium
7
+ import geopandas as gpd
8
+ import numpy as np
9
+ import osmnx as ox
10
+ from geopy.distance import geodesic
11
+
12
+
13
+ #
14
+ # options = {
15
+ # 'ox_files_save_path': ox_files_save_path,
16
+ # 'network_type': 'drive',
17
+ # 'place': 'Costa Rica',
18
+ # 'file_prefix': 'costa-rica-',
19
+ # }
20
+ # Usage example
21
+ # handler = PBFHandler(**options)
22
+ # handler.load()
23
+
24
+
25
class PBFHandler:
    """Build, cache and reload an OSMnx street network for a named place.

    The graph and its node/edge GeoDataFrames are stored as three pickle
    files inside ``ox_files_save_path``; later runs load those files instead
    of querying OpenStreetMap again.

    Keyword arguments:
        rebuild: delete any cached files and rebuild them (default ``False``).
        verbose: print progress messages (default ``False``).
        place: place name passed to ``ox.graph_from_place``
            (default ``'Costa Rica'``).
        ox_files_save_path: directory for the cache files
            (default ``"gis_data/"``).
        file_prefix: file-name prefix for the cache files
            (default ``'costa-rica-'``); ``files_prefix`` is accepted as an
            alias.
        network_type: OSMnx network type (default ``'all'``).
    """

    def __init__(self, **kwargs):
        self.graph = None
        self.nodes = None
        self.edges = None
        self.rebuild = kwargs.get("rebuild", False)
        self.verbose = kwargs.get("verbose", False)
        self.place = kwargs.get('place', 'Costa Rica')
        self.filepath = kwargs.get('ox_files_save_path', "gis_data/")
        # 'files_prefix' is the spelling used in the module's usage example;
        # honour it when 'file_prefix' itself is not supplied.
        self.file_prefix = kwargs.get(
            'file_prefix', kwargs.get('files_prefix', 'costa-rica-')
        )
        self.network_type = kwargs.get('network_type', 'all')
        # os.path.join keeps the files inside the directory whether or not
        # the configured path ends with a separator.
        self.graph_file = os.path.join(self.filepath, f"{self.file_prefix}graph.pkl")
        self.node_file = os.path.join(self.filepath, f"{self.file_prefix}nodes.pkl")
        self.edge_file = os.path.join(self.filepath, f"{self.file_prefix}edges.pkl")

    def load(self):
        """Populate ``graph``, ``nodes`` and ``edges``.

        Uses the pickle cache when all three files exist; otherwise (or when
        ``rebuild`` is set) builds the network and writes the cache.
        """
        if self.verbose:
            print("Loading data...")

        files_to_check = [self.graph_file, self.node_file, self.edge_file]

        if self.rebuild:
            for file in files_to_check:
                if os.path.exists(file):
                    os.remove(file)

        if not all(os.path.exists(f) for f in files_to_check):
            self.process_pbf()
            self.save_to_pickle()
        else:
            self.load_from_pickle()

        if self.verbose:
            print("Data loaded successfully.")

    def process_pbf(self):
        """
        Build the graph for ``self.place`` with ``ox.graph_from_place`` and
        derive the node and edge GeoDataFrames from it.

        Despite the name, no local PBF file is read here.
        """
        try:
            if self.verbose:
                print(f"Processing PBF for {self.place}...")

            self.graph = ox.graph_from_place(self.place, network_type=self.network_type)
            self.nodes, self.edges = ox.graph_to_gdfs(self.graph)

            if self.verbose:
                print("PBF processed successfully.")
        except Exception as e:
            print(f"Error processing PBF: {e}")
            raise

    def save_to_pickle(self):
        """
        Save the graph, nodes, and edges to pickle files.

        The target directory is created when missing.  Attributes that are
        still ``None`` are skipped.
        """
        try:
            if self.verbose:
                print("Saving data to pickle files...")

            # Create the cache directory on every save, not only on rebuild,
            # so the first run against a fresh path does not fail.
            if self.filepath:
                os.makedirs(self.filepath, exist_ok=True)

            data_to_save = {
                self.graph_file: self.graph,
                self.node_file: self.nodes,
                self.edge_file: self.edges,
            }

            for file, data in data_to_save.items():
                if data is not None:
                    with open(file, 'wb') as f:
                        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

            if self.verbose:
                print("Data saved to pickle files successfully.")
        except Exception as e:
            print(f"Error saving to pickle: {e}")
            raise

    def load_from_pickle(self):
        """
        Load the graph, nodes, and edges from pickle files.
        """
        try:
            if self.verbose:
                print("Loading data from pickle files...")

            files_to_load = {
                self.graph_file: 'graph',
                self.node_file: 'nodes',
                self.edge_file: 'edges',
            }

            # NOTE(security): unpickling executes arbitrary code; only point
            # ox_files_save_path at a directory whose contents you trust.
            for file, attr in files_to_load.items():
                with open(file, 'rb') as f:
                    setattr(self, attr, pickle.load(f))

            if self.verbose:
                print("Data loaded from pickle files successfully.")
        except Exception as e:
            print(f"Error loading from pickle: {e}")
            raise

    def plot_graph(self):
        """
        Plot the graph with ``ox.plot_graph``; prints a hint when no graph
        has been loaded yet.
        """
        try:
            if self.graph is not None:
                if self.verbose:
                    print("Plotting the graph...")
                ox.plot_graph(self.graph)
                if self.verbose:
                    print("Graph plotted successfully.")
            else:
                print("Graph is not loaded. Please load a PBF file first.")
        except Exception as e:
            print(f"Error plotting the graph: {e}")
            raise
145
+
146
+
147
def get_bounding_box_from_points(gps_points, margin=0.001):
    """Return ``(north, south, east, west)`` enclosing all points plus a margin.

    Each point is indexed as ``(latitude, longitude)``; ``margin`` is added
    on every side in the same units as the coordinates.
    """
    lats = tuple(pt[0] for pt in gps_points)
    lons = tuple(pt[1] for pt in gps_points)
    return (
        max(lats) + margin,
        min(lats) - margin,
        max(lons) + margin,
        min(lons) - margin,
    )
157
+
158
+
159
def add_arrows(map_object, locations, color, n_arrows):
    """Place direction markers along a path and return the map.

    Args:
        map_object: folium map (or layer) the markers are added to.
        locations: sequence of ``(lat, lon)`` points describing the path.
        color: fill colour of the markers.
        n_arrows: approximate number of markers to spread along the path.

    Paths with two points or fewer, and non-positive ``n_arrows``, leave the
    map untouched.
    """
    n = len(locations)
    if n <= 2 or n_arrows <= 0:
        return map_object

    # n // n_arrows is 0 when the path has fewer points than requested
    # arrows, and range() rejects a zero step; fall back to every point.
    step = max(1, n // n_arrows)
    for i in range(0, n - 1, step):
        # Get the start and end point for this segment
        start, end = locations[i], locations[i + 1]

        # Calculate the direction in which to place the arrow
        rotation = -np.arctan2((end[1] - start[1]), (end[0] - start[0])) * 180 / np.pi

        folium.RegularPolygonMarker(
            location=end,
            fill_color=color,
            number_of_sides=2,
            radius=6,
            rotation=rotation,
        ).add_to(map_object)
    return map_object
179
+
180
+
181
def extract_subgraph(G, north, south, east, west):
    """Return the subgraph of ``G`` whose nodes lie inside a bounding box.

    Args:
        G: OSMnx graph.
        north, south, east, west: bounding-box limits, in the same
            coordinate units as the graph's node geometries.

    Returns:
        The view produced by ``G.subgraph`` over the nodes within the box.
    """
    # OSMnx 2.x takes a single bbox=(left, bottom, right, top) tuple, while
    # 1.x took north/south/east/west.  Compare the major version numerically:
    # comparing version strings would rank '10.0' below '2.0'.
    major_version = int(ox.__version__.split('.')[0])
    if major_version >= 2:
        bbox_poly = ox.utils_geo.bbox_to_poly(bbox=(west, south, east, north))
    else:
        bbox_poly = ox.utils_geo.bbox_to_poly(north, south, east, west)

    # Get nodes GeoDataFrame
    nodes_gdf = ox.graph_to_gdfs(G, nodes=True, edges=False)

    # Find nodes within the bounding box (a single polygon needs no union)
    nodes_within_bbox = nodes_gdf[nodes_gdf.geometry.within(bbox_poly)]

    # Create subgraph
    return G.subgraph(nodes_within_bbox.index)
199
+
200
+
201
def get_distance_between_points(point_a, point_b, unit='km'):
    """Return the geodesic distance between two ``(lat, lon)`` points.

    Both points must be 2-tuples of non-NaN ``float`` values; anything else
    yields ``0``.  ``unit`` selects kilometres (``'km'``), metres (``'m'``)
    or miles (``'mi'``); an unrecognised unit also yields ``0``.
    """
    for point in (point_a, point_b):
        if not isinstance(point, tuple) or len(point) != 2:
            return 0
        if any(not isinstance(value, float) or math.isnan(value) for value in point):
            return 0

    distance = geodesic(point_a, point_b)
    unit_attributes = (('km', 'kilometers'), ('m', 'meters'), ('mi', 'miles'))
    for unit_name, attribute in unit_attributes:
        if unit == unit_name:
            return getattr(distance, attribute)
    return 0
219
+
220
+
221
# Tile layers offered in the layer control.
# Key: display name passed to folium.TileLayer(name=...).
# Value: tiles identifier passed to folium.TileLayer(tiles=...).
tile_options = {
    "OpenStreetMap": "OpenStreetMap",
    "CartoDB": "cartodbpositron",
    "CartoDB Voyager": "cartodbvoyager"
}
226
+
227
+
228
def attach_supported_tiles(map_object, default_tile="OpenStreetMap"):
    """Add every entry of ``tile_options`` to the map as a hidden tile layer.

    The entry whose tiles identifier equals ``default_tile`` (compared
    case-insensitively) is skipped so the default layer is not duplicated.
    """
    skipped_tiles = default_tile.lower()
    remaining = [
        (layer_name, tiles_id)
        for layer_name, tiles_id in tile_options.items()
        if tiles_id.lower() != skipped_tiles
    ]
    for layer_name, tiles_id in remaining:
        layer = folium.TileLayer(name=layer_name, tiles=tiles_id, show=False)
        layer.add_to(map_object)
237
+
238
+
239
def get_graph(**options):
    """Load (or build) the network described by ``options``.

    ``options`` are passed unchanged to ``PBFHandler``.

    Returns:
        ``(graph, nodes, edges)`` as held by the handler after ``load()``.
    """
    pbf_handler = PBFHandler(**options)
    pbf_handler.load()
    loaded = (pbf_handler.graph, pbf_handler.nodes, pbf_handler.edges)
    return loaded
243
+
244
+
245
def add_query_params(url, params):
    """Return ``url`` with ``params`` merged into its query string.

    Existing parameters are kept unless ``params`` overrides them; scheme,
    host, path and fragment are preserved.  When a key occurs more than once
    in the original query, only its last value survives.

    Args:
        url: the URL to extend.
        params: mapping of query keys to values to set.

    Returns:
        The rebuilt URL as a string.
    """
    from urllib.parse import parse_qsl

    # Parse the original URL
    url_components = urlsplit(url)

    # parse_qsl copes with values containing '=' and keys without a value,
    # and decodes percent-escapes so urlencode() below does not encode them
    # a second time.
    merged_params = dict(parse_qsl(url_components.query, keep_blank_values=True))
    merged_params.update(params)

    # Construct the new URL
    return urlunsplit((
        url_components.scheme,
        url_components.netloc,
        url_components.path,
        urlencode(merged_params),
        url_components.fragment,
    ))
266
+
267
+
File without changes
@@ -0,0 +1,78 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import datetime
4
+ import pandas as pd
5
+ from sibi_dst.utils import Logger, ParquetSaver
6
+ from sibi_dst.utils.data_wrapper import DataWrapper
7
+
8
+
9
class TestDataWrapper(unittest.TestCase):
    """Unit tests for ``DataWrapper`` construction and its helper methods."""

    def setUp(self):
        """Prepare constructor arguments for a local-filesystem wrapper."""
        self.dataclass = MagicMock()
        self.date_field = "created_at"
        # A local path is used here; an "s3://bucket/path" URL together with
        # key/secret/client_kwargs options would exercise remote storage.
        self.data_path = "/path/to/data"
        self.parquet_filename = "data.parquet"
        self.start_date = "2022-01-01"
        self.end_date = "2022-12-31"
        self.filesystem_type = "file"
        self.filesystem_options = {}
        self.logger = Logger.default_logger(logger_name="TestLogger")

    def _build_wrapper(self):
        """Create a ``DataWrapper`` from the values prepared in ``setUp``."""
        return DataWrapper(
            dataclass=self.dataclass,
            date_field=self.date_field,
            data_path=self.data_path,
            parquet_filename=self.parquet_filename,
            start_date=self.start_date,
            end_date=self.end_date,
            filesystem_type=self.filesystem_type,
            filesystem_options=self.filesystem_options,
            logger=self.logger,
        )

    def test_initialization(self):
        """Constructor stores its arguments, normalising the path and dates."""
        wrapper = self._build_wrapper()
        expected_attributes = {
            "dataclass": self.dataclass,
            "date_field": self.date_field,
            "data_path": "/path/to/data/",
            "parquet_filename": self.parquet_filename,
            "start_date": datetime.date(2022, 1, 1),
            "end_date": datetime.date(2022, 12, 31),
            "filesystem_type": self.filesystem_type,
            "filesystem_options": self.filesystem_options,
            "logger": self.logger,
        }
        for attribute, expected in expected_attributes.items():
            self.assertEqual(getattr(wrapper, attribute), expected)

    def test_convert_to_date(self):
        """ISO strings and ``date`` objects convert; malformed strings raise."""
        new_year = datetime.date(2022, 1, 1)
        self.assertEqual(DataWrapper.convert_to_date("2022-01-01"), new_year)
        self.assertEqual(DataWrapper.convert_to_date(new_year), new_year)
        with self.assertRaises(ValueError):
            DataWrapper.convert_to_date("invalid-date")

    @patch('fsspec.filesystem')
    def test_is_file_older_than(self, mock_filesystem):
        """A file modified 1500 minutes ago is old; one modified 1000 minutes ago is not."""
        mock_fs = mock_filesystem.return_value
        mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1500)).timestamp()}

        wrapper = self._build_wrapper()

        self.assertTrue(wrapper.is_file_older_than("some/file/path"))
        mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1000)).timestamp()}
        self.assertFalse(wrapper.is_file_older_than("some/file/path"))
75
+
76
+
77
+ if __name__ == '__main__':
78
+ unittest.main()
@@ -125,7 +125,7 @@ class ClickHouseWriter:
125
125
  """
126
126
  Writes the Dask DataFrame to a ClickHouse table partition by partition.
127
127
  """
128
- if len(self.df.head().index) == 0:
128
+ if len(self.df.index) == 0:
129
129
  self.logger.debug("No data found. Nothing written.")
130
130
  return
131
131
 
@@ -95,7 +95,7 @@ class DataUtils:
95
95
  # Get unique IDs from source column
96
96
  ids = df[source_col].dropna().unique()
97
97
  # Compute if it's a Dask Series
98
- if isinstance(ids, dd.core.Series):
98
+ if isinstance(ids, dd.Series):
99
99
  ids = ids.compute()
100
100
 
101
101
  # Check if any IDs are found