sibi-dst 0.3.27__py3-none-any.whl → 0.3.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ import math
2
+ import os
3
+ import pickle
4
+ from urllib.parse import urlencode, urlsplit, urlunsplit
5
+
6
+ import folium
7
+ import geopandas as gpd
8
+ import numpy as np
9
+ import osmnx as ox
10
+ from geopy.distance import geodesic
11
+
12
+
13
+ #
14
+ # options = {
15
+ # 'ox_files_save_path': ox_files_save_path,
16
+ # 'network_type': 'drive',
17
+ # 'place': 'Costa Rica',
18
+ # 'file_prefix': 'costa-rica-',
19
+ # }
20
+ # Usage example
21
+ # handler = PBFHandler(**options)
22
+ # handler.load()
23
+
24
+
25
class PBFHandler:
    """Build an OSMnx graph for a place and cache it on disk as pickle files.

    Despite the name, no ``.pbf`` file is read by this class: the graph is
    obtained with ``ox.graph_from_place`` and then split into node and edge
    GeoDataFrames. All three objects are cached under ``ox_files_save_path``.

    Keyword Args:
        rebuild: When true, existing cache files are deleted before loading.
            Defaults to ``False``.
        verbose: When true, progress messages are printed. Defaults to ``False``.
        place: Place name passed to ``ox.graph_from_place``.
            Defaults to ``'Costa Rica'``.
        ox_files_save_path: Directory holding the cache files.
            Defaults to ``"gis_data/"``.
        file_prefix: Prefix of the cache file names. ``files_prefix`` is
            accepted as an alias; ``file_prefix`` wins when both are given.
            Defaults to ``'costa-rica-'``.
        network_type: Network type passed to ``ox.graph_from_place``.
            Defaults to ``'all'``.
    """

    def __init__(self, **kwargs):
        self.graph = None
        self.nodes = None
        self.edges = None
        self.rebuild = kwargs.get("rebuild", False)
        self.verbose = kwargs.get("verbose", False)
        self.place = kwargs.get('place', 'Costa Rica')
        self.filepath = kwargs.get('ox_files_save_path', "gis_data/")
        # 'files_prefix' is the spelling used in the module's usage example,
        # so honour it as a fallback instead of silently ignoring it.
        self.file_prefix = kwargs.get(
            'file_prefix', kwargs.get('files_prefix', 'costa-rica-')
        )
        self.network_type = kwargs.get('network_type', 'all')
        # os.path.join keeps the result correct whether or not the save path
        # ends with a separator.
        self.graph_file = os.path.join(self.filepath, f"{self.file_prefix}graph.pkl")
        self.node_file = os.path.join(self.filepath, f"{self.file_prefix}nodes.pkl")
        self.edge_file = os.path.join(self.filepath, f"{self.file_prefix}edges.pkl")

    def _ensure_output_dirs(self):
        """Create the parent directory of every cache file if it is missing."""
        for file in (self.graph_file, self.node_file, self.edge_file):
            parent = os.path.dirname(file)
            # An empty parent means "current directory"; makedirs('') would fail.
            if parent:
                os.makedirs(parent, exist_ok=True)

    def load(self):
        """Populate ``graph``, ``nodes`` and ``edges``.

        Cached pickle files are used when all three exist; otherwise the
        graph is rebuilt and the cache is rewritten. With ``rebuild`` set,
        any existing cache files are removed first.
        """
        if self.verbose:
            print("Loading data...")

        files_to_check = [self.graph_file, self.node_file, self.edge_file]

        if self.rebuild:
            for file in files_to_check:
                if os.path.exists(file):
                    os.remove(file)

        if not all(os.path.exists(f) for f in files_to_check):
            # Make sure the cache location is usable before the graph is built.
            self._ensure_output_dirs()
            self.process_pbf()
            self.save_to_pickle()
        else:
            self.load_from_pickle()

        if self.verbose:
            print("Data loaded successfully.")

    def process_pbf(self):
        """Build the graph for ``self.place`` and derive node/edge GeoDataFrames.

        Raises:
            Exception: Any error from OSMnx is printed and re-raised.
        """
        try:
            if self.verbose:
                print(f"Processing PBF for {self.place}...")

            self.graph = ox.graph_from_place(self.place, network_type=self.network_type)
            self.nodes, self.edges = ox.graph_to_gdfs(self.graph)

            if self.verbose:
                print("PBF processed successfully.")
        except Exception as e:
            print(f"Error processing PBF: {e}")
            raise

    def save_to_pickle(self):
        """Write the graph, nodes and edges to their pickle files.

        Attributes that are still ``None`` are skipped.

        Raises:
            Exception: Any I/O or pickling error is printed and re-raised.
        """
        try:
            if self.verbose:
                print("Saving data to pickle files...")

            self._ensure_output_dirs()

            data_to_save = {
                self.graph_file: self.graph,
                self.node_file: self.nodes,
                self.edge_file: self.edges,
            }

            for file, data in data_to_save.items():
                if data is not None:
                    with open(file, 'wb') as f:
                        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

            if self.verbose:
                print("Data saved to pickle files successfully.")
        except Exception as e:
            print(f"Error saving to pickle: {e}")
            raise

    def load_from_pickle(self):
        """Read the graph, nodes and edges back from their pickle files.

        Raises:
            Exception: Any I/O or unpickling error is printed and re-raised.
        """
        try:
            if self.verbose:
                print("Loading data from pickle files...")

            files_to_load = {
                self.graph_file: 'graph',
                self.node_file: 'nodes',
                self.edge_file: 'edges',
            }

            for file, attr in files_to_load.items():
                with open(file, 'rb') as f:
                    # NOTE: unpickling executes arbitrary code; only point
                    # this handler at cache files this application wrote.
                    setattr(self, attr, pickle.load(f))

            if self.verbose:
                print("Data loaded from pickle files successfully.")
        except Exception as e:
            print(f"Error loading from pickle: {e}")
            raise

    def plot_graph(self):
        """Plot the loaded graph with ``ox.plot_graph``.

        Prints a hint instead of plotting when no graph has been loaded.

        Raises:
            Exception: Any plotting error is printed and re-raised.
        """
        try:
            if self.graph is not None:
                if self.verbose:
                    print("Plotting the graph...")
                ox.plot_graph(self.graph)
                if self.verbose:
                    print("Graph plotted successfully.")
            else:
                print("Graph is not loaded. Please load a PBF file first.")
        except Exception as e:
            print(f"Error plotting the graph: {e}")
            raise
145
+
146
+
147
def get_bounding_box_from_points(gps_points, margin=0.001):
    """Return the padded bounding box of a collection of GPS points.

    Args:
        gps_points: Iterable of points whose first element is the latitude
            and second element the longitude. Any iterable is accepted,
            including one-shot generators.
        margin: Amount added to the maxima and subtracted from the minima,
            in the same units as the coordinates.

    Returns:
        A ``(north, south, east, west)`` tuple.

    Raises:
        ValueError: If ``gps_points`` is empty.
    """
    # Materialise once: the coordinates are read twice (latitudes, then
    # longitudes), which would exhaust a generator on the first pass.
    points = list(gps_points)
    if not points:
        raise ValueError("gps_points must contain at least one (latitude, longitude) pair")

    latitudes = [point[0] for point in points]
    longitudes = [point[1] for point in points]

    north = max(latitudes) + margin
    south = min(latitudes) - margin
    east = max(longitudes) + margin
    west = min(longitudes) - margin

    return north, south, east, west
157
+
158
+
159
def add_arrows(map_object, locations, color, n_arrows):
    """Add direction markers along a path drawn on a folium map.

    Markers are placed at the end point of every ``len(locations) // n_arrows``-th
    segment and rotated according to that segment's direction.

    Args:
        map_object: Map (or layer) the markers are added to.
        locations: Sequence of two-element coordinates describing the path.
        color: Fill colour of the markers.
        n_arrows: Approximate number of markers wanted; it sets the stride
            between marked segments.

    Returns:
        ``map_object``. It is returned unchanged when the path has two
        points or fewer, or when ``n_arrows`` is not positive.
    """
    n = len(locations)

    # Short paths get no arrows, and a non-positive count means none were
    # requested (a count of 0 would otherwise divide by zero below).
    if n <= 2 or n_arrows <= 0:
        return map_object

    # With more arrows requested than points, n // n_arrows is 0, which
    # range() rejects as a step; fall back to marking every segment.
    step = max(1, n // n_arrows)

    for i in range(0, n - 1, step):
        start, end = locations[i], locations[i + 1]

        # Marker rotation in degrees, derived from the segment direction.
        rotation = -np.arctan2((end[1] - start[1]), (end[0] - start[0])) * 180 / np.pi

        folium.RegularPolygonMarker(location=end,
                                    fill_color=color,
                                    number_of_sides=2,
                                    radius=6,
                                    rotation=rotation).add_to(map_object)
    return map_object
179
+
180
+
181
def extract_subgraph(G, north, south, east, west):
    """Return the part of ``G`` whose nodes lie inside a bounding box.

    Args:
        G: Graph accepted by ``ox.graph_to_gdfs``.
        north: Upper latitude bound of the box.
        south: Lower latitude bound of the box.
        east: Upper longitude bound of the box.
        west: Lower longitude bound of the box.

    Returns:
        ``G.subgraph(...)`` restricted to the nodes within the box.
    """
    # Compare the major version as a number: comparing version strings
    # lexicographically would rank e.g. "10.0" below "2.0".
    major_version = int(ox.__version__.split('.')[0])

    # From OSMnx v2 the box is passed as one (west, south, east, north) tuple.
    if major_version >= 2:
        bbox_polygon = ox.utils_geo.bbox_to_poly(bbox=(west, south, east, north))
    else:
        bbox_polygon = ox.utils_geo.bbox_to_poly(north, south, east, west)

    nodes_gdf = ox.graph_to_gdfs(G, nodes=True, edges=False)

    # The polygon is used directly; wrapping it in a one-element GeoSeries and
    # taking its union would give back the same geometry.
    nodes_within_bbox = nodes_gdf[nodes_gdf.geometry.within(bbox_polygon)]

    return G.subgraph(nodes_within_bbox.index)
199
+
200
+
201
def get_distance_between_points(point_a, point_b, unit='km'):
    """Return the ``geodesic`` distance between two points.

    Args:
        point_a: Two-element tuple of non-NaN floats.
        point_b: Two-element tuple of non-NaN floats.
        unit: ``'km'``, ``'m'`` or ``'mi'``.

    Returns:
        The distance in the requested unit, or ``0`` when either point is
        not a two-element tuple of non-NaN floats or the unit is not one of
        the supported values.
    """

    def _is_valid_point(candidate):
        # Only tuples of exactly two non-NaN floats are accepted.
        if not isinstance(candidate, tuple):
            return False
        if len(candidate) != 2:
            return False
        return all(isinstance(value, float) and not math.isnan(value) for value in candidate)

    if not _is_valid_point(point_a):
        return 0
    if not _is_valid_point(point_b):
        return 0

    separation = geodesic(point_a, point_b)

    # Map each supported unit to the matching attribute of the distance object.
    for supported_unit, attribute in (('km', 'kilometers'), ('m', 'meters'), ('mi', 'miles')):
        if unit == supported_unit:
            return getattr(separation, attribute)
    return 0
219
+
220
+
221
# Layer display name -> value passed to folium.TileLayer as ``tiles``.
tile_options = {
    "OpenStreetMap": "OpenStreetMap",
    "CartoDB": "cartodbpositron",
    "CartoDB Voyager": "cartodbvoyager",
}


def attach_supported_tiles(map_object, default_tile="OpenStreetMap"):
    """Add every entry of ``tile_options`` to the map as a hidden tile layer.

    The entry whose tiles value equals ``default_tile`` (compared
    case-insensitively) is skipped so it is not added a second time.

    Args:
        map_object: Map the tile layers are added to.
        default_tile: Tiles value already in use on the map.
    """
    already_present = default_tile.lower()

    for layer_name, tiles_value in tile_options.items():
        if tiles_value.lower() == already_present:
            continue
        folium.TileLayer(name=layer_name, tiles=tiles_value, show=False).add_to(map_object)
237
+
238
+
239
def get_graph(**options):
    """Load a graph through ``PBFHandler`` and return its three parts.

    Args:
        **options: Keyword arguments forwarded unchanged to ``PBFHandler``.

    Returns:
        A ``(graph, nodes, edges)`` tuple taken from the loaded handler.
    """
    pbf_handler = PBFHandler(**options)
    pbf_handler.load()
    return tuple(getattr(pbf_handler, part) for part in ("graph", "nodes", "edges"))
243
+
244
+
245
def add_query_params(url, params):
    """Return ``url`` with ``params`` merged into its query string.

    Existing query parameters are kept; a key present in both is overridden
    by the value from ``params``. Scheme, host, path and fragment are left
    untouched.

    Args:
        url: The URL to extend.
        params: Mapping of query parameters to add or override. ``None`` is
            treated as "nothing to add".

    Returns:
        The rebuilt URL string.
    """
    from urllib.parse import parse_qsl

    url_components = urlsplit(url)

    # parse_qsl copes with values containing '=', keys without a value, and
    # percent-encoded text (decoding first avoids double-encoding below).
    merged_params = dict(parse_qsl(url_components.query, keep_blank_values=True))
    merged_params.update(params or {})

    new_query_string = urlencode(merged_params)

    return urlunsplit((
        url_components.scheme,
        url_components.netloc,
        url_components.path,
        new_query_string,
        url_components.fragment,
    ))
266
+
267
+
File without changes
@@ -0,0 +1,78 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import datetime
4
+ import pandas as pd
5
+ from sibi_dst.utils import Logger, ParquetSaver
6
+ from sibi_dst.utils.data_wrapper import DataWrapper
7
+
8
+
9
class TestDataWrapper(unittest.TestCase):
    """Tests for DataWrapper construction, date conversion and file-age checks."""

    def setUp(self):
        # Constructor arguments shared by every test. A local ("file")
        # filesystem with no options is used; an S3 setup would instead
        # supply an s3:// data path plus key/secret/client_kwargs options.
        self.dataclass = MagicMock()
        self.date_field = "created_at"
        self.data_path = "/path/to/data"
        self.parquet_filename = "data.parquet"
        self.start_date = "2022-01-01"
        self.end_date = "2022-12-31"
        self.filesystem_type = "file"
        self.filesystem_options = {}
        self.logger = Logger.default_logger(logger_name="TestLogger")

    def _make_wrapper(self):
        """Build a DataWrapper from the attributes prepared in ``setUp``."""
        return DataWrapper(
            dataclass=self.dataclass,
            date_field=self.date_field,
            data_path=self.data_path,
            parquet_filename=self.parquet_filename,
            start_date=self.start_date,
            end_date=self.end_date,
            filesystem_type=self.filesystem_type,
            filesystem_options=self.filesystem_options,
            logger=self.logger
        )

    def test_initialization(self):
        """Constructor arguments are stored, with path and dates normalised."""
        wrapper = self._make_wrapper()

        expected_attributes = (
            ("dataclass", self.dataclass),
            ("date_field", self.date_field),
            ("data_path", "/path/to/data/"),
            ("parquet_filename", self.parquet_filename),
            ("start_date", datetime.date(2022, 1, 1)),
            ("end_date", datetime.date(2022, 12, 31)),
            ("filesystem_type", self.filesystem_type),
            ("filesystem_options", self.filesystem_options),
            ("logger", self.logger),
        )
        for attribute, expected in expected_attributes:
            self.assertEqual(getattr(wrapper, attribute), expected)

    def test_convert_to_date(self):
        """ISO strings and date objects convert; malformed strings raise."""
        new_years_day = datetime.date(2022, 1, 1)
        self.assertEqual(DataWrapper.convert_to_date("2022-01-01"), new_years_day)
        self.assertEqual(DataWrapper.convert_to_date(datetime.date(2022, 1, 1)), new_years_day)
        with self.assertRaises(ValueError):
            DataWrapper.convert_to_date("invalid-date")

    @patch('fsspec.filesystem')
    def test_is_file_older_than(self, mock_filesystem):
        """A file modified 1500 minutes ago is old; 1000 minutes ago is not."""

        def mtime_minutes_ago(minutes):
            moment = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
            return moment.timestamp()

        mock_fs = mock_filesystem.return_value
        mock_fs.info.return_value = {'mtime': mtime_minutes_ago(1500)}

        wrapper = self._make_wrapper()

        self.assertTrue(wrapper.is_file_older_than("some/file/path"))
        mock_fs.info.return_value = {'mtime': mtime_minutes_ago(1000)}
        self.assertFalse(wrapper.is_file_older_than("some/file/path"))


if __name__ == '__main__':
    unittest.main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 0.3.27
3
+ Version: 0.3.29
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -8,6 +8,7 @@ Requires-Python: >=3.11,<4.0
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Programming Language :: Python :: 3.11
10
10
  Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
11
12
  Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
12
13
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
13
14
  Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
@@ -17,10 +18,13 @@ Requires-Dist: dask-expr (>=1.1.20,<2.0.0)
17
18
  Requires-Dist: dask[complete] (>=2024.11.1,<2025.0.0)
18
19
  Requires-Dist: django (>=5.1.4,<6.0.0)
19
20
  Requires-Dist: djangorestframework (>=3.15.2,<4.0.0)
21
+ Requires-Dist: folium (>=0.19.4,<0.20.0)
22
+ Requires-Dist: geopandas (>=1.0.1,<2.0.0)
20
23
  Requires-Dist: httpx (>=0.27.2,<0.28.0)
21
24
  Requires-Dist: ipython (>=8.29.0,<9.0.0)
22
25
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
23
26
  Requires-Dist: mysqlclient (>=2.2.6,<3.0.0)
27
+ Requires-Dist: nltk (>=3.9.1,<4.0.0)
24
28
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
25
29
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
26
30
  Requires-Dist: paramiko (>=3.5.0,<4.0.0)
@@ -1,6 +1,6 @@
1
1
  sibi_dst/__init__.py,sha256=CLHfzrFNqklNx5uMKAPtbZfkbBbVYR5qsiMro0RTfmA,252
2
- sibi_dst/df_helper/__init__.py,sha256=aiAu7j1SWDiw3RVI4UJmvLcADP34OfrJTCYpdupPGII,234
3
- sibi_dst/df_helper/_df_helper.py,sha256=vG-Lb9lj8s5cACTvfYp7JhXt1PajttHVhKYzBWR-9Vc,13953
2
+ sibi_dst/df_helper/__init__.py,sha256=5yzslP6zYYOHsTtAzHnNDXHYjf_T6yW7baxwgtduWqQ,292
3
+ sibi_dst/df_helper/_df_helper.py,sha256=MttqHot8dlHzo4G522JL-z6LOFWYVXqqz06k-4YcvRM,23447
4
4
  sibi_dst/df_helper/_parquet_artifact.py,sha256=nx1wTEyrjARpCCPNwBxYiBROee3CSb6c-u7Cpme_tdk,4978
5
5
  sibi_dst/df_helper/_parquet_reader.py,sha256=sbe8DsScNT2h6huNsz8mUxVnUGpJeRzbaONZ3u2sQeQ,1685
6
6
  sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,9 +22,21 @@ sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=ML-m_WeTR1_UMgiDR
22
22
  sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=Bmhh6VvmBfNfBA2JpuEdsYD_193yJ768Si2TvkY9HmU,4405
23
23
  sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
24
24
  sibi_dst/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
25
- sibi_dst/df_helper/core/_filter_handler.py,sha256=g9FMcB_koT724ggcWt98jow2XgUnmupK_fNhF95W5bQ,10217
25
+ sibi_dst/df_helper/core/_filter_handler.py,sha256=t3uLLJX5hWO_dWKCCz8Dwpc9RZ5PMHBIWkHSELCpFXI,11131
26
26
  sibi_dst/df_helper/core/_params_config.py,sha256=Og3GYth0GVWpcOYWZWRy7CZ5PDsg63Nmqo-W7TUrA_0,3503
27
27
  sibi_dst/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
28
+ sibi_dst/df_helper/data_cleaner.py,sha256=lkxQoXLvGzXCicFUimnA5nen5qkrO1oxgl_p2Be2o8w,5183
29
+ sibi_dst/geopy_helper/__init__.py,sha256=Q1RJiUZIOlV0QNNLjxZ_2IZS5LqIe5jRbeQkfD1Vm60,112
30
+ sibi_dst/geopy_helper/geo_location_service.py,sha256=l0dV0XuEk-tcWdaOymgN9WulR6xp3k7yJUgqYvnqCKo,2288
31
+ sibi_dst/geopy_helper/utils.py,sha256=R9X6ew0L_SuCpsA_AQK1wd3BspRGtV83q3mhBkcKr4A,1664
32
+ sibi_dst/osmnx_helper/__init__.py,sha256=QeAKEeVXZk_qn8o0d3BOoGgv2lzatcI2yBqY3ZqviKI,153
33
+ sibi_dst/osmnx_helper/base_osm_map.py,sha256=s2OY_XfwjZA3ImJNtCgevGBCbwRVe3dY3QVkTHEulB0,5794
34
+ sibi_dst/osmnx_helper/basemaps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ sibi_dst/osmnx_helper/basemaps/calendar_html.py,sha256=UArt6FDgoCgoRte45Xo3IHqd-RNzW0YgitgZYfOFasY,4031
36
+ sibi_dst/osmnx_helper/basemaps/router_plotter.py,sha256=QznnBGsUwhl8ZITcVNBrQDm-MXAd0jpJGPuyozKyQg0,8537
37
+ sibi_dst/osmnx_helper/utils.py,sha256=8sF-wNSL38WzhWS3DceZ1cP8BM11i7D0bI-E4XYD8K4,8449
38
+ sibi_dst/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ sibi_dst/tests/test_data_wrapper_class.py,sha256=Nkup5OFH5Cos2fxPaU7g9IEyINJM0uJ5-rOZ-eNtd20,3275
28
40
  sibi_dst/utils/__init__.py,sha256=z51o5sjIo_gTjnDXk5SBniCxWJIrDBMS7df0dTs8VMk,775
29
41
  sibi_dst/utils/airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
30
42
  sibi_dst/utils/clickhouse_writer.py,sha256=xUhFDOuZt0eZDpVJNuLb7pfTHUV06NCYrNUx_a7qrSM,8580
@@ -38,6 +50,6 @@ sibi_dst/utils/filepath_generator.py,sha256=hjI7gQwfwRToPeuzoUQDayHKQrr4Ivhi4Chl
38
50
  sibi_dst/utils/log_utils.py,sha256=4eLmoV8VC7wDwPr1mRfDKP24_-laGO6ogE4U0u3DUuA,2315
39
51
  sibi_dst/utils/parquet_saver.py,sha256=hLrWr1G132y94eLopDPPGQGDsAiR1lQ8id4QQtGYPE4,4349
40
52
  sibi_dst/utils/storage_manager.py,sha256=7nkfeBW_2xlF59pGj7V2aY5TLwpJnPQuPVclqjavJOA,3856
41
- sibi_dst-0.3.27.dist-info/METADATA,sha256=YFb0ZGbz2m0-aczvItyKK4Iqf1wn6pSVGE41ZUQ6YI8,2265
42
- sibi_dst-0.3.27.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
43
- sibi_dst-0.3.27.dist-info/RECORD,,
53
+ sibi_dst-0.3.29.dist-info/METADATA,sha256=KsqbOJSoa7LOBN5ibbG-MLCgCSGSI_Ps7LfdLHDPErQ,2436
54
+ sibi_dst-0.3.29.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
55
+ sibi_dst-0.3.29.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 1.9.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any