sibi-dst 0.3.27__py3-none-any.whl → 0.3.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,9 +3,11 @@ from __future__ import annotations
  from ._df_helper import DfHelper
  from ._parquet_artifact import ParquetArtifact
  from ._parquet_reader import ParquetReader
+ #from .data_cleaner import DataCleaner

  __all__ = [
  'DfHelper',
  'ParquetArtifact',
  'ParquetReader',
+ #'DataCleaner'
  ]
@@ -6,6 +6,7 @@ from typing import Any, Dict, TypeVar
  from typing import Union, Optional

  import dask.dataframe as dd
+ from dask import delayed, compute
  import pandas as pd
  from pydantic import BaseModel

@@ -29,6 +30,38 @@ warnings.filterwarnings(


  class DfHelper:
+ """
+ DfHelper is a utility class for managing, loading, and processing data from
+ various backends, such as Django databases, Parquet files, HTTP sources, and
+ SQLAlchemy-based databases. The class abstracts the complexities of handling
+ different backends and provides a unified interface for data operations.
+
+ The class is particularly useful for projects that require flexibility in
+ data source configuration and seamless integration with both Dask and Pandas
+ for handling data frames. It includes robust mechanisms for post-processing
+ data, filtering columns, renaming, and setting indices.
+
+ :ivar df: The DataFrame currently being processed or loaded.
+ :type df: Union[dd.DataFrame, pd.DataFrame]
+ :ivar backend_django: Configuration for interacting with Django database backends.
+ :type backend_django: Optional[DjangoConnectionConfig]
+ :ivar _backend_query: Internal configuration for query handling.
+ :type _backend_query: Optional[QueryConfig]
+ :ivar _backend_params: Internal parameters configuration for DataFrame handling.
+ :type _backend_params: Optional[ParamsConfig]
+ :ivar backend_parquet: Configuration for Parquet file handling.
+ :type backend_parquet: Optional[ParquetConfig]
+ :ivar backend_http: Configuration for interacting with HTTP-based backends.
+ :type backend_http: Optional[HttpConfig]
+ :ivar backend_sqlalchemy: Configuration for interacting with SQLAlchemy-based databases.
+ :type backend_sqlalchemy: Optional[SqlAlchemyConnectionConfig]
+ :ivar parquet_filename: The filename for a Parquet file, if applicable.
+ :type parquet_filename: str
+ :ivar logger: Logger instance used for debugging and information logging.
+ :type logger: Logger
+ :ivar default_config: Default configuration dictionary that can be overridden.
+ :type default_config: Dict
+ """
  df: Union[dd.DataFrame, pd.DataFrame] = None
  backend_django: Optional[DjangoConnectionConfig] = None
  _backend_query: Optional[QueryConfig] = None
@@ -60,7 +93,20 @@ class DfHelper:
  def __str__(self):
  return self.__class__.__name__

+ def __call__(self, **options):
+ return self.load(**options)
+
  def __post_init(self, **kwargs):
+ """
+ Initializes backend-specific configurations based on the provided backend type and other
+ parameters. This method performs configuration setup dependent on the selected backend,
+ such as 'django_db', 'parquet', 'http', or 'sqlalchemy'. Configuration for each backend
+ type is fetched or instantiated as necessary using provided parameters or default values.
+
+ :param kwargs: Dictionary of arguments passed during initialization of backend configurations.
+ Additional parameters for specific backend types are extracted here.
+ :return: None
+ """
  self.logger.debug(f"backend used: {self.backend}")
  self._backend_query = self.__get_config(QueryConfig, kwargs)
  self._backend_params = self.__get_config(ParamsConfig, kwargs)
@@ -88,7 +134,35 @@ class DfHelper:
  model_kwargs = {k: kwargs.pop(k) for k in list(kwargs.keys()) if k in recognized_keys}
  return model(**model_kwargs)

+ def load_parallel(self, **options):
+ """
+ Executes the `load` method in parallel using Dask, allowing multiple instances
+ to run concurrently. This function leverages Dask's `delayed` and `compute`
+ methods to schedule and process tasks in parallel. It is designed to handle
+ concurrent workloads efficiently by utilizing up to 4 parallel executions of
+ the `load` function.
+
+ :param options: Keyword arguments to be passed to the `load` method. These options
+ will be applied to all parallel instances of the `load` method.
+ :return: A list of results, where each element represents the output
+ from one of the parallel executions of the `load` method.
+ """
+ # Define tasks using Dask's delayed
+ tasks = [delayed(self.load)(**options) for _ in range(4)]
+ results = compute(*tasks)
+ return results
+
  def load(self, **options):
+ """
+ Loads data from a dataframe backend, ensuring compatibility with multiple
+ data processing backends. Provides the data in a pandas dataframe format
+ if the `as_pandas` attribute is set to True.
+
+ :param options: Arbitrary keyword arguments for dataframe loading customization.
+ :type options: dict
+ :return: The loaded dataframe, computed as a pandas dataframe if
+ `as_pandas` is set to True, or kept in its native backend format otherwise.
+ """
  # this will be the universal method to load data from a df irrespective of the backend
  df = self.__load(**options)
@@ -96,7 +170,23 @@ class DfHelper:
  return df

  def __load(self, **options):
-
+ """
+ Private method responsible for loading data using a specified backend. This method
+ abstracts away the details of interacting with the backend and dynamically calls the
+ appropriate function depending on the backend type. It supports multiple backend
+ types, such as `django_db`, `sqlalchemy`, `parquet`, and `http`. If the `http` backend
+ is selected, it checks whether the asyncio event loop is running and either runs the
+ process as a new asyncio task or synchronously.
+
+ :param options: Arbitrary keyword arguments provided for backend-specific configurations.
+ These should align with the requirements of the chosen backend.
+ :type options: dict
+
+ :return: The data loaded from the specified backend. The return type is dependent on
+ the particular backend being used.
+ :rtype: Depending on backend implementation; could be `Task`, `List`, `Dict`, or
+ another format defined by the backend.
+ """
  if self.backend == 'django_db':
  self._backend_params.parse_params(options)
  return self.__load_from_db(**options)
@@ -167,8 +257,13 @@ class DfHelper:

  def __post_process_df(self):
  """
- Efficiently process the DataFrame by filtering, renaming, and setting indices.
- Optimized for large datasets with Dask compatibility.
+ Processes a DataFrame according to the provided parameters defined within the
+ `self._backend_params.df_params` dictionary. This involves filtering columns,
+ renaming columns, setting an index column, and handling datetime indexing.
+ The method modifies the DataFrame in place.
+
+ :raises ValueError: If the lengths of `fieldnames` and `column_names` do not match,
+ or if the specified `index_col` is not found in the DataFrame.
  """
  df_params = self._backend_params.df_params
  fieldnames = df_params.get("fieldnames", None)
@@ -205,6 +300,21 @@ class DfHelper:
  self.logger.debug("Post-processing of DataFrame completed.")

  def __process_loaded_data(self):
+ """
+ Processes the dataframe by applying renaming logic based on the given field map
+ configuration. Inspects the dataframe for missing columns referenced in the field
+ map and flags them with a warning. Applies renaming only for columns that exist
+ in the dataframe while ensuring that no operations take place if the dataframe
+ is empty.
+
+ :param self: The instance of the class where the dataframe is being processed.
+ :type self: object with attributes `df`, `_backend_params`, and `logger`.
+
+ :raises Warning: Logs a warning if specified columns in the `field_map` are not
+ present in the dataframe.
+
+ :return: None
+ """
  self.logger.debug(f"Type of self.df: {type(self.df)}")
  if self.df.map_partitions(len).compute().sum() > 0:
  field_map = self._backend_params.field_map or {}
@@ -239,20 +349,54 @@ class DfHelper:
  self.logger.debug("Save to ClickHouse completed.")

  def __load_from_parquet(self, **options) -> Union[pd.DataFrame, dd.DataFrame]:
- self.df = self.backend_parquet.load_files()
- if options:
- """
- deprecated specific filter handling to a generic one
- self.df = ParquetFilterHandler(logger=self.logger).apply_filters_dask(self.df, options)
-
- """
- self.df = FilterHandler(backend='dask', logger=self.logger).apply_filters(self.df, filters=options)
- return self.df
+ try:
+ self.df = self.backend_parquet.load_files()
+ if options and self.df is not None:
+ """
+ deprecated specific filter handling to a generic one
+ self.df = ParquetFilterHandler(logger=self.logger).apply_filters_dask(self.df, options)
+
+ """
+ self.df = FilterHandler(backend='dask', logger=self.logger).apply_filters(self.df, filters=options)
+ return self.df
+ except Exception as e:
+ self.logger.debug(f"Failed to load data from parquet: {e}")
+ return dd.from_pandas(pd.DataFrame(), npartitions=1)

  def load_period(self, **kwargs):
  return self.__load_period(**kwargs)

  def __load_period(self, **kwargs):
+ """
+ Validates and processes the temporal filtering parameters `start` and `end` for querying,
+ ensuring correctness and compatibility with a specified backend (Django or SQLAlchemy).
+ This method dynamically maps and validates the provided datetime or date field from the
+ model according to the configured backend, and applies the appropriate filters to query objects.
+
+ This function enforces that both `start` and `end` are provided and checks if the start date
+ is earlier or the same as the end date. It supports parsing string representations of dates
+ and validates them against the date or datetime fields associated with the chosen backend.
+ If the backend or field is incompatible or missing, appropriate errors are raised.
+
+ The resulting filter conditions are integrated into `kwargs` for querying with the
+ appropriate backend model.
+
+ :param kwargs: Keyword arguments, including temporal filtering parameters and optionally a
+ datetime or date field name. Supported parameters include:
+ - **dt_field**: The name of the date or datetime field to use in filtering. Defaults
+ to an internally set field if not explicitly provided.
+ - **start**: The starting date or datetime for the query range. Can be a `str` or
+ `datetime.date/datetime.datetime` object.
+ - **end**: The ending date or datetime for the query range. Can be a `str` or
+ `datetime.date/datetime.datetime` object.
+
+ :return: Queryset or result of the load function with the applied temporal filters.
+ :rtype: Any
+
+ :raises ValueError: If the `dt_field` is not provided, if `start` or `end`
+ are missing, if the `start` date is later than `end`, or if the `dt_field`
+ does not exist in the backend model or its metadata.
+ """
  dt_field = kwargs.pop("dt_field", self.dt_field)
  if dt_field is None:
  raise ValueError("dt_field must be provided")
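Continuing the sketch above, `load_period` wraps the validation this docstring describes; the field name and date values below are illustrative only.

    df_jan = helper.load_period(
        dt_field='created_at',
        start='2025-01-01',
        end='2025-01-31 23:59:59',
    )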
@@ -316,6 +460,30 @@ class DfHelper:

  @staticmethod
  def parse_date(date_str: str) -> Union[datetime.datetime, datetime.date]:
+ """
+ Parses a date string and converts it to a `datetime.datetime` or
+ `datetime.date` object.
+
+ This method attempts to parse the given string in two distinct formats:
+ 1. First, it tries to interpret the string as a datetime with the format
+ ``%Y-%m-%d %H:%M:%S``. If successful, it returns a `datetime.datetime`
+ object.
+ 2. If the first format parsing fails, it attempts to parse the string as
+ a date with the format ``%Y-%m-%d``. If successful, it returns a
+ `datetime.date` object.
+
+ If the string cannot be parsed in either of these formats, the method will
+ raise a `ValueError`.
+
+ :param date_str: The date string to be parsed. Expected to match one of the
+ formats: ``%Y-%m-%d %H:%M:%S`` or ``%Y-%m-%d``.
+ :type date_str: str
+ :return: A `datetime.datetime` object if the string matches the first format,
+ or a `datetime.date` object if the string matches the second format.
+ :rtype: Union[datetime.datetime, datetime.date]
+ :raises ValueError: Raised if neither date format can be successfully parsed
+ from the provided string.
+ """
  try:
  return datetime.datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
  except ValueError:
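The two formats accepted by `parse_date` behave as follows:

    DfHelper.parse_date('2025-01-15 08:30:00')  # -> datetime.datetime(2025, 1, 15, 8, 30)
    DfHelper.parse_date('2025-01-15')           # -> datetime.date(2025, 1, 15)
    DfHelper.parse_date('15/01/2025')           # raises ValueError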
@@ -9,6 +9,22 @@ from sibi_dst.utils import Logger


  class FilterHandler:
+ """
+ Handles the application of filters to data sources with support for SQLAlchemy and Dask backends.
+
+ The FilterHandler class abstracts the process of applying filters to various backends, specifically
+ SQLAlchemy queries and Dask DataFrames. It supports multiple filtering operations, including
+ exact matches, comparisons, and string-related operations such as contains and regex. The handler
+ automatically determines and applies backend-specific processing, enabling seamless integration with
+ different data models or backends.
+
+ :ivar backend: The backend in use ('sqlalchemy' or 'dask').
+ :type backend: str
+ :ivar logger: An optional logger instance for debugging and logging purposes.
+ :type logger: Logger
+ :ivar backend_methods: A dictionary mapping backend-specific methods for column retrieval and operation application.
+ :type backend_methods: dict
+ """
  def __init__(self, backend, logger=None):
  """
  Initialize the FilterHandler.
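A rough sketch of the Dask path of `FilterHandler`, as used by `__load_from_parquet` above; the import path and the Django-style lookup keys (`field`, `field__gte`, `field__contains`, ...) are assumptions, not documented API.

    import pandas as pd
    import dask.dataframe as dd
    from sibi_dst.df_helper.core._filter_handler import FilterHandler  # module path taken from RECORD below

    ddf = dd.from_pandas(pd.DataFrame({'status': ['open', 'closed'], 'amount': [10, 250]}), npartitions=1)

    handler = FilterHandler(backend='dask')
    filtered = handler.apply_filters(ddf, filters={'status': 'open', 'amount__gte': 5})  # lookup syntax assumed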
@@ -0,0 +1,132 @@
+ import re
+ from nltk.corpus import stopwords
+ from nltk.stem import SnowballStemmer
+ import dask.dataframe as dd
+ from dask_ml.preprocessing import OneHotEncoder, LabelEncoder
+ import nltk
+
+ class DataCleaner:
+ def __init__(self, dataframe):
+ self.original_df = dataframe
+ self.df = dataframe.copy()
+ self.duplicates_df = None
+
+ def handle_missing_values(self, strategy='mean'):
+ if strategy == 'mean':
+ self.df = self.df.fillna(self.df.mean())
+ elif strategy == 'median':
+ self.df = self.df.fillna(self.df.median())
+ elif strategy == 'mode':
+ self.df = self.df.fillna(self.df.mode().iloc[0])
+ elif strategy == 'drop':
+ self.df = self.df.dropna()
+ return self
+
+ def identify_duplicates(self, subset=None):
+ self.duplicates_df = self.df.map_partitions(lambda df: df[df.duplicated(subset=subset, keep=False)])
+ return self.duplicates_df
+
+ def remove_duplicates(self):
+ if self.duplicates_df is not None:
+ self.df = self.df[~self.df.index.isin(self.duplicates_df.index)]
+ return self
+
+ def validate_date_fields(self, date_columns=None):
+ if date_columns is None:
+ date_columns = self.df.select_dtypes(include=['datetime', 'datetime64[ns]', 'datetime64[ns, UTC]']).columns
+ for col in date_columns:
+ print('Validating date field: ', col)
+ self.df[col] = dd.to_datetime(self.df[col], errors='coerce')
+ return self
+
+ def clean_text(self, text_columns=None, language='english'):
+ nltk.download('stopwords')
+ stop_words = set(stopwords.words(language))
+ stemmer = SnowballStemmer(language)
+
+ def clean_text(text):
+ if isinstance(text, str):
+ text = text.strip().lower()  # Remove leading/trailing whitespace and convert to lowercase
+ text = re.sub(r'[^\w\s]', '', text)  # Remove special characters and punctuation
+ words = text.split()
+ words = [word for word in words if word not in stop_words]  # Remove stop words
+ words = [stemmer.stem(word) for word in words]  # Apply stemming
+ return ' '.join(words)
+ return text
+
+ if text_columns is None:
+ text_columns = self.df.select_dtypes(include=['object', 'string']).columns
+ text_columns = [col for col in text_columns if self.df[col].dtype != 'bool']
+
+ for col in text_columns:
+ print('Cleaning text field: ', col)
+ self.df[col] = self.df[col].map(clean_text, meta=('cleaned_text', 'object'))
+ return self
+
+ def validate_numeric_fields(self, int_columns=None, float_columns=None):
+ if int_columns is None:
+ int_columns = self.df.select_dtypes(include=['int64', 'int32']).columns
+ if float_columns is None:
+ float_columns = self.df.select_dtypes(include=['float64', 'float32']).columns
+
+ for col in int_columns:
+ print('Validating integer field: ', col)
+ self.df[col] = dd.to_numeric(self.df[col], errors='coerce', downcast='integer')
+
+ for col in float_columns:
+ print('Validating float field: ', col)
+ self.df[col] = dd.to_numeric(self.df[col], errors='coerce', downcast='float')
+
+ return self
+
+ def detect_categorical_columns(self, threshold=0.05):
+ """
+ Detect columns that can be converted to 'category' dtype.
+
+ Parameters:
+ threshold (float): The maximum ratio of unique values to total values for a column to be considered categorical.
+
+ Returns:
+ List of column names that can be converted to 'category' dtype.
+ """
+ categorical_columns = []
+
+ def unique_ratio(partition, col):
+ return partition[col].nunique() / len(partition)
+
+ for col in self.df.columns:
+ print("Detecting categorical columns: ", col)
+ unique_ratios = self.df.map_partitions(unique_ratio, col=col).compute()
+ overall_unique_ratio = unique_ratios.sum() / len(self.df)
+ if overall_unique_ratio < threshold:
+ print(f'Column {col} is categorical')
+ categorical_columns.append(col)
+
+ return categorical_columns
+
+ def handle_categorical_variables(self, columns=None, method='onehot', threshold=0.05):
+ if columns is None:
+ columns = self.detect_categorical_columns(threshold)
+
+ if method == 'onehot':
+ for col in columns:
+ self.df[col] = self.df[col].astype('category')
+ encoder = OneHotEncoder(sparse_output=False)
+ self.df = encoder.fit_transform(self.df)
+ elif method == 'label':
+ encoder = LabelEncoder()
+ for col in columns:
+ self.df[col] = encoder.fit_transform(self.df[col])
+ return self
+
+ def analyze_dtypes(self):
+ return self.df.dtypes
+
+ def get_cleaned_dataframe(self):
+ return self.df
+
+ def get_original_dataframe(self):
+ return self.original_df
+
+ def get_duplicates_dataframe(self):
+ return self.duplicates_df
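`DataCleaner` methods return `self`, so they chain; a small usage sketch (the class is not re-exported from `sibi_dst.df_helper`, so it is imported from its module directly):

    import pandas as pd
    import dask.dataframe as dd
    from sibi_dst.df_helper.data_cleaner import DataCleaner

    ddf = dd.from_pandas(pd.DataFrame({'name': ['  Foo ', 'Bar'], 'qty': [1.0, None]}), npartitions=1)

    cleaner = (DataCleaner(ddf)
               .handle_missing_values(strategy='drop')
               .validate_numeric_fields()
               .clean_text())                       # downloads NLTK stopwords on first use
    cleaned = cleaner.get_cleaned_dataframe()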
@@ -0,0 +1,9 @@
+ from __future__ import annotations
+
+ from .base_osm_map import BaseOsmMap
+ from .utils import PBFHandler
+
+ __all__ = [
+ "BaseOsmMap",
+ "PBFHandler",
+ ]
@@ -0,0 +1,165 @@
+ from __future__ import annotations
+
+ import html
+ from abc import abstractmethod
+
+ import folium
+ import geopandas as gpd
+ import numpy as np
+ import osmnx as ox
+ from folium.plugins import Fullscreen
+
+
+ class BaseOsmMap:
+ tile_options = {
+ "OpenStreetMap": "OpenStreetMap",
+ "CartoDB": "cartodbpositron",
+ "CartoDB Voyager": "cartodbvoyager"
+ }
+ # Set default bounds for Costa Rica
+ bounds = [[8.0340, -85.9417], [11.2192, -82.5566]]
+
+ def __init__(self, osmnx_graph=None, df=None, **kwargs):
+ if osmnx_graph is None:
+ raise ValueError('osmnx_graph must be provided')
+ if df is None:
+ raise ValueError('df must be provided')
+ if df.empty:
+ raise ValueError('df must not be empty')
+ self.df = df.copy()
+ self.osmnx_graph = osmnx_graph
+ self.lat_col = kwargs.get('lat_col', 'latitude')
+ self.lon_col = kwargs.get('lon_col', 'longitude')
+ self.osm_map = None
+ self.G = None
+ self.map_html_title = self._sanitize_html(kwargs.get('map_html_title', 'OSM Basemap'))
+
+ self.zoom_start = kwargs.pop('zoom_start', 13)
+ self.fullscreen = kwargs.pop('fullscreen', True)
+ self.fullscreen_position = kwargs.pop('fullscreen_position', 'topright')
+ self.tiles = kwargs.pop('tiles', 'OpenStreetMap')
+ self.verbose = kwargs.pop('verbose', False)
+ self.sort_keys = kwargs.pop('sort_keys', None)
+ self.dt_field = kwargs.pop('dt_field', None)
+ self.dt = None
+ self.calc_nearest_nodes = kwargs.pop('calc_nearest_nodes', False)
+ self.nearest_nodes = None
+ self.max_bounds = kwargs.pop('max_bounds', False)
+ self._prepare_df()
+ self._initialise_map()
+
+
+ def _prepare_df(self):
+ if self.sort_keys:
+ self.df.sort_values(by=self.sort_keys, inplace=True)
+ self.df.reset_index(drop=True, inplace=True)
+ self.gps_points = self.df[[self.lat_col, self.lon_col]].values.tolist()
+ if self.dt_field is not None:
+ self.dt = self.df[self.dt_field].tolist()
+
+ if self.calc_nearest_nodes:
+ self.nearest_nodes = ox.distance.nearest_nodes(self.osmnx_graph, X=self.df[self.lon_col],
+ Y=self.df[self.lat_col])
+
+
+ def _initialise_map(self):
+ gps_array = np.array(self.gps_points)
+ mean_latitude = np.mean(gps_array[:, 0])
+ mean_longitude = np.mean(gps_array[:, 1])
+ self.osm_map = folium.Map(location=[mean_latitude, mean_longitude], zoom_start=self.zoom_start,
+ tiles=self.tiles, max_bounds=self.max_bounds)
+ north, south, east, west = self._get_bounding_box_from_points(margin=0.001)
+ self.G = self._extract_subgraph(north, south, east, west)
+
+
+ def _attach_supported_tiles(self):
+ # Normalize the default tile name to lowercase for comparison
+ normalized_default_tile = self.tiles.lower()
+
+ # Filter out the default tile layer from the options to avoid duplication
+ tile_options_filtered = {k: v for k, v in self.tile_options.items() if v.lower() != normalized_default_tile}
+
+ for tile, description in tile_options_filtered.items():
+ folium.TileLayer(name=tile, tiles=description, show=False).add_to(self.osm_map)
+
+
+ def _get_bounding_box_from_points(self, margin=0.001):
+ latitudes = [point[0] for point in self.gps_points]
+ longitudes = [point[1] for point in self.gps_points]
+
+ north = max(latitudes) + margin
+ south = min(latitudes) - margin
+ east = max(longitudes) + margin
+ west = min(longitudes) - margin
+
+ return north, south, east, west
+
+
+ def _extract_subgraph(self, north, south, east, west):
+ # Create a bounding box polygon
+ # from osmnx v2 this is how it is done
+ if ox.__version__ >= '2.0':
+ bbox_poly = gpd.GeoSeries([ox.utils_geo.bbox_to_poly(bbox=(west, south, east, north))])
+ else:
+ bbox_poly = gpd.GeoSeries([ox.utils_geo.bbox_to_poly(north, south, east, west)])
+
+ # Get nodes GeoDataFrame
+ nodes_gdf = ox.graph_to_gdfs(self.osmnx_graph, nodes=True, edges=False)
+
+ # Find nodes within the bounding box
+ nodes_within_bbox = nodes_gdf[nodes_gdf.geometry.within(bbox_poly.geometry.unary_union)]
+
+ # Create subgraph
+ subgraph = self.osmnx_graph.subgraph(nodes_within_bbox.index)
+
+ return subgraph
+
+
+ @abstractmethod
+ def process_map(self):
+ # this is to be implemented at the subclass level
+ # implement here your specific map logic.
+ ...
+
+
+ def pre_process_map(self):
+ # this is to be implemented at the subclass level
+ # call super().pre_process_map first to inherit the following behaviour
+ ...
+
+
+ def _post_process_map(self):
+ self._attach_supported_tiles()
+ self.add_tile_layer()
+ self._add_fullscreen()
+ self._add_map_title()
+ if self.max_bounds:
+ self.osm_map.fit_bounds(self.bounds)
+
+
+ def add_tile_layer(self):
+ # Override in subclass and call super().add_tile_layer at the end
+ folium.LayerControl().add_to(self.osm_map)
+
+
+ def _add_fullscreen(self):
+ if self.fullscreen:
+ Fullscreen(position=self.fullscreen_position).add_to(self.osm_map)
+
+
+ def _add_map_title(self):
+ if self.map_html_title:
+ self.osm_map.get_root().html.add_child(folium.Element(self.map_html_title))
+
+
+ @staticmethod
+ def _sanitize_html(input_html):
+ return html.escape(input_html)
+
+
+ def generate_map(self):
+ self.pre_process_map()
+ self.process_map()
+ self._post_process_map()
+
+ return self.osm_map
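`BaseOsmMap` is abstract; only `process_map` must be supplied. A minimal subclass sketch, assuming a graph from `PBFHandler`/`get_graph` (see utils below) and a dataframe with `latitude`/`longitude` columns:

    import folium
    from sibi_dst.osmnx_helper import BaseOsmMap

    class SimplePointsMap(BaseOsmMap):
        def process_map(self):
            # Plot every GPS point gathered by _prepare_df on the folium map.
            for lat, lon in self.gps_points:
                folium.CircleMarker(location=[lat, lon], radius=3).add_to(self.osm_map)

    # m = SimplePointsMap(osmnx_graph=graph, df=points_df, map_html_title='Stops').generate_map()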
File without changes
@@ -0,0 +1,122 @@
+
+ # HTML and CSS for the calendar button and popup
+ calendar_html = """
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/flatpickr/dist/flatpickr.min.css">
+ <script src="https://cdn.jsdelivr.net/npm/flatpickr"></script>
+
+ <style>
+ /* Style for the calendar button */
+ .calendar-btn {
+ background-color: white;
+ border: 1px solid gray;
+ border-radius: 3px;
+ padding: 5px;
+ font-size: 16px;
+ cursor: pointer;
+ position: fixed; /* Changed from absolute to fixed */
+ bottom: 50px; /* Adjust position relative to the viewport */
+ left: 10px; /* Adjust position relative to the viewport */
+ z-index: 10000; /* Ensure it stays on top of other elements */
+ }
+
+ /* Calendar popup with sufficient size */
+ .calendar-popup {
+ display: none;
+ position: fixed; /* Keep the popup fixed so it stays in view */
+ bottom: 100px;
+ left: 10px;
+ background-color: white;
+ padding: 10px;
+ border: 1px solid gray;
+ border-radius: 3px;
+ z-index: 10000; /* Ensure it stays on top of other elements */
+ width: 250px;
+ height: 300px;
+ }
+
+ /* Ensure the calendar fits properly */
+ #calendar {
+ width: 100%;
+ height: auto;
+ }
+ </style>
+
+ <!-- Calendar Button -->
+ <div class="calendar-btn">📅 Select Date</div>
+
+ <!-- Calendar Popup -->
+ <div class="calendar-popup" id="calendar-popup">
+ <div id="calendar"></div>
+ </div>
+
+ <script>
+ // Initialize Flatpickr calendar
+ const today = new Date().toISOString().split('T')[0];
+ // Function to show the "Please wait" message
+ function showLoadingMessage() {
+ let loadingMessage = document.createElement("div");
+ loadingMessage.id = "loading-message";
+ loadingMessage.style.position = "fixed";
+ loadingMessage.style.top = "50%";
+ loadingMessage.style.left = "50%";
+ loadingMessage.style.transform = "translate(-50%, -50%)";
+ loadingMessage.style.backgroundColor = "rgba(0, 0, 0, 0.8)";
+ loadingMessage.style.color = "white";
+ loadingMessage.style.padding = "20px";
+ loadingMessage.style.borderRadius = "5px";
+ loadingMessage.style.zIndex = "9999";
+ loadingMessage.innerText = "Please wait...";
+ document.body.appendChild(loadingMessage);
+ }
+
+ // Function to remove the "Please wait" message
+ function removeLoadingMessage() {
+ let loadingMessage = document.getElementById("loading-message");
+ if (loadingMessage) {
+ loadingMessage.remove();
+ }
+ }
+
+
+ flatpickr("#calendar", {
+ inline: true, // Render the calendar inline within the container
+ maxDate: today, // Disable future dates
+ onChange: function(selectedDates, dateStr, instance) {
+ console.log("Selected date: " + dateStr); // Debugging: Log the selected date
+ // Get the current URL and create a URL object to manipulate the query parameters
+ // Get the current URL from the parent window
+ showLoadingMessage();
+ let currentUrl = window.parent.location.href;
+
+ // If the URL contains "srcdoc", remove it and use the correct base path
+ if (currentUrl.includes("srcdoc")) {
+ currentUrl = currentUrl.replace("srcdoc", "");
+ }
+
+ const url = new URL(currentUrl);
+
+ // Set or update the 'date' parameter while preserving existing parameters
+ url.searchParams.set('date', dateStr);
+
+ console.log("Updated URL: " + url.toString()); // Debugging: Log the updated URL
+
+ // Update the parent window's location with the new URL
+ window.parent.location.href = url.toString();
+ }
+ });
+ // Remove the "Please wait" message once the page has finished loading
+ window.addEventListener("load", function() {
+ removeLoadingMessage();
+ });
+
+ // Toggle the calendar popup when the button is clicked
+ document.querySelector(".calendar-btn").addEventListener("click", function() {
+ var popup = document.getElementById("calendar-popup");
+ if (popup.style.display === "none" || popup.style.display === "") {
+ popup.style.display = "block";
+ } else {
+ popup.style.display = "none";
+ }
+ });
+ </script>
+ """
@@ -0,0 +1,186 @@
+ from __future__ import annotations
+ from sibi_dst.osmnx_helper.utils import get_distance_between_points, add_arrows
+ from collections import defaultdict
+ import folium
+ from folium.plugins import AntPath
+ import networkx as nx
+
+ from sibi_dst.osmnx_helper import BaseOsmMap
+ from sibi_dst.osmnx_helper.basemaps.calendar_html import calendar_html
+
+ class RoutePlotter(BaseOsmMap):
+ def __init__(self, osmnx_graph, df, **kwargs):
+ self.action_field = kwargs.pop('action_field', '')
+ self.action_groups = kwargs.pop('action_groups', {})
+ self.action_styles = kwargs.pop('action_styles', {})
+ self.use_ant_path = kwargs.pop('use_ant_path', True)
+ self.show_calendar = kwargs.pop('show_calendar', True)
+ self.show_map_title = kwargs.pop('show_map_title', True)
+ self.sort_keys = kwargs.pop('sort_keys', None)
+ self.main_route_layer = folium.FeatureGroup(name="Main Route")
+ self.feature_groups = {}
+ self.feature_group_counts = {}
+ self.total_distance = 0.0
+ self.actions = []
+ self.action_group_counts = {action_group: 0 for action_group in self.action_groups}
+ self.marker_count = 1
+ kwargs.update({'calc_nearest_nodes': True})
+ kwargs['dt_field'] = 'date_time'
+ super().__init__(osmnx_graph, df, **kwargs)
+
+ def pre_process_map(self):
+ super().pre_process_map()
+ self.actions = self.df[self.action_field].tolist()
+
+ def process_map(self):
+ self._calculate_routes()
+ self._plot_routes()
+ self._add_markers()
+ self.main_route_layer.add_to(self.osm_map)
+ if self.show_calendar:
+ self._add_calendar()
+
+ def _calculate_routes(self):
+ if self.verbose:
+ print("Calculating routes and markers...")
+ distances = [
+ get_distance_between_points(tuple(self.gps_points[0]), tuple(coord), 'm')
+ for coord in self.gps_points
+ ]
+ self.max_distance_index = distances.index(max(distances))
+ self.max_time_index = self.dt.index(max(self.dt))
+ self.route_polylines = []
+ self.markers = defaultdict(list)  # Store markers for action groups
+ for i in range(len(self.gps_points) - 1):
+ polyline, color, markers = self._calculate_route(i)
+ if polyline:
+ self.route_polylines.append((polyline, color))
+ for action_group, action_markers in markers.items():
+ self.markers[action_group].extend(action_markers)
+ self.action_group_counts[action_group] += 1
+ self.marker_count += 1
+ if self.verbose:
+ print("Route and marker calculation complete.")
+
+ for action_group in self.action_groups:
+ count = self.action_group_counts[action_group]
+ self.feature_groups[action_group] = folium.FeatureGroup(name=f"{action_group} ({count})").add_to(
+ self.osm_map)
+ self.osm_map.add_child(self.feature_groups[action_group])
+
+ def _calculate_route(self, i):
+ if self.verbose:
+ print(f"Calculating for item:{i}")
+ orig = self.nearest_nodes[i]
+ dest = self.nearest_nodes[i + 1]
+ try:
+ route = nx.shortest_path(self.G, orig, dest, weight='length')
+ route_length = sum(self.G[u][v][0]['length'] for u, v in zip(route[:-1], route[1:]))
+ self.total_distance += route_length
+ offset = 0 if i < self.max_distance_index else 0.0005
+ lats, lons = zip(*[(self.G.nodes[node]['y'] + offset, self.G.nodes[node]['x']) for node in route])
+ color = 'blue' if i < self.max_distance_index else 'red'
+ polyline = list(zip(lats, lons))
+ markers = self._calculate_markers(i)
+ return polyline, color, markers
+ except nx.NetworkXNoPath:
+ if self.verbose:
+ print(f"Item:{i}-No path found for {orig} to {dest}")
+ return None, None, {}
+ except nx.NodeNotFound:
+ if self.verbose:
+ print(f"Item:{i}-No path found for {orig} to {dest}")
+ return None, None, {}
+
+ def _calculate_markers(self, i):
+ # Calculate markers for action groups
+ markers = defaultdict(list)
+ for action_group in self.action_groups:
+ action_indices = [idx for idx, action in enumerate(self.actions) if action == action_group]
+ for idx in action_indices:
+ if idx == i:
+ location = self.gps_points[i]
+ tooltip = f"Result {self.marker_count}: {action_group}<br>Date/time:{self.dt[i]}"
+ popup_data = self._get_data(i)
+ action_style = self.action_styles.get(action_group,
+ {'color': 'blue', 'icon': 'marker', 'prefix': 'fa'})
+ markers[action_group].append((location, tooltip, popup_data, action_style))
+ return markers
+
+ def _plot_routes(self):
+ if self.verbose:
+ print("Plotting routes and markers...")
+ # self.action_group_counts = {action_group: 0 for action_group in self.feature_groups.keys()}
+ for polyline, color in self.route_polylines:
+ if self.use_ant_path:
+ AntPath(
+ locations=polyline,
+ color=color,
+ weight=3,  # Increase line thickness
+ opacity=10,  # Increase opacity
+ # pulse_color=color,
+ delay=1000,  # Slower animation to reduce flickering
+ # dash_array=[20, 30] # Adjust dash pattern if needed
+ ).add_to(self.main_route_layer)
+ else:
+ folium.PolyLine(locations=polyline, color=color).add_to(self.main_route_layer)
+ self.osm_map = add_arrows(self.osm_map, polyline, color, n_arrows=3)
+ # Plot markers for action groups
+ for action_group, action_markers in self.markers.items():
+ for location, tooltip, popup_data, action_style in action_markers:
+ folium.Marker(
+ location=location,
+ popup=folium.Popup(popup_data, max_width=600),
+ tooltip=tooltip,
+ icon=folium.Icon(
+ icon=action_style.get("icon"),
+ color=action_style.get("color"),
+ prefix=action_style.get("prefix")
+ )
+ ).add_to(self.feature_groups[action_group])
+
+ if self.verbose:
+ print("Route and marker plotting complete.")
+
+ def _add_markers(self):
+ if self.verbose:
+ print("Adding markers...")
+ # Add start marker
+ start_popup = folium.Popup(f"Start of route at {self.dt[0]}", max_width=300)
+ folium.Marker(location=self.gps_points[0], popup=start_popup,
+ icon=folium.Icon(icon='flag-checkered', prefix='fa')).add_to(self.osm_map)
+ # Add total distance marker at the end
+ folium.Marker(
+ self.gps_points[-1],
+ popup=f"End of Route at {self.dt[self.max_time_index]}. Total Distance Travelled: {self.total_distance / 1000:.2f} km",
+ icon=folium.Icon(color="red", icon="flag-checkered", prefix="fa")
+ ).add_to(self.osm_map)
+ if self.verbose:
+ print("Marker addition complete.")
+
+ def _add_calendar(self):
+ calendar_element = folium.Element(calendar_html)
+ self.osm_map.get_root().html.add_child(calendar_element)
+
+ def _add_map_title(self):
+ if self.map_html_title and self.show_map_title:
+ title_html = f'''
+ <div style="position: fixed;
+ top: 10px;
+ left: 50%;
+ transform: translate(-50%, 0%);
+ z-index: 9999;
+ font-size: 24px;
+ font-weight: bold;
+ background-color: white;
+ padding: 10px;
+ border: 2px solid black;
+ border-radius: 5px;">
+ {self.map_html_title}
+ </div>
+ '''
+ self.osm_map.get_root().html.add_child(folium.Element(title_html))
+
+ def _get_data(self, index):
+ # implement in subclass to populate popups
+ ...
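A hypothetical `RoutePlotter` invocation, inferred from the kwargs popped in `__init__` above (the dataframe needs `latitude`, `longitude`, a `date_time` column because `dt_field` is forced, and the chosen action field; column names and styles below are illustrative):

    plotter = RoutePlotter(
        osmnx_graph=graph,                  # e.g. from get_graph() in utils below
        df=trips_df,
        action_field='event_type',
        action_groups=('pickup', 'dropoff'),
        action_styles={'pickup': {'color': 'green', 'icon': 'play', 'prefix': 'fa'}},
        map_html_title='Route for 2025-01-15',
    )
    route_map = plotter.generate_map()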
@@ -0,0 +1,267 @@
+ import math
+ import os
+ import pickle
+ from urllib.parse import urlencode, urlsplit, urlunsplit
+
+ import folium
+ import geopandas as gpd
+ import numpy as np
+ import osmnx as ox
+ from geopy.distance import geodesic
+
+
+ #
+ # options = {
+ # 'ox_files_save_path': ox_files_save_path,
+ # 'network_type': 'drive',
+ # 'place': 'Costa Rica',
+ # 'files_prefix': 'costa-rica-',
+ # }
+ # Usage example
+ # handler = PBFHandler(**options)
+ # handler.load()
+
+
+ class PBFHandler:
+ def __init__(self, **kwargs):
+ self.graph = None
+ self.nodes = None
+ self.edges = None
+ self.rebuild = kwargs.setdefault("rebuild", False)
+ self.verbose = kwargs.setdefault("verbose", False)
+ self.place = kwargs.setdefault('place', 'Costa Rica')
+ self.filepath = kwargs.setdefault('ox_files_save_path', "gis_data/")
+ self.file_prefix = kwargs.setdefault('file_prefix', 'costa-rica-')
+ self.network_type = kwargs.setdefault('network_type', 'all')
+ self.graph_file = f"{self.filepath}{self.file_prefix}graph.pkl"
+ self.node_file = f"{self.filepath}{self.file_prefix}nodes.pkl"
+ self.edge_file = f"{self.filepath}{self.file_prefix}edges.pkl"
+
+ def load(self):
+ if self.verbose:
+ print("Loading data...")
+
+ files_to_check = [self.graph_file, self.node_file, self.edge_file]
+
+ if self.rebuild:
+ for file in files_to_check:
+ if os.path.exists(file):
+ os.remove(file)
+ if not os.path.exists(self.filepath):
+ os.makedirs(self.filepath, exist_ok=True)
+ # self.process_pbf()
+ # self.save_to_pickle()
+ if not all(os.path.exists(f) for f in files_to_check):
+ self.process_pbf()
+ self.save_to_pickle()
+ else:
+ self.load_from_pickle()
+
+ if self.verbose:
+ print("Data loaded successfully.")
+
+ def process_pbf(self):
+ """
+ Load a PBF file and create a graph.
+ """
+ try:
+ if self.verbose:
+ print(f"Processing PBF for {self.place}...")
+
+ self.graph = ox.graph_from_place(self.place, network_type=self.network_type)
+ self.nodes, self.edges = ox.graph_to_gdfs(self.graph)
+
+ if self.verbose:
+ print("PBF processed successfully.")
+ except Exception as e:
+ print(f"Error processing PBF: {e}")
+ raise
+
+ def save_to_pickle(self):
+ """
+ Save the graph, nodes, and edges to pickle files.
+ """
+ try:
+ if self.verbose:
+ print("Saving data to pickle files...")
+
+ data_to_save = {
+ self.graph_file: self.graph,
+ self.node_file: self.nodes,
+ self.edge_file: self.edges
+ }
+
+ for file, data in data_to_save.items():
+ if data is not None:
+ with open(file, 'wb') as f:
+ pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
+
+ if self.verbose:
+ print("Data saved to pickle files successfully.")
+ except Exception as e:
+ print(f"Error saving to pickle: {e}")
+ raise
+
+ def load_from_pickle(self):
+ """
+ Load the graph, nodes, and edges from pickle files.
+ """
+ try:
+ if self.verbose:
+ print("Loading data from pickle files...")
+
+ files_to_load = {
+ self.graph_file: 'graph',
+ self.node_file: 'nodes',
+ self.edge_file: 'edges'
+ }
+
+ for file, attr in files_to_load.items():
+ with open(file, 'rb') as f:
+ setattr(self, attr, pickle.load(f))
+
+ if self.verbose:
+ print("Data loaded from pickle files successfully.")
+ except Exception as e:
+ print(f"Error loading from pickle: {e}")
+ raise
+
+ def plot_graph(self):
+ """
+ Plot the graph.
+ """
+ try:
+ if self.graph is not None:
+ if self.verbose:
+ print("Plotting the graph...")
+ ox.plot_graph(self.graph)
+ if self.verbose:
+ print("Graph plotted successfully.")
+ else:
+ print("Graph is not loaded. Please load a PBF file first.")
+ except Exception as e:
+ print(f"Error plotting the graph: {e}")
+ raise
+
+
+ def get_bounding_box_from_points(gps_points, margin=0.001):
+ latitudes = [point[0] for point in gps_points]
+ longitudes = [point[1] for point in gps_points]
+
+ north = max(latitudes) + margin
+ south = min(latitudes) - margin
+ east = max(longitudes) + margin
+ west = min(longitudes) - margin
+
+ return north, south, east, west
+
+
+ def add_arrows(map_object, locations, color, n_arrows):
+ # Get the number of locations
+ n = len(locations)
+
+ # If there are more than two points...
+ if n > 2:
+ # Add arrows along the path
+ for i in range(0, n - 1, n // n_arrows):
+ # Get the start and end point for this segment
+ start, end = locations[i], locations[i + 1]
+
+ # Calculate the direction in which to place the arrow
+ rotation = -np.arctan2((end[1] - start[1]), (end[0] - start[0])) * 180 / np.pi
+
+ folium.RegularPolygonMarker(location=end,
+ fill_color=color,
+ number_of_sides=2,
+ radius=6,
+ rotation=rotation).add_to(map_object)
+ return map_object
+
+
+ def extract_subgraph(G, north, south, east, west):
+ # Create a bounding box polygon
+ # from osmnx v2 this is how it is done
+ if ox.__version__ >= '2.0':
+ bbox_poly = gpd.GeoSeries([ox.utils_geo.bbox_to_poly(bbox=(west, south, east, north))])
+ else:
+ bbox_poly = gpd.GeoSeries([ox.utils_geo.bbox_to_poly(north, south, east, west)])
+
+ # Get nodes GeoDataFrame
+ nodes_gdf = ox.graph_to_gdfs(G, nodes=True, edges=False)
+
+ # Find nodes within the bounding box
+ nodes_within_bbox = nodes_gdf[nodes_gdf.geometry.within(bbox_poly.geometry.unary_union)]
+
+ # Create subgraph
+ subgraph = G.subgraph(nodes_within_bbox.index)
+
+ return subgraph
+
+
+ def get_distance_between_points(point_a, point_b, unit='km'):
+ if not isinstance(point_a, tuple) or len(point_a) != 2:
+ return 0
+ if not all(isinstance(x, float) and not math.isnan(x) for x in point_a):
+ return 0
+ if not isinstance(point_b, tuple) or len(point_b) != 2:
+ return 0
+ if not all(isinstance(x, float) and not math.isnan(x) for x in point_b):
+ return 0
+ distance = geodesic(point_a, point_b)
+ if unit == 'km':
+ return distance.kilometers
+ elif unit == 'm':
+ return distance.meters
+ elif unit == 'mi':
+ return distance.miles
+ else:
+ return 0
+
+
+ tile_options = {
+ "OpenStreetMap": "OpenStreetMap",
+ "CartoDB": "cartodbpositron",
+ "CartoDB Voyager": "cartodbvoyager"
+ }
+
+
+ def attach_supported_tiles(map_object, default_tile="OpenStreetMap"):
+ # Normalize the default tile name to lowercase for comparison
+ normalized_default_tile = default_tile.lower()
+
+ # Filter out the default tile layer from the options to avoid duplication
+ tile_options_filtered = {k: v for k, v in tile_options.items() if v.lower() != normalized_default_tile}
+
+ for tile, description in tile_options_filtered.items():
+ folium.TileLayer(name=tile, tiles=description, show=False).add_to(map_object)
+
+
+ def get_graph(**options):
+ handler = PBFHandler(**options)
+ handler.load()
+ return handler.graph, handler.nodes, handler.edges
+
+
+ def add_query_params(url, params):
+ # Parse the original URL
+ url_components = urlsplit(url)
+
+ # Parse original query parameters and update with new params
+ original_params = dict([tuple(pair.split('=')) for pair in url_components.query.split('&') if pair])
+ original_params.update(params)
+
+ # Construct the new query string
+ new_query_string = urlencode(original_params)
+
+ # Construct the new URL
+ new_url = urlunsplit((
+ url_components.scheme,
+ url_components.netloc,
+ url_components.path,
+ new_query_string,
+ url_components.fragment
+ ))
+
+ return new_url
+
+
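Rounding out the new module: `get_graph` is a thin wrapper over `PBFHandler` (keyword names taken from `__init__` above), and `add_query_params` preserves existing query parameters while adding new ones.

    from sibi_dst.osmnx_helper.utils import get_graph, add_query_params

    graph, nodes, edges = get_graph(
        ox_files_save_path='gis_data/',
        file_prefix='costa-rica-',
        place='Costa Rica',
        network_type='drive',
        verbose=True,
    )

    add_query_params('https://example.com/map?zoom=12', {'date': '2025-01-15'})
    # -> 'https://example.com/map?zoom=12&date=2025-01-15'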
File without changes
@@ -0,0 +1,78 @@
+ import unittest
+ from unittest.mock import patch, MagicMock
+ import datetime
+ import pandas as pd
+ from sibi_dst.utils import Logger, ParquetSaver
+ from sibi_dst.utils.data_wrapper import DataWrapper
+
+
+ class TestDataWrapper(unittest.TestCase):
+
+ def setUp(self):
+ self.dataclass = MagicMock()
+ self.date_field = "created_at"
+ self.data_path = "/path/to/data"
+ #self.data_path = "s3://your-bucket-name/path/to/data"
+ self.parquet_filename = "data.parquet"
+ self.start_date = "2022-01-01"
+ self.end_date = "2022-12-31"
+ self.filesystem_type = "file"
+ self.filesystem_options = {
+ #"key": "your_aws_access_key",
+ #"secret": "your_aws_secret_key",
+ #"client_kwargs": {"endpoint_url": "https://s3.amazonaws.com"}
+ }
+ self.logger = Logger.default_logger(logger_name="TestLogger")
+
+ def test_initialization(self):
+ wrapper = DataWrapper(
+ dataclass=self.dataclass,
+ date_field=self.date_field,
+ data_path=self.data_path,
+ parquet_filename=self.parquet_filename,
+ start_date=self.start_date,
+ end_date=self.end_date,
+ filesystem_type=self.filesystem_type,
+ filesystem_options=self.filesystem_options,
+ logger=self.logger
+ )
+ self.assertEqual(wrapper.dataclass, self.dataclass)
+ self.assertEqual(wrapper.date_field, self.date_field)
+ self.assertEqual(wrapper.data_path, "/path/to/data/")
+ self.assertEqual(wrapper.parquet_filename, self.parquet_filename)
+ self.assertEqual(wrapper.start_date, datetime.date(2022, 1, 1))
+ self.assertEqual(wrapper.end_date, datetime.date(2022, 12, 31))
+ self.assertEqual(wrapper.filesystem_type, self.filesystem_type)
+ self.assertEqual(wrapper.filesystem_options, self.filesystem_options)
+ self.assertEqual(wrapper.logger, self.logger)
+
+ def test_convert_to_date(self):
+ self.assertEqual(DataWrapper.convert_to_date("2022-01-01"), datetime.date(2022, 1, 1))
+ self.assertEqual(DataWrapper.convert_to_date(datetime.date(2022, 1, 1)), datetime.date(2022, 1, 1))
+ with self.assertRaises(ValueError):
+ DataWrapper.convert_to_date("invalid-date")
+
+ @patch('fsspec.filesystem')
+ def test_is_file_older_than(self, mock_filesystem):
+ mock_fs = mock_filesystem.return_value
+ mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1500)).timestamp()}
+
+ wrapper = DataWrapper(
+ dataclass=self.dataclass,
+ date_field=self.date_field,
+ data_path=self.data_path,
+ parquet_filename=self.parquet_filename,
+ start_date=self.start_date,
+ end_date=self.end_date,
+ filesystem_type=self.filesystem_type,
+ filesystem_options=self.filesystem_options,
+ logger=self.logger
+ )
+
+ self.assertTrue(wrapper.is_file_older_than("some/file/path"))
+ mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1000)).timestamp()}
+ self.assertFalse(wrapper.is_file_older_than("some/file/path"))
+
+
+ if __name__ == '__main__':
+ unittest.main()
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 0.3.27
+ Version: 0.3.28
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
@@ -8,6 +8,7 @@ Requires-Python: >=3.11,<4.0
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
  Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
@@ -17,10 +18,13 @@ Requires-Dist: dask-expr (>=1.1.20,<2.0.0)
  Requires-Dist: dask[complete] (>=2024.11.1,<2025.0.0)
  Requires-Dist: django (>=5.1.4,<6.0.0)
  Requires-Dist: djangorestframework (>=3.15.2,<4.0.0)
+ Requires-Dist: folium (>=0.19.4,<0.20.0)
+ Requires-Dist: geopandas (>=1.0.1,<2.0.0)
  Requires-Dist: httpx (>=0.27.2,<0.28.0)
  Requires-Dist: ipython (>=8.29.0,<9.0.0)
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
  Requires-Dist: mysqlclient (>=2.2.6,<3.0.0)
+ Requires-Dist: nltk (>=3.9.1,<4.0.0)
  Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
  Requires-Dist: paramiko (>=3.5.0,<4.0.0)
@@ -1,6 +1,6 @@
  sibi_dst/__init__.py,sha256=CLHfzrFNqklNx5uMKAPtbZfkbBbVYR5qsiMro0RTfmA,252
- sibi_dst/df_helper/__init__.py,sha256=aiAu7j1SWDiw3RVI4UJmvLcADP34OfrJTCYpdupPGII,234
- sibi_dst/df_helper/_df_helper.py,sha256=vG-Lb9lj8s5cACTvfYp7JhXt1PajttHVhKYzBWR-9Vc,13953
+ sibi_dst/df_helper/__init__.py,sha256=5yzslP6zYYOHsTtAzHnNDXHYjf_T6yW7baxwgtduWqQ,292
+ sibi_dst/df_helper/_df_helper.py,sha256=MttqHot8dlHzo4G522JL-z6LOFWYVXqqz06k-4YcvRM,23447
  sibi_dst/df_helper/_parquet_artifact.py,sha256=nx1wTEyrjARpCCPNwBxYiBROee3CSb6c-u7Cpme_tdk,4978
  sibi_dst/df_helper/_parquet_reader.py,sha256=sbe8DsScNT2h6huNsz8mUxVnUGpJeRzbaONZ3u2sQeQ,1685
  sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,9 +22,18 @@ sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=ML-m_WeTR1_UMgiDR
  sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=Bmhh6VvmBfNfBA2JpuEdsYD_193yJ768Si2TvkY9HmU,4405
  sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
  sibi_dst/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
- sibi_dst/df_helper/core/_filter_handler.py,sha256=g9FMcB_koT724ggcWt98jow2XgUnmupK_fNhF95W5bQ,10217
+ sibi_dst/df_helper/core/_filter_handler.py,sha256=t3uLLJX5hWO_dWKCCz8Dwpc9RZ5PMHBIWkHSELCpFXI,11131
  sibi_dst/df_helper/core/_params_config.py,sha256=Og3GYth0GVWpcOYWZWRy7CZ5PDsg63Nmqo-W7TUrA_0,3503
  sibi_dst/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
+ sibi_dst/df_helper/data_cleaner.py,sha256=lkxQoXLvGzXCicFUimnA5nen5qkrO1oxgl_p2Be2o8w,5183
+ sibi_dst/osmnx_helper/__init__.py,sha256=QeAKEeVXZk_qn8o0d3BOoGgv2lzatcI2yBqY3ZqviKI,153
+ sibi_dst/osmnx_helper/base_osm_map.py,sha256=s2OY_XfwjZA3ImJNtCgevGBCbwRVe3dY3QVkTHEulB0,5794
+ sibi_dst/osmnx_helper/basemaps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/osmnx_helper/basemaps/calendar_html.py,sha256=UArt6FDgoCgoRte45Xo3IHqd-RNzW0YgitgZYfOFasY,4031
+ sibi_dst/osmnx_helper/basemaps/router_plotter.py,sha256=QznnBGsUwhl8ZITcVNBrQDm-MXAd0jpJGPuyozKyQg0,8537
+ sibi_dst/osmnx_helper/utils.py,sha256=8sF-wNSL38WzhWS3DceZ1cP8BM11i7D0bI-E4XYD8K4,8449
+ sibi_dst/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/tests/test_data_wrapper_class.py,sha256=Nkup5OFH5Cos2fxPaU7g9IEyINJM0uJ5-rOZ-eNtd20,3275
  sibi_dst/utils/__init__.py,sha256=z51o5sjIo_gTjnDXk5SBniCxWJIrDBMS7df0dTs8VMk,775
  sibi_dst/utils/airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
  sibi_dst/utils/clickhouse_writer.py,sha256=xUhFDOuZt0eZDpVJNuLb7pfTHUV06NCYrNUx_a7qrSM,8580
@@ -38,6 +47,6 @@ sibi_dst/utils/filepath_generator.py,sha256=hjI7gQwfwRToPeuzoUQDayHKQrr4Ivhi4Chl
  sibi_dst/utils/log_utils.py,sha256=4eLmoV8VC7wDwPr1mRfDKP24_-laGO6ogE4U0u3DUuA,2315
  sibi_dst/utils/parquet_saver.py,sha256=hLrWr1G132y94eLopDPPGQGDsAiR1lQ8id4QQtGYPE4,4349
  sibi_dst/utils/storage_manager.py,sha256=7nkfeBW_2xlF59pGj7V2aY5TLwpJnPQuPVclqjavJOA,3856
- sibi_dst-0.3.27.dist-info/METADATA,sha256=YFb0ZGbz2m0-aczvItyKK4Iqf1wn6pSVGE41ZUQ6YI8,2265
- sibi_dst-0.3.27.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- sibi_dst-0.3.27.dist-info/RECORD,,
+ sibi_dst-0.3.28.dist-info/METADATA,sha256=9xBeLwWalUf7exDK-0NZfnYmUQnOIdV2xa0PYNTd85I,2436
+ sibi_dst-0.3.28.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ sibi_dst-0.3.28.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.9.0
+ Generator: poetry-core 1.9.1
  Root-Is-Purelib: true
  Tag: py3-none-any