sibi-dst 0.3.45__py3-none-any.whl → 0.3.46__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (84)
  1. sibi_dst/__init__.py +38 -0
  2. sibi_dst/{df_helper → v1/df_helper}/_artifact_updater_multi_wrapper.py +1 -1
  3. sibi_dst/{df_helper → v1/df_helper}/_df_helper.py +3 -3
  4. sibi_dst/{df_helper → v1/df_helper}/_parquet_artifact.py +2 -2
  5. sibi_dst/{df_helper → v1/df_helper}/_parquet_reader.py +2 -2
  6. sibi_dst/{df_helper → v1/df_helper}/backends/django/_load_from_db.py +3 -3
  7. sibi_dst/{df_helper → v1/df_helper}/backends/http/_http_config.py +1 -1
  8. sibi_dst/{df_helper → v1/df_helper}/backends/parquet/_filter_handler.py +1 -1
  9. sibi_dst/{df_helper → v1/df_helper}/backends/parquet/_parquet_options.py +2 -2
  10. sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_io_dask.py +2 -2
  11. sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_load_from_db.py +2 -2
  12. sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_sql_model_builder.py +2 -1
  13. sibi_dst/{df_helper → v1/df_helper}/core/_filter_handler.py +1 -1
  14. sibi_dst/v1/osmnx_helper/__init__.py +6 -0
  15. sibi_dst/{tests → v1/tests}/test_data_wrapper_class.py +11 -10
  16. sibi_dst/{utils → v1/utils}/__init__.py +2 -0
  17. sibi_dst/{utils → v1/utils}/clickhouse_writer.py +1 -1
  18. sibi_dst/v1/utils/data_from_http_source.py +49 -0
  19. sibi_dst/{utils → v1/utils}/data_utils.py +5 -3
  20. sibi_dst/{utils → v1/utils}/data_wrapper.py +3 -1
  21. sibi_dst/{utils → v1/utils}/date_utils.py +1 -1
  22. sibi_dst/{utils → v1/utils}/file_utils.py +1 -1
  23. sibi_dst/{utils → v1/utils}/filepath_generator.py +1 -1
  24. sibi_dst/{utils → v1/utils}/parquet_saver.py +1 -1
  25. sibi_dst/v1/utils/storage_config.py +28 -0
  26. sibi_dst/v2/df_helper/__init__.py +7 -0
  27. sibi_dst/v2/df_helper/_df_helper.py +214 -0
  28. sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +10 -0
  29. sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +82 -0
  30. sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +135 -0
  31. sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +142 -0
  32. sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +297 -0
  33. sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +9 -0
  34. sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +78 -0
  35. sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +122 -0
  36. sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +142 -0
  37. sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +283 -0
  38. sibi_dst/v2/df_helper/core/__init__.py +9 -0
  39. sibi_dst/v2/df_helper/core/_filter_handler.py +236 -0
  40. sibi_dst/v2/df_helper/core/_params_config.py +139 -0
  41. sibi_dst/v2/df_helper/core/_query_config.py +17 -0
  42. sibi_dst/v2/utils/__init__.py +5 -0
  43. sibi_dst/v2/utils/log_utils.py +120 -0
  44. {sibi_dst-0.3.45.dist-info → sibi_dst-0.3.46.dist-info}/METADATA +3 -2
  45. sibi_dst-0.3.46.dist-info/RECORD +80 -0
  46. sibi_dst/osmnx_helper/__init__.py +0 -9
  47. sibi_dst/osmnx_helper/v2/base_osm_map.py +0 -153
  48. sibi_dst/osmnx_helper/v2/basemaps/utils.py +0 -0
  49. sibi_dst-0.3.45.dist-info/RECORD +0 -62
  50. /sibi_dst/{df_helper/backends → v1}/__init__.py +0 -0
  51. /sibi_dst/{df_helper → v1/df_helper}/__init__.py +0 -0
  52. /sibi_dst/{osmnx_helper/v1 → v1/df_helper/backends}/__init__.py +0 -0
  53. /sibi_dst/{df_helper → v1/df_helper}/backends/django/__init__.py +0 -0
  54. /sibi_dst/{df_helper → v1/df_helper}/backends/django/_db_connection.py +0 -0
  55. /sibi_dst/{df_helper → v1/df_helper}/backends/django/_io_dask.py +0 -0
  56. /sibi_dst/{df_helper → v1/df_helper}/backends/django/_sql_model_builder.py +0 -0
  57. /sibi_dst/{df_helper → v1/df_helper}/backends/http/__init__.py +0 -0
  58. /sibi_dst/{df_helper → v1/df_helper}/backends/parquet/__init__.py +0 -0
  59. /sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/__init__.py +0 -0
  60. /sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_db_connection.py +0 -0
  61. /sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_filter_handler.py +0 -0
  62. /sibi_dst/{df_helper → v1/df_helper}/core/__init__.py +0 -0
  63. /sibi_dst/{df_helper → v1/df_helper}/core/_defaults.py +0 -0
  64. /sibi_dst/{df_helper → v1/df_helper}/core/_params_config.py +0 -0
  65. /sibi_dst/{df_helper → v1/df_helper}/core/_query_config.py +0 -0
  66. /sibi_dst/{df_helper → v1/df_helper}/data_cleaner.py +0 -0
  67. /sibi_dst/{geopy_helper → v1/geopy_helper}/__init__.py +0 -0
  68. /sibi_dst/{geopy_helper → v1/geopy_helper}/geo_location_service.py +0 -0
  69. /sibi_dst/{geopy_helper → v1/geopy_helper}/utils.py +0 -0
  70. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/base_osm_map.py +0 -0
  71. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/basemaps/__init__.py +0 -0
  72. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/basemaps/calendar_html.py +0 -0
  73. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/basemaps/router_plotter.py +0 -0
  74. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/utils.py +0 -0
  75. /sibi_dst/{osmnx_helper/v2 → v1/tests}/__init__.py +0 -0
  76. /sibi_dst/{utils → v1/utils}/airflow_manager.py +0 -0
  77. /sibi_dst/{utils → v1/utils}/credentials.py +0 -0
  78. /sibi_dst/{utils → v1/utils}/df_utils.py +0 -0
  79. /sibi_dst/{utils → v1/utils}/log_utils.py +0 -0
  80. /sibi_dst/{utils → v1/utils}/phone_formatter.py +0 -0
  81. /sibi_dst/{utils → v1/utils}/storage_manager.py +0 -0
  82. /sibi_dst/{osmnx_helper/v2/basemaps → v2}/__init__.py +0 -0
  83. /sibi_dst/{tests → v2/df_helper/backends}/__init__.py +0 -0
  84. {sibi_dst-0.3.45.dist-info → sibi_dst-0.3.46.dist-info}/WHEEL +0 -0
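The headline change in 0.3.46 is structural: every existing module moves under a new sibi_dst.v1 namespace, while a parallel sibi_dst.v2 tree adds rewritten df_helper backends (SQLAlchemy and SQLModel), new core configuration and filter modules, and a new logging utility. The hunks reproduced below cover the new v2 core and utils files plus the METADATA, RECORD, and removed osmnx_helper changes. In practice the move looks like the sketch below; the DfHelper class name is assumed for illustration, and whether the new top-level sibi_dst/__init__.py aliases the old import paths is not visible in this diff.

    # 0.3.45: helpers lived at the package root
    from sibi_dst.df_helper import DfHelper  # class name assumed for illustration

    # 0.3.46: the same module now lives under the v1 namespace
    from sibi_dst.v1.df_helper import DfHelper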
sibi_dst/v2/df_helper/core/_filter_handler.py
@@ -0,0 +1,236 @@
+ import datetime
+ import itertools
+ import dask.dataframe as dd
+ import pandas as pd
+ from sqlalchemy import func, cast
+ from sqlalchemy.sql.sqltypes import Date, Time
+ from sibi_dst.v2.utils import Logger
+ import typing
+
+
+ class FilterHandler:
+     """
+     Handles the application of filters to data sources with support for SQLAlchemy, SQLModel, and Dask backends.
+
+     This class abstracts the process of applying filters to various backends, specifically
+     SQLAlchemy/SQLModel queries and Dask DataFrames. It supports multiple filtering operations,
+     including exact matches, comparisons, and string-related operations such as contains and regex.
+     """
+
+     def __init__(self, backend, logger=None, debug=False):
+         """
+         Initialize the FilterHandler.
+
+         Args:
+             backend: The backend to use ('sqlalchemy', 'sqlmodel', or 'dask').
+             logger: Optional logger for debugging purposes.
+             debug: Whether to enable debug-level logging (defaults to False).
+         """
+         self.backend = backend
+         self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
+         self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
+         self.backend_methods = self._get_backend_methods(backend)
+
+     def apply_filters(self, query_or_df, model=None, filters=None):
+         """
+         Apply filters to the data source based on the backend.
+
+         Args:
+             query_or_df: A SQLAlchemy/SQLModel query or Dask DataFrame.
+             model: SQLAlchemy/SQLModel model (required for the SQLAlchemy/SQLModel backends).
+             filters: Dictionary of filters.
+
+         Returns:
+             Filtered query or DataFrame.
+         """
+         filters = filters or {}
+         for key, value in filters.items():
+             field_name, casting, operation = self._parse_filter_key(key)
+             parsed_value = self._parse_filter_value(casting, value)
+             # SQLAlchemy and SQLModel share the same backend methods.
+             if self.backend in ("sqlalchemy", "sqlmodel"):
+                 column = self.backend_methods["get_column"](field_name, model, casting)
+                 condition = self.backend_methods["apply_operation"](column, operation, parsed_value)
+                 query_or_df = self.backend_methods["apply_condition"](query_or_df, condition)
+             elif self.backend == "dask":
+                 column = self.backend_methods["get_column"](query_or_df, field_name, casting)
+                 condition = self.backend_methods["apply_operation"](column, operation, parsed_value)
+                 query_or_df = self.backend_methods["apply_condition"](query_or_df, condition)
+             else:
+                 raise ValueError(f"Unsupported backend: {self.backend}")
+
+         return query_or_df
+
+     @staticmethod
+     def _parse_filter_key(key):
+         parts = key.split("__")
+         field_name = parts[0]
+         casting = None
+         operation = "exact"
+
+         if len(parts) == 3:
+             _, casting, operation = parts
+         elif len(parts) == 2:
+             if parts[1] in FilterHandler._comparison_operators():
+                 operation = parts[1]
+             elif parts[1] in FilterHandler._dt_operators() + FilterHandler._date_operators():
+                 casting = parts[1]
+
+         return field_name, casting, operation
+
+     def _parse_filter_value(self, casting, value):
+         """
+         Convert a filter value to an appropriate type based on the casting (e.g., date).
+         """
+         if casting == "date":
+             if isinstance(value, str):
+                 return pd.Timestamp(value)  # convert to datetime64[ns]
+             if isinstance(value, list):
+                 return [pd.Timestamp(v) for v in value]
+         elif casting == "time" and isinstance(value, str):
+             parsed = datetime.time.fromisoformat(value)
+             self.logger.debug(f"Parsed value (time): {parsed}")
+             return parsed
+         return value
+
+     @staticmethod
+     def _get_backend_methods(backend):
+         if backend in ("sqlalchemy", "sqlmodel"):
+             return {
+                 "get_column": FilterHandler._get_sqlalchemy_column,
+                 "apply_operation": FilterHandler._apply_operation_sqlalchemy,
+                 "apply_condition": lambda query, condition: query.filter(condition),
+             }
+         elif backend == "dask":
+             return {
+                 "get_column": FilterHandler._get_dask_column,
+                 "apply_operation": FilterHandler._apply_operation_dask,
+                 "apply_condition": lambda df, condition: df[condition],
+             }
+         else:
+             raise ValueError(f"Unsupported backend: {backend}")
+
+     @staticmethod
+     def _get_sqlalchemy_column(field_name, model, casting):
+         """
+         Retrieve and cast a column for SQLAlchemy/SQLModel based on the field name and casting.
+
+         Args:
+             field_name: The name of the field/column.
+             model: The SQLAlchemy/SQLModel model.
+             casting: The casting type ('date', 'time', etc.).
+
+         Returns:
+             The SQLAlchemy column object, optionally cast or transformed.
+         """
+         column = getattr(model, field_name, None)
+         if column is None:  # "not column" would raise: SQL expressions have no truth value
+             raise AttributeError(f"Field '{field_name}' not found in model '{model.__name__}'")
+         if casting == "date":
+             column = cast(column, Date)
+         elif casting == "time":
+             column = cast(column, Time)
+         elif casting in FilterHandler._date_operators():
+             column = func.extract(casting, column)
+         return column
+
+     @staticmethod
+     def _get_dask_column(df, field_name, casting):
+         """
+         Retrieve and optionally cast a column for Dask based on the field name and casting.
+
+         Args:
+             df: The Dask DataFrame.
+             field_name: The name of the field/column.
+             casting: The casting type ('date', 'time', etc.).
+
+         Returns:
+             The Dask Series, optionally cast or transformed.
+         """
+         column = dd.to_datetime(df[field_name], errors="coerce") if casting in FilterHandler._dt_operators() else df[field_name]
+         if casting == "date":
+             column = column.dt.floor("D")
+         elif casting in FilterHandler._date_operators():
+             # pandas/dask expose "weekday", not "week_day"
+             column = getattr(column.dt, "weekday" if casting == "week_day" else casting)
+         return column
+
+     @staticmethod
+     def _apply_operation_sqlalchemy(column, operation, value):
+         operation_map = FilterHandler._operation_map_sqlalchemy()
+         if operation not in operation_map:
+             raise ValueError(f"Unsupported operation: {operation}")
+         return operation_map[operation](column, value)
+
+     @staticmethod
+     def _apply_operation_dask(column, operation, value):
+         operation_map = FilterHandler._operation_map_dask()
+         if operation not in operation_map:
+             raise ValueError(f"Unsupported operation: {operation}")
+         return operation_map[operation](column, value)
+
+     @staticmethod
+     def _operation_map_sqlalchemy():
+         return {
+             "exact": lambda col, val: col == val,
+             "gt": lambda col, val: col > val,
+             "gte": lambda col, val: col >= val,
+             "lt": lambda col, val: col < val,
+             "lte": lambda col, val: col <= val,
+             "in": lambda col, val: col.in_(val),
+             "range": lambda col, val: col.between(val[0], val[1]),
+             "contains": lambda col, val: col.like(f"%{val}%"),
+             "startswith": lambda col, val: col.like(f"{val}%"),
+             "endswith": lambda col, val: col.like(f"%{val}"),
+             "isnull": lambda col, val: col.is_(None) if val else col.isnot(None),
+             "not_exact": lambda col, val: col != val,
+             "not_contains": lambda col, val: ~col.like(f"%{val}%"),
+             "not_in": lambda col, val: ~col.in_(val),
+             "regex": lambda col, val: col.op("~")(val),  # PostgreSQL regex operator
+             "icontains": lambda col, val: col.ilike(f"%{val}%"),
+             "istartswith": lambda col, val: col.ilike(f"{val}%"),
+             "iendswith": lambda col, val: col.ilike(f"%{val}"),
+             "iexact": lambda col, val: col.ilike(val),
+             "iregex": lambda col, val: col.op("~*")(val),  # PostgreSQL case-insensitive regex
+         }
+
+     @staticmethod
+     def _operation_map_dask():
+         return {
+             "exact": lambda col, val: col == val,
+             "gt": lambda col, val: col > val,
+             "gte": lambda col, val: col >= val,
+             "lt": lambda col, val: col < val,
+             "lte": lambda col, val: col <= val,
+             "in": lambda col, val: col.isin(val),
+             "range": lambda col, val: (col >= val[0]) & (col <= val[1]),
+             "contains": lambda col, val: col.str.contains(val, regex=True),
+             "startswith": lambda col, val: col.str.startswith(val),
+             "endswith": lambda col, val: col.str.endswith(val),
+             "isnull": lambda col, val: col.isnull() if val else col.notnull(),
+             "not_exact": lambda col, val: col != val,
+             "not_contains": lambda col, val: ~col.str.contains(val, regex=True),
+             "not_in": lambda col, val: ~col.isin(val),
+             "regex": lambda col, val: col.str.contains(val, regex=True),
+             "icontains": lambda col, val: col.str.contains(val, case=False, regex=True),
+             # str.startswith/endswith take no `case` argument; lowercase both sides instead
+             "istartswith": lambda col, val: col.str.lower().str.startswith(val.lower()),
+             "iendswith": lambda col, val: col.str.lower().str.endswith(val.lower()),
+             "iexact": lambda col, val: col.str.contains(f"^{val}$", case=False, regex=True),
+             "iregex": lambda col, val: col.str.contains(val, case=False, regex=True),
+         }
+
+     @staticmethod
+     def _dt_operators():
+         return ["date", "time"]
+
+     @staticmethod
+     def _date_operators():
+         return ["year", "month", "day", "hour", "minute", "second", "week_day"]
+
+     @staticmethod
+     def _comparison_operators():
+         return [
+             "gte", "lte", "gt", "lt", "exact", "in", "range",
+             "contains", "startswith", "endswith", "isnull",
+             "not_exact", "not_contains", "not_in",
+             "regex", "icontains", "istartswith", "iendswith",
+             "iexact", "iregex"
+         ]
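The new FilterHandler resolves Django-style filter keys of the form field[__cast][__operation] against either a SQLAlchemy/SQLModel query or a Dask DataFrame. A minimal usage sketch for the Dask backend follows; the DataFrame contents are made up, and the import assumes the new core __init__.py re-exports FilterHandler (otherwise import it from sibi_dst.v2.df_helper.core._filter_handler):

    import dask.dataframe as dd
    import pandas as pd

    from sibi_dst.v2.df_helper.core import FilterHandler  # re-export assumed

    pdf = pd.DataFrame({
        "status": ["shipped", "pending", "shipped"],
        "total": [150, 80, 40],
        "created_at": pd.to_datetime(["2024-01-15", "2024-02-02", "2024-01-20"]),
    })
    df = dd.from_pandas(pdf, npartitions=1)

    handler = FilterHandler(backend="dask")
    filtered = handler.apply_filters(df, filters={
        "status": "shipped",                                      # plain key -> exact match
        "total__gte": 100,                                        # comparison operator
        "created_at__date__range": ["2024-01-01", "2024-01-31"],  # cast to date, then range
    })
    print(filtered.compute())  # only the 2024-01-15 "shipped" row with total 150 survives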
sibi_dst/v2/df_helper/core/_params_config.py
@@ -0,0 +1,139 @@
+ from typing import Optional, Dict, Union, List
+
+ from pydantic import BaseModel, model_validator, Field
+
+ dataframe_params: Dict[str, Union[str, bool, int, None]] = {
+     "fieldnames": None,
+     "index_col": None,
+     "coerce_float": False,
+     "verbose": True,
+     "datetime_index": False,
+     "column_names": None,
+     "chunk_size": 1000,
+ }
+ # dataframe_options provides additional options for modifying a pandas DataFrame,
+ # including parameters for handling duplicates, sorting, grouping, and other operations.
+ dataframe_options: Dict[str, Union[bool, str, int, None]] = {
+     "debug": False,            # whether to print debug information
+     "duplicate_expr": None,    # expression for identifying duplicate values
+     "duplicate_keep": 'last',  # how to handle duplicates ('first', 'last', or False)
+     "sort_field": None,        # field to use for sorting the DataFrame
+     "group_by_expr": None,     # expression for grouping the DataFrame
+     "group_expr": None         # aggregation expression applied to the grouped DataFrame
+ }
+
+ LOOKUP_SEP = "__"
+
+
+ class ParamsConfig(BaseModel):
+     """
+     Defines a configuration model for parameters with functionality for parsing,
+     validation, and conversion of legacy filters.
+
+     This class extends Pydantic's BaseModel and is designed to handle multiple
+     sets of configurations, including field mappings, filters, dataframe parameters,
+     and dataframe options. It allows for flexible parsing of parameters across a
+     variety of supported structures and ensures that legacy filters can be
+     appropriately converted for compatibility.
+
+     :ivar field_map: Maps current field names to their legacy equivalents.
+     :type field_map: Optional[Dict]
+     :ivar legacy_filters: Indicates whether legacy filters should be processed.
+     :type legacy_filters: bool
+     :ivar sticky_filters: Additional filters, as key-value pairs, that persist
+         across parameter parsing.
+     :type sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]]
+     :ivar filters: All current filters, including sticky and dynamically
+         parsed filters.
+     :type filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]]
+     :ivar df_params: Parameters related to dataframe construction.
+     :type df_params: Dict[str, Union[tuple, str, bool, None]]
+     :ivar df_options: Optional dataframe configurations for additional
+         behavior customization.
+     :type df_options: Dict[str, Union[bool, str, None]]
+     :ivar params: Dictionary of parameters provided for configuration, supporting
+         both basic and nested structures.
+     :type params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]]
+     """
+     field_map: Optional[Dict] = Field(default_factory=dict)
+     legacy_filters: bool = False
+     sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]] = Field(default_factory=dict)
+     filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]] = Field(default_factory=dict)
+     df_params: Dict[str, Union[tuple, str, bool, None]] = Field(default_factory=dict)
+     df_options: Dict[str, Union[bool, str, None]] = Field(default_factory=dict)
+     params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]] = Field(default_factory=dict)
+
+     @model_validator(mode='after')
+     def check_params(self):
+         if self.params is not None:
+             self.parse_params(self.params)
+         return self
+
+     def parse_params(self, params):
+         """
+         Parse and separate the given parameters into dataframe parameters,
+         dataframe options, and filters, updating the corresponding attributes
+         while retaining any sticky filters. Legacy filters are converted when
+         ``legacy_filters`` is set.
+
+         :param params: Dictionary of parameters to process. It may contain keys
+             relevant to dataframe configuration (dataframe parameters and options)
+             as well as arbitrary filter settings.
+         :type params: dict
+         :return: None
+         """
+         self.legacy_filters = params.pop('legacy_filters', self.legacy_filters)
+         self.field_map = params.pop('field_map', self.field_map)
+         self.sticky_filters = params.pop('params', self.sticky_filters)
+         df_params, df_options, filters = {}, {}, {}
+         for k, v in params.items():
+             if k in dataframe_params:
+                 df_params[k] = v
+             elif k in dataframe_options:
+                 df_options[k] = v
+             else:
+                 filters[k] = v
+         self.filters = {**self.sticky_filters, **filters}
+         self.df_params = {**self.df_params, **df_params}
+         self.df_options = {**self.df_options, **df_options}
+         if self.legacy_filters:
+             self.convert_legacy_filters()
+
+     def convert_legacy_filters(self):
+         """
+         Convert legacy filter keys in ``self.filters`` to their modern
+         equivalents using the reverse of ``self.field_map``, which maps
+         current field names to their legacy counterparts. This preserves
+         backward compatibility for callers that still pass legacy names.
+
+         The method is a no-op unless ``legacy_filters`` is set and both
+         ``field_map`` and ``filters`` are non-empty. It builds a reverse map
+         of ``field_map`` for efficient lookup and rewrites each
+         ``__``-separated part of every filter key.
+         """
+         if not self.legacy_filters or not self.field_map or not self.filters:
+             return
+         # create a reverse map of the field_map (legacy name -> current name)
+         reverse_map = {v: k for k, v in self.field_map.items()}
+
+         new_filters = {}
+         for filter_field, value in self.filters.items():
+             # split the filter_field if LOOKUP_SEP exists
+             parts = filter_field.split(LOOKUP_SEP, 1)
+
+             # replace each part with its current equivalent if it exists
+             new_parts = [reverse_map.get(part, part) for part in parts]
+
+             # join the parts back together and add to the new filters
+             new_filter_field = LOOKUP_SEP.join(new_parts)
+             new_filters[new_filter_field] = value
+
+         self.filters = new_filters
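ParamsConfig splits one flat params dict into dataframe parameters, dataframe options, and filters, keyed off the two module-level dictionaries above; anything unrecognized becomes a filter. A sketch under the same re-export assumption as before (the field names and field_map are invented for the example):

    from sibi_dst.v2.df_helper.core import ParamsConfig  # re-export assumed

    config = ParamsConfig(
        field_map={"customer_name": "cust_nm"},  # current name -> legacy name
        params={
            "fieldnames": ["id", "name"],  # dataframe_params key -> df_params
            "debug": True,                 # dataframe_options key -> df_options
            "legacy_filters": True,        # triggers convert_legacy_filters()
            "cust_nm__icontains": "acme",  # everything else -> filters
        },
    )
    print(config.df_params)   # {'fieldnames': ['id', 'name']}
    print(config.df_options)  # {'debug': True}
    print(config.filters)     # {'customer_name__icontains': 'acme'}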
sibi_dst/v2/df_helper/core/_query_config.py
@@ -0,0 +1,17 @@
+ from typing import Optional
+
+ from pydantic import BaseModel, model_validator
+
+
+ class QueryConfig(BaseModel):
+     use_exclude: bool = False
+     n_records: int = 100
+     dt_field: Optional[str] = None
+     use_dask: bool = False
+     as_dask: bool = False
+
+     @model_validator(mode='after')
+     def check_n_records(self):
+         if self.n_records < 0:
+             raise ValueError('Number of records must be non-negative')
+         return self
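QueryConfig is a small Pydantic model whose only custom behavior is rejecting a negative n_records. A quick sketch, under the same re-export assumption:

    from pydantic import ValidationError

    from sibi_dst.v2.df_helper.core import QueryConfig  # re-export assumed

    qc = QueryConfig(n_records=50, dt_field="created_at", as_dask=True)
    print(qc.n_records)  # 50

    try:
        QueryConfig(n_records=-1)
    except ValidationError as exc:
        # pydantic wraps the ValueError raised by check_n_records
        print(exc.errors()[0]["msg"])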
sibi_dst/v2/utils/__init__.py
@@ -0,0 +1,5 @@
+ from .log_utils import Logger
+
+ __all__ = [
+     "Logger",
+ ]
sibi_dst/v2/utils/log_utils.py
@@ -0,0 +1,120 @@
+ import logging
+ import os
+ import sys
+ import time
+ from typing import Optional
+
+
+ class Logger:
+     """
+     Handles the creation, setup, and management of logging functionality.
+     """
+     DEBUG = logging.DEBUG
+     INFO = logging.INFO
+     WARNING = logging.WARNING
+     ERROR = logging.ERROR
+     CRITICAL = logging.CRITICAL
+
+     def __init__(self, log_dir: str, logger_name: str, log_file: str, debug: bool = False):
+         """
+         Initialize the Logger instance.
+         :param log_dir: Directory where logs are stored.
+         :param logger_name: Name of the logger instance.
+         :param log_file: Base name of the log file.
+         :param debug: Whether to enable debug mode (defaults to False).
+         """
+         self.log_dir = log_dir
+         self.logger_name = logger_name
+         self.log_file = log_file
+         self.debug_mode = debug
+         self.logger = None
+         self._setup()
+
+     def _setup(self):
+         """Set up the logger with file and console handlers."""
+         # Ensure the log directory exists
+         os.makedirs(self.log_dir, exist_ok=True)
+
+         # Include the name of the calling script in the log file name
+         calling_script = os.path.splitext(os.path.basename(sys.argv[0]))[0]
+         log_file_path = os.path.join(self.log_dir, f"{self.log_file}_{calling_script}.log")
+
+         # Delete the existing log file if it exists
+         if os.path.exists(log_file_path):
+             os.remove(log_file_path)
+
+         self.logger = logging.getLogger(self.logger_name)
+         self.logger.setLevel(self._get_log_level())  # dynamically set the log level
+         # Clear handlers left over from a previous instance with the same name;
+         # logging.getLogger returns a shared object per name, so handlers
+         # would otherwise accumulate and duplicate every message
+         self.logger.handlers.clear()
+
+         formatter = logging.Formatter(
+             '[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s',
+             datefmt='%Y-%m-%d %H:%M:%S'
+         )
+         formatter.converter = time.localtime  # use local time explicitly
+
+         # File handler
+         file_handler = logging.FileHandler(log_file_path)
+         file_handler.setFormatter(formatter)
+         self.logger.addHandler(file_handler)
+
+         # Console handler
+         console_handler = logging.StreamHandler()
+         console_handler.setFormatter(formatter)
+         self.logger.addHandler(console_handler)
+
+     def _get_log_level(self) -> int:
+         """
+         Determine the logging level based on the debug flag.
+         :return: DEBUG if debug mode is enabled, otherwise INFO.
+         """
+         return logging.DEBUG if self.debug_mode else logging.INFO
+
+     @classmethod
+     def default_logger(
+             cls,
+             log_dir: str = './logs/',
+             logger_name: Optional[str] = None,
+             log_file: Optional[str] = None,
+             debug: bool = False
+     ) -> 'Logger':
+         """
+         Create a default logger with generic parameters.
+         :param log_dir: Directory where logs are stored (defaults to './logs/').
+         :param logger_name: Name of the logger (defaults to __name__).
+         :param log_file: Name of the log file (defaults to logger_name).
+         :param debug: Whether to enable debug mode (defaults to False).
+         :return: A Logger instance.
+         """
+         logger_name = logger_name or __name__
+         log_file = log_file or logger_name
+         return cls(log_dir=log_dir, logger_name=logger_name, log_file=log_file, debug=debug)
+
+     def set_level(self, level: int):
+         """
+         Set the logging level for the logger.
+         :param level: Logging level (e.g., logging.DEBUG, logging.INFO).
+         """
+         self.logger.setLevel(level)
+
+     def debug(self, msg: str):
+         """Log a debug message."""
+         self.logger.debug(msg)
+
+     def info(self, msg: str):
+         """Log an info message."""
+         self.logger.info(msg)
+
+     def warning(self, msg: str):
+         """Log a warning message."""
+         self.logger.warning(msg)
+
+     def error(self, msg: str):
+         """Log an error message."""
+         self.logger.error(msg)
+
+     def critical(self, msg: str):
+         """Log a critical message."""
+         self.logger.critical(msg)
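The v2 Logger wraps the standard logging module: it writes to both the console and a per-script file under log_dir, removing any previous log file on setup. A minimal usage sketch; the logger name and messages are illustrative:

    from sibi_dst.v2.utils import Logger

    logger = Logger.default_logger(logger_name="etl_job", debug=True)
    logger.info("starting run")       # goes to the console and ./logs/etl_job_<script>.log
    logger.set_level(Logger.WARNING)  # raise the threshold at runtime
    logger.debug("now suppressed")    # below WARNING, so not emitted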
{sibi_dst-0.3.45.dist-info → sibi_dst-0.3.46.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 0.3.45
+ Version: 0.3.46
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
@@ -14,12 +14,12 @@ Requires-Dist: chardet (>=5.2.0,<6.0.0)
  Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
  Requires-Dist: clickhouse-connect (>=0.8.7,<0.9.0)
  Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
- Requires-Dist: dask-expr (>=1.1.20,<2.0.0)
  Requires-Dist: dask[complete] (>=2024.11.1,<2025.0.0)
  Requires-Dist: django (>=5.1.4,<6.0.0)
  Requires-Dist: djangorestframework (>=3.15.2,<4.0.0)
  Requires-Dist: folium (>=0.19.4,<0.20.0)
  Requires-Dist: geopandas (>=1.0.1,<2.0.0)
+ Requires-Dist: geopy (>=2.4.1,<3.0.0)
  Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
  Requires-Dist: httpx (>=0.27.2,<0.28.0)
  Requires-Dist: ipython (>=8.29.0,<9.0.0)
@@ -40,6 +40,7 @@ Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
  Requires-Dist: s3fs (>=2024.12.0,<2025.0.0)
  Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0)
+ Requires-Dist: sqlmodel (>=0.0.22,<0.0.23)
  Requires-Dist: tornado (>=6.4.1,<7.0.0)
  Requires-Dist: tqdm (>=4.67.0,<5.0.0)
  Requires-Dist: uvicorn (>=0.34.0,<0.35.0)
sibi_dst-0.3.46.dist-info/RECORD
@@ -0,0 +1,80 @@
+ sibi_dst/__init__.py,sha256=uwF7KQ9TjongYh3qIA36hSSjR85TVJKuZI0WUPqLYJg,1576
+ sibi_dst/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/v1/df_helper/__init__.py,sha256=A-f5cCBy949HHxgiPt0T4MG3qdLAnDpGOpRvP-2dXWc,400
+ sibi_dst/v1/df_helper/_artifact_updater_multi_wrapper.py,sha256=Ghfee9dELU8CGUMdWeZxyThSFZMd6jTGKednLrRKh4U,11649
+ sibi_dst/v1/df_helper/_df_helper.py,sha256=veist8sExgeOT3Xpjp5L9n3djQdQoG435oPQx-NluF8,29780
+ sibi_dst/v1/df_helper/_parquet_artifact.py,sha256=HOgfANd3ivGBCHf63q8UN-75NWyo0Tr7fzRqfeVKB3I,10202
+ sibi_dst/v1/df_helper/_parquet_reader.py,sha256=fmSepQmxjWp59gbecAd_ThMC68gDnKMaAUeciVRVpQY,3960
+ sibi_dst/v1/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/v1/df_helper/backends/django/__init__.py,sha256=uWHi-DtQX5re7b2HcqoXUH3_FZWOw1VTmDf552FAkNs,256
+ sibi_dst/v1/df_helper/backends/django/_db_connection.py,sha256=AGbqCnmiX4toMaFPE5ne5h7QCkImjnBKvzGtUD6Ge8Q,3698
+ sibi_dst/v1/df_helper/backends/django/_io_dask.py,sha256=NjvJg6y9qKKCRiNrJL4f_A03iKDKEcjCi7LGbr9DgtM,19555
+ sibi_dst/v1/df_helper/backends/django/_load_from_db.py,sha256=Htr4jDYB6MQQqQyJKOiqv9AfheUXlL4XzOlL3-wFBeY,10646
+ sibi_dst/v1/df_helper/backends/django/_sql_model_builder.py,sha256=at9J7ecGkZbOOYba85uofe9C-ic4wwOqVgJcHpQNiYQ,21449
+ sibi_dst/v1/df_helper/backends/http/__init__.py,sha256=d1pfgYxbiYg7E0Iw8RbJ7xfqIfJShqqTBQQGU_S6OOo,105
+ sibi_dst/v1/df_helper/backends/http/_http_config.py,sha256=YFpduRY-00dNwGJwFuHleGyp5mP56IfPdhSY_kTJ6O0,4729
+ sibi_dst/v1/df_helper/backends/parquet/__init__.py,sha256=esWJ9aSuYC26d-T01z9dPrJ1uqJzvdaPNTYRb5qXTlQ,182
+ sibi_dst/v1/df_helper/backends/parquet/_filter_handler.py,sha256=hBhrMLHFIOoVWEVyc6Jyp_XTopg6u7i3s0FhmVpISyI,5222
+ sibi_dst/v1/df_helper/backends/parquet/_parquet_options.py,sha256=yueBidL07wYr6EMZCERHqX0FPTZCGBHaZD6AXFcclCo,10597
+ sibi_dst/v1/df_helper/backends/sqlalchemy/__init__.py,sha256=TuVp8Ce49dCIIxtyrtFGRblarQUl8QGcS-TDZd515IE,348
+ sibi_dst/v1/df_helper/backends/sqlalchemy/_db_connection.py,sha256=Kli83IEg5SFVqkhsK4w45cV6PbZnfdGanfsyiW6Xw00,2502
+ sibi_dst/v1/df_helper/backends/sqlalchemy/_filter_handler.py,sha256=58RCda1Hg_nsuJw-2V36IstsT8O84IQFgsdE7FnqvMk,4655
+ sibi_dst/v1/df_helper/backends/sqlalchemy/_io_dask.py,sha256=FvsNYb__wmT-D8pZYEwfrq6uPJihi77AfEAinkuGiwo,5472
+ sibi_dst/v1/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=PFA22DF3avh2jPEt6uqN16jpxRwPZIzs_E5qMjcdw9M,6265
+ sibi_dst/v1/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=ksvJ0EvktrVsoJ9DTMIQHzHe8ghw2mzDIBD_YgWytgw,8402
+ sibi_dst/v1/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
+ sibi_dst/v1/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
+ sibi_dst/v1/df_helper/core/_filter_handler.py,sha256=TLUSEiGudCTmOv5htAFi-RQtXXU367Wk6cqLKVSbT-U,11217
+ sibi_dst/v1/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxRrQKE5FQRxcEWsac,6736
+ sibi_dst/v1/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
+ sibi_dst/v1/df_helper/data_cleaner.py,sha256=lkxQoXLvGzXCicFUimnA5nen5qkrO1oxgl_p2Be2o8w,5183
+ sibi_dst/v1/geopy_helper/__init__.py,sha256=Q1RJiUZIOlV0QNNLjxZ_2IZS5LqIe5jRbeQkfD1Vm60,112
+ sibi_dst/v1/geopy_helper/geo_location_service.py,sha256=1ArI980QF_gRw096ZsABHwJt-m55jrfOlB8tPwL1BvY,2959
+ sibi_dst/v1/geopy_helper/utils.py,sha256=Sb7qfSqIyWh-AZ4GBdB9-z5FrQPWtrdtQLLcNjph0yw,3351
+ sibi_dst/v1/osmnx_helper/__init__.py,sha256=eHM2XenOdI-Rc1deeUCVoT_OidtPJRMcveKenqxzCJM,116
+ sibi_dst/v1/osmnx_helper/base_osm_map.py,sha256=L7g3VBiayHX41BcCBTOCS0iJOKzp2ZZYcrp8N-mnU90,19392
+ sibi_dst/v1/osmnx_helper/basemaps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/v1/osmnx_helper/basemaps/calendar_html.py,sha256=UArt6FDgoCgoRte45Xo3IHqd-RNzW0YgitgZYfOFasY,4031
+ sibi_dst/v1/osmnx_helper/basemaps/router_plotter.py,sha256=SWdDz5XGDSHT6Iyr-EIatSNTvGPR3AVDJ5TTcWm0w4g,10947
+ sibi_dst/v1/osmnx_helper/utils.py,sha256=BzuY8CtYnBAAO8UAr_M7EOk6CP1zcifNLs8pkdFZEFg,20577
+ sibi_dst/v1/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/v1/tests/test_data_wrapper_class.py,sha256=hCSZ0aubhWNlu2pBV8jG56UsSHhnjQ6hHLdN4Q4YLTw,3342
+ sibi_dst/v1/utils/__init__.py,sha256=QE4ZRfLWdsjMYJIxznjJlPoLneK8RPPf9gIh6aEhxkY,938
+ sibi_dst/v1/utils/airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
+ sibi_dst/v1/utils/clickhouse_writer.py,sha256=JE_WMsnLTlxoABIDLuvrDUVHkTidlev8gSC4krDag3M,9879
+ sibi_dst/v1/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
+ sibi_dst/v1/utils/data_from_http_source.py,sha256=AcpKNsqTgN2ClNwuhgUpuNCx62r5_DdsAiKY8vcHEBA,1867
+ sibi_dst/v1/utils/data_utils.py,sha256=MqbwXk33BuANWeKKmsabHouhb8GZswSmbM-VetWWE-M,10357
+ sibi_dst/v1/utils/data_wrapper.py,sha256=pIIQxeHknUeQd0YbISkAhL-xYBK4OdijoATBY-oBznw,12114
+ sibi_dst/v1/utils/date_utils.py,sha256=7cqgC6WEcfkh6BKTgq-kyig4H9rf_0VzpySPYElSo_0,18359
+ sibi_dst/v1/utils/df_utils.py,sha256=GAX0lthULTmGaDvYzuLmo0op7YKaCM5uot403QpztoM,11278
+ sibi_dst/v1/utils/file_utils.py,sha256=Z99CZ_4nPDIaZqbCfzzUDfAYJjSudWDj-mwEO8grhbc,1253
+ sibi_dst/v1/utils/filepath_generator.py,sha256=-HHO0U-PR8fysDDFwnWdHRlgqksh_RkmgBZLWv9hM7s,6669
+ sibi_dst/v1/utils/log_utils.py,sha256=eSAbi_jmMpJ8RpycakzT4S4zNkqVZDj3FY8WwnxpdXc,4623
+ sibi_dst/v1/utils/parquet_saver.py,sha256=Tucxv9jRX66VuLQZn0dPQBN7JOttBou6SF8FxqufeGE,8169
+ sibi_dst/v1/utils/phone_formatter.py,sha256=tsVTDamuthFYgy4-5UwmQkPQ-FGTGH7MjZyH8utAkIY,4945
+ sibi_dst/v1/utils/storage_config.py,sha256=ugM70OHo63dN7LPukl0FZTWwXKBuoCILFh3RdNEeMgY,1239
+ sibi_dst/v1/utils/storage_manager.py,sha256=H_itUFJv9nP0BfXYYQDsw4RzB0YWfgVOAHNWAiMpZ_w,4443
+ sibi_dst/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/v2/df_helper/__init__.py,sha256=XuH6jKYAPg2DdRbsxxBSxp9X3x-ARyaT0xe27uILrVo,99
+ sibi_dst/v2/df_helper/_df_helper.py,sha256=9pED3bjQ2Z81zqzJrZ9e7SguoO4-hBmNTJK4WOKrr4M,9297
+ sibi_dst/v2/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py,sha256=MOEedyWqcb1_RiRYKyyWX0uFNCfBgmyYbTjco8-GBxU,262
+ sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py,sha256=8u3jdD0sR2fmm2H75GDdygoqiqDI6-N-azOJsLgUWFA,3189
+ sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py,sha256=6V9DruwckEsonYW5YvBY93-NzXYHbTA7OsyMKMYIZEs,5472
+ sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=jhgN0OO5Sk1zQFHrMUhJn2F_hHB5g3x3EJ8j5PXNb0U,6295
+ sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py,sha256=jX_mQAzl_6xdh7CTYw4uvUIX2wMp3NzXMlfbC5alOzs,13632
+ sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py,sha256=LcwJjVVxxrnVZalWqnz5m7r77i9tmJR0-U2k8eSQ-m8,249
+ sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py,sha256=b5xmxQr4a8fhE4qdCGJrNWjjX1NW5hrPNLmlfP20rIg,2897
+ sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py,sha256=wVgNPo5V75aLtlZr_SIQ-yteyXq-Rg93eMfR8JCfkSo,5422
+ sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py,sha256=FIs6UrNxdJ7eDHDvTv-cJuybIue2-oCRedhW-MNe7CU,6285
+ sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py,sha256=k0dnMLkLMMvkDYDYWkGFgibW5UD8pJgB3YrEg_R7pj8,13556
+ sibi_dst/v2/df_helper/core/__init__.py,sha256=rZhBh32Rgcxj4MBii-KsYVJQmrT9egiWKXk68gWKblo,197
+ sibi_dst/v2/df_helper/core/_filter_handler.py,sha256=54jyz7OUigUqwlyl5gzy1d7aJ_oXV3aMORCoqZIf6sY,10100
+ sibi_dst/v2/df_helper/core/_params_config.py,sha256=DYx2drDz3uF-lSPzizPkchhy-kxRrQKE5FQRxcEWsac,6736
+ sibi_dst/v2/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
+ sibi_dst/v2/utils/__init__.py,sha256=6H4cvhqTiFufnFPETBF0f8beVVMpfJfvUs6Ne0TQZNY,58
+ sibi_dst/v2/utils/log_utils.py,sha256=rfk5VsLAt-FKpv6aPTC1FToIPiyrnHAFFBAkHme24po,4123
+ sibi_dst-0.3.46.dist-info/METADATA,sha256=Z9QXdfU6JmUrcR45dFKItdGtHrcR7UBlC37adJWrz-A,6552
+ sibi_dst-0.3.46.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ sibi_dst-0.3.46.dist-info/RECORD,,
sibi_dst/osmnx_helper/__init__.py (removed)
@@ -1,9 +0,0 @@
- from __future__ import annotations
-
- from .v1.base_osm_map import BaseOsmMap
- from .v1.utils import PBFHandler
-
- __all__ = [
-     "BaseOsmMap",
-     "PBFHandler",
- ]