sibi-dst 0.3.44__py3-none-any.whl → 0.3.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. sibi_dst/__init__.py +38 -0
  2. sibi_dst/{df_helper → v1/df_helper}/_artifact_updater_multi_wrapper.py +1 -1
  3. sibi_dst/{df_helper → v1/df_helper}/_df_helper.py +3 -3
  4. sibi_dst/{df_helper → v1/df_helper}/_parquet_artifact.py +3 -3
  5. sibi_dst/{df_helper → v1/df_helper}/_parquet_reader.py +2 -2
  6. sibi_dst/{df_helper → v1/df_helper}/backends/django/_load_from_db.py +3 -3
  7. sibi_dst/{df_helper → v1/df_helper}/backends/http/_http_config.py +1 -1
  8. sibi_dst/{df_helper → v1/df_helper}/backends/parquet/_filter_handler.py +1 -1
  9. sibi_dst/{df_helper → v1/df_helper}/backends/parquet/_parquet_options.py +2 -2
  10. sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_io_dask.py +2 -2
  11. sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_load_from_db.py +2 -2
  12. sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_sql_model_builder.py +2 -1
  13. sibi_dst/{df_helper → v1/df_helper}/core/_filter_handler.py +1 -1
  14. sibi_dst/v1/osmnx_helper/__init__.py +6 -0
  15. sibi_dst/{tests → v1/tests}/test_data_wrapper_class.py +11 -10
  16. sibi_dst/{utils → v1/utils}/__init__.py +2 -0
  17. sibi_dst/{utils → v1/utils}/clickhouse_writer.py +1 -1
  18. sibi_dst/v1/utils/data_from_http_source.py +49 -0
  19. sibi_dst/{utils → v1/utils}/data_utils.py +5 -3
  20. sibi_dst/{utils → v1/utils}/data_wrapper.py +3 -1
  21. sibi_dst/{utils → v1/utils}/date_utils.py +1 -1
  22. sibi_dst/{utils → v1/utils}/file_utils.py +1 -1
  23. sibi_dst/{utils → v1/utils}/filepath_generator.py +1 -1
  24. sibi_dst/{utils → v1/utils}/parquet_saver.py +1 -1
  25. sibi_dst/v1/utils/storage_config.py +28 -0
  26. sibi_dst/v2/df_helper/__init__.py +7 -0
  27. sibi_dst/v2/df_helper/_df_helper.py +214 -0
  28. sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +10 -0
  29. sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +82 -0
  30. sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +135 -0
  31. sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +142 -0
  32. sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +297 -0
  33. sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +9 -0
  34. sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +78 -0
  35. sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +122 -0
  36. sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +142 -0
  37. sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +283 -0
  38. sibi_dst/v2/df_helper/core/__init__.py +9 -0
  39. sibi_dst/v2/df_helper/core/_filter_handler.py +236 -0
  40. sibi_dst/v2/df_helper/core/_params_config.py +139 -0
  41. sibi_dst/v2/df_helper/core/_query_config.py +17 -0
  42. sibi_dst/v2/utils/__init__.py +5 -0
  43. sibi_dst/v2/utils/log_utils.py +120 -0
  44. {sibi_dst-0.3.44.dist-info → sibi_dst-0.3.46.dist-info}/METADATA +3 -2
  45. sibi_dst-0.3.46.dist-info/RECORD +80 -0
  46. sibi_dst/osmnx_helper/__init__.py +0 -9
  47. sibi_dst/osmnx_helper/v2/base_osm_map.py +0 -153
  48. sibi_dst/osmnx_helper/v2/basemaps/utils.py +0 -0
  49. sibi_dst-0.3.44.dist-info/RECORD +0 -62
  50. /sibi_dst/{df_helper/backends → v1}/__init__.py +0 -0
  51. /sibi_dst/{df_helper → v1/df_helper}/__init__.py +0 -0
  52. /sibi_dst/{osmnx_helper/v1 → v1/df_helper/backends}/__init__.py +0 -0
  53. /sibi_dst/{df_helper → v1/df_helper}/backends/django/__init__.py +0 -0
  54. /sibi_dst/{df_helper → v1/df_helper}/backends/django/_db_connection.py +0 -0
  55. /sibi_dst/{df_helper → v1/df_helper}/backends/django/_io_dask.py +0 -0
  56. /sibi_dst/{df_helper → v1/df_helper}/backends/django/_sql_model_builder.py +0 -0
  57. /sibi_dst/{df_helper → v1/df_helper}/backends/http/__init__.py +0 -0
  58. /sibi_dst/{df_helper → v1/df_helper}/backends/parquet/__init__.py +0 -0
  59. /sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/__init__.py +0 -0
  60. /sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_db_connection.py +0 -0
  61. /sibi_dst/{df_helper → v1/df_helper}/backends/sqlalchemy/_filter_handler.py +0 -0
  62. /sibi_dst/{df_helper → v1/df_helper}/core/__init__.py +0 -0
  63. /sibi_dst/{df_helper → v1/df_helper}/core/_defaults.py +0 -0
  64. /sibi_dst/{df_helper → v1/df_helper}/core/_params_config.py +0 -0
  65. /sibi_dst/{df_helper → v1/df_helper}/core/_query_config.py +0 -0
  66. /sibi_dst/{df_helper → v1/df_helper}/data_cleaner.py +0 -0
  67. /sibi_dst/{geopy_helper → v1/geopy_helper}/__init__.py +0 -0
  68. /sibi_dst/{geopy_helper → v1/geopy_helper}/geo_location_service.py +0 -0
  69. /sibi_dst/{geopy_helper → v1/geopy_helper}/utils.py +0 -0
  70. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/base_osm_map.py +0 -0
  71. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/basemaps/__init__.py +0 -0
  72. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/basemaps/calendar_html.py +0 -0
  73. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/basemaps/router_plotter.py +0 -0
  74. /sibi_dst/{osmnx_helper/v1 → v1/osmnx_helper}/utils.py +0 -0
  75. /sibi_dst/{osmnx_helper/v2 → v1/tests}/__init__.py +0 -0
  76. /sibi_dst/{utils → v1/utils}/airflow_manager.py +0 -0
  77. /sibi_dst/{utils → v1/utils}/credentials.py +0 -0
  78. /sibi_dst/{utils → v1/utils}/df_utils.py +0 -0
  79. /sibi_dst/{utils → v1/utils}/log_utils.py +0 -0
  80. /sibi_dst/{utils → v1/utils}/phone_formatter.py +0 -0
  81. /sibi_dst/{utils → v1/utils}/storage_manager.py +0 -0
  82. /sibi_dst/{osmnx_helper/v2/basemaps → v2}/__init__.py +0 -0
  83. /sibi_dst/{tests → v2/df_helper/backends}/__init__.py +0 -0
  84. {sibi_dst-0.3.44.dist-info → sibi_dst-0.3.46.dist-info}/WHEEL +0 -0
sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py
@@ -0,0 +1,283 @@
+ import re
+ from collections import defaultdict
+ from datetime import datetime
+ from typing import Any, Dict, List, Optional, Tuple, Type, get_args, get_origin
+
+ from sqlalchemy import and_, inspect, cast, func
+ from sqlalchemy.exc import ArgumentError, NoForeignKeysError
+ from sqlalchemy.orm import relationship, foreign, configure_mappers, clear_mappers
+ from sqlalchemy.sql.sqltypes import Integer, String, Float, DateTime, Boolean, Numeric, Text
+
+ from sqlmodel import SQLModel, create_engine
+ from sibi_dst.v2.utils import Logger
+
+ APPS_LABEL = "datacubes"
+ RESERVED_COLUMN_NAMES = {"metadata", "class_", "table"}
+ RESERVED_KEYWORDS = {"class", "def", "return", "yield", "global"}
+
+ MODEL_REGISTRY: Dict[str, Type] = {}
+
+
+ class SQLModelModelBuilder:
+     """
+     Dynamically builds an ORM model for a single table by reflecting its columns
+     and reverse-engineering its relationships from foreign key metadata using SQLModel.
+     The generated model is mapped solely via its reflected __table__ attribute.
+     """
+
+     def __init__(
+         self,
+         engine,
+         table_name: str,
+         add_relationships: bool = False,
+         debug: bool = False,
+         logger: Optional[Logger] = None,
+     ) -> None:
+         self.engine = engine
+         self.table_name = table_name
+         self.add_relationships = add_relationships
+         self.debug = debug
+         self.logger = logger or Logger.default_logger(logger_name="sqlmodel_model_builder", debug=self.debug)
+         # Use SQLModel's shared metadata.
+         self.metadata = SQLModel.metadata
+         self.metadata.bind = self.engine
+
+         try:
+             self.metadata.reflect(only=[table_name], bind=self.engine)
+         except Exception as e:
+             self.logger.warning(f"Could not reflect table '{table_name}': {e}. Skipping model build.")
+             self.table = None
+         else:
+             self.table = self.metadata.tables.get(table_name)
+             if self.table is None:
+                 self.logger.warning(f"Table '{table_name}' not found in the database. Skipping model build.")
+         self.model_name: str = self.normalize_class_name(table_name)
+         if self.debug:
+             self.logger.debug(f"Reflected table for '{table_name}': {self.table}")
+
+     def build_model(self) -> Optional[Type]:
+         try:
+             self.metadata.reflect(only=[self.table_name], bind=self.engine)
+         except Exception as e:
+             self.logger.warning(f"Could not reflect table '{self.table_name}': {e}. Skipping model build.")
+             return None
+
+         self.table = self.metadata.tables.get(self.table_name)
+         if self.table is None:
+             self.logger.warning(f"Table '{self.table_name}' not found in the database. Skipping model build.")
+             return None
+
+         # Force registration of the reflected table in the metadata.
+         try:
+             self.metadata._add_table(self.table_name, None, self.table)
+         except Exception as e:
+             self.logger.debug(f"Error forcing table registration: {e}")
+
+         columns, annotations = self.get_columns(self.table)
+         # Build the mapping dictionary using only __table__.
+         attrs: Dict[str, Any] = {
+             "__table__": self.table,
+             "__module__": f"{APPS_LABEL}.models",
+             "__mapper_args__": {"eager_defaults": True},
+             "__annotations__": annotations,
+         }
+         attrs.update(columns)
+         if self.add_relationships:
+             self._add_relationships(attrs, self.table)
+         model = type(self.model_name, (SQLModel,), attrs)
+         MODEL_REGISTRY[self.table_name] = model
+
+         try:
+             configure_mappers()
+             self.logger.debug(f"Configured mappers for model {self.model_name}.")
+         except Exception as e:
+             self.logger.error(f"Mapper configuration error for model {self.model_name}: {e}")
+             raise ValueError(f"Invalid mapping in model {self.model_name}: {e}") from e
+
+         # Register the mapping.
+         SQLModel.metadata.create_all(self.engine)
+         self.logger.debug(f"Created model {self.model_name} for table {self.table_name}.")
+         return model
+
+     def get_columns(self, table: Any) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+         cols: Dict[str, Any] = {}
+         annotations: Dict[str, Any] = {}
+         for column in table.columns:
+             norm_name = self.normalize_column_name(column.name)
+             if norm_name in RESERVED_COLUMN_NAMES:
+                 continue
+             if norm_name in cols:
+                 self.logger.warning(f"Duplicate normalized column name '{norm_name}'; skipping duplicate for column '{column.name}'.")
+                 continue
+             cols[norm_name] = column
+             annotations[norm_name] = self._python_type_for_column(column)
+         return cols, annotations
+
+     def _python_type_for_column(self, column: Any) -> Any:
+         col_type = type(column.type)
+         if issubclass(col_type, Integer):
+             return int
+         elif issubclass(col_type, (String, Text)):
+             return str
+         elif issubclass(col_type, (Float, Numeric)):
+             return float
+         elif issubclass(col_type, DateTime):
+             return datetime
+         elif issubclass(col_type, Boolean):
+             return bool
+         else:
+             return Any
+
+     def _add_relationships(self, attrs: Dict[str, Any], table: Any) -> None:
+         inspector = inspect(self.engine)
+         fk_info_list = inspector.get_foreign_keys(self.table.name)
+         fk_groups = defaultdict(list)
+         for fk_info in fk_info_list:
+             referred_table = fk_info.get("referred_table")
+             if referred_table:
+                 fk_groups[referred_table].append(fk_info)
+
+         for related_table_name, fk_dicts in fk_groups.items():
+             try:
+                 if related_table_name not in MODEL_REGISTRY:
+                     self.logger.debug(f"Building missing model for related table {related_table_name}.")
+                     remote_model = SQLModelModelBuilder(
+                         self.engine,
+                         related_table_name,
+                         add_relationships=False,
+                         debug=self.debug,
+                         logger=self.logger,
+                     ).build_model()
+                     if related_table_name not in MODEL_REGISTRY or remote_model is None:
+                         raise ValueError(f"Failed to build model for table {related_table_name}.")
+                 else:
+                     remote_model = MODEL_REGISTRY[related_table_name]
+             except Exception as e:
+                 self.logger.warning(f"Could not build model for table {related_table_name}: {e}")
+                 continue
+
+             remote_table = remote_model.__table__
+             join_conditions = []
+             local_foreign_keys = []
+             remote_side_keys = []
+             for fk_info in fk_dicts:
+                 local_cols = fk_info.get("constrained_columns", [])
+                 remote_cols = fk_info.get("referred_columns", [])
+                 if not local_cols or not remote_cols:
+                     self.logger.warning(f"Incomplete FK definition for {related_table_name} in {self.table_name}.")
+                     continue
+                 local_col_name = local_cols[0]
+                 remote_col_name = remote_cols[0]
+                 try:
+                     local_col = self.table.c[local_col_name]
+                 except KeyError:
+                     self.logger.warning(f"Local column {local_col_name} not found in {self.table_name}.")
+                     continue
+                 try:
+                     remote_col = remote_table.columns[remote_col_name]
+                 except KeyError:
+                     self.logger.warning(f"Remote column {remote_col_name} not found in model {remote_model.__name__}.")
+                     continue
+                 if not local_col.foreign_keys:
+                     self.logger.warning(f"Column {local_col_name} in {self.table_name} is not defined as a foreign key.")
+                     continue
+                 if remote_col.name not in remote_model.__table__.columns.keys():
+                     self.logger.warning(f"Remote column {remote_col.name} not in table for model {remote_model.__name__}.")
+                     continue
+                 join_conditions.append(foreign(local_col) == remote_col)
+                 local_foreign_keys.append(local_col)
+                 remote_side_keys.append(remote_col)
+             if not join_conditions:
+                 self.logger.warning(f"No valid join conditions for relationship from {self.table_name} to {related_table_name}.")
+                 continue
+             primaryjoin_expr = join_conditions[0] if len(join_conditions) == 1 else and_(*join_conditions)
+             relationship_name = self.normalize_column_name(related_table_name)
+             if relationship_name in attrs:
+                 continue
+             try:
+                 rel = relationship(
+                     lambda rt=related_table_name: MODEL_REGISTRY[rt],
+                     primaryjoin=primaryjoin_expr,
+                     foreign_keys=local_foreign_keys,
+                     remote_side=remote_side_keys,
+                     lazy="joined",
+                     viewonly=True,
+                 )
+                 attrs[relationship_name] = rel
+                 attrs.setdefault("__annotations__", {})[relationship_name] = List[remote_model]
+                 self.logger.debug(f"Added relationship '{relationship_name}' referencing {related_table_name}.")
+             except (ArgumentError, NoForeignKeysError) as e:
+                 self.logger.error(f"Error creating relationship '{relationship_name}' on model {self.model_name}: {e}")
+                 continue
+             try:
+                 configure_mappers()
+                 self.logger.debug(f"Validated relationship '{relationship_name}' on model {self.model_name}.")
+             except Exception as e:
+                 self.logger.error(f"Relationship '{relationship_name}' on model {self.model_name} failed configuration: {e}")
+                 del attrs[relationship_name]
+                 self.logger.debug(f"Removed relationship '{relationship_name}' from model {self.model_name}.")
+                 clear_mappers()
+                 continue
+
+     @staticmethod
+     def normalize_class_name(table_name: str) -> str:
+         return "".join(word.capitalize() for word in table_name.split("_"))
+
+     def normalize_column_name(self, column_name: Any) -> str:
+         try:
+             s = str(column_name)
+         except Exception as e:
+             self.logger.debug(f"Failed to convert column name {column_name} to string: {e}")
+             s = ""
+         norm_name = re.sub(r"\W|^(?=\d)", "_", s)
+         if norm_name in RESERVED_KEYWORDS:
+             norm_name += "_field"
+         return norm_name
+
+     @staticmethod
+     def export_models_to_file(filename: str) -> None:
+         reserved_attrs = {"metadata", "__tablename__", "__sqlmodel_relationships__", "__name__"}
+         import re
+         import typing
+
+         with open(filename, "w") as f:
+             f.write("from sqlmodel import SQLModel, Field, Relationship, Column\n")
+             f.write("from sqlalchemy import ForeignKey\n")
+             f.write("from sqlalchemy.sql.elements import DefaultClause\n")
+             f.write("from sqlalchemy.sql.sqltypes import INTEGER, DATE, VARCHAR, SMALLINT, FLOAT, CHAR, TEXT, DATETIME\n")
+             f.write("from sqlalchemy.dialects.mysql import TINYINT\n")
+             f.write("from typing import Any, List, Optional, Union\n")
+             f.write("import typing\n")
+             f.write("import sqlalchemy\n\n\n")
+
+             f.write("class Base(SQLModel):\n")
+             f.write("    class Config:\n")
+             f.write("        arbitrary_types_allowed = True\n\n\n")
+
+             for table_name, model in MODEL_REGISTRY.items():
+                 f.write(f"class {model.__name__}(SQLModel, table=True):\n")
+                 f.write(f"    __tablename__ = '{table_name}'\n")
+                 for column in model.__table__.columns:
+                     col_repr = repr(column)
+                     col_repr = re.sub(r", table=<[^>]+>", "", col_repr)
+                     col_repr = re.sub(r",\s*server_default=DefaultClause\([^)]*\)", "", col_repr)
+                     col_repr = re.sub(r",\s*display_width=\d+", "", col_repr)
+                     f.write(f"    {column.name}: Any = Field(sa_column={col_repr})\n")
+                 annotations = typing.get_type_hints(model)
+                 col_names = {col.name for col in model.__table__.columns}
+                 for key, type_hint in annotations.items():
+                     if key in col_names or key in reserved_attrs or key.startswith("__"):
+                         continue
+                     origin = get_origin(type_hint)
+                     if origin in (list, List):
+                         remote_model = get_args(type_hint)[0]
+                         remote_model_name = remote_model.__name__
+                     elif origin is Optional:
+                         args = get_args(type_hint)
+                         non_none = [arg for arg in args if arg is not type(None)]
+                         remote_model_name = non_none[0].__name__ if non_none else "Any"
+                     else:
+                         remote_model_name = type_hint.__name__ if hasattr(type_hint, '__name__') else str(type_hint)
+                     f.write(f"    {key}: {type_hint} = Relationship(\"{remote_model_name}\")\n")
+                 f.write("\n\n")
+         print(f"Models exported to {filename}")
sibi_dst/v2/df_helper/core/__init__.py
@@ -0,0 +1,9 @@
+ from ._filter_handler import FilterHandler
+ from ._params_config import ParamsConfig
+ from ._query_config import QueryConfig
+
+ __all__ = [
+     "ParamsConfig",
+     "QueryConfig",
+     "FilterHandler",
+ ]
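The net effect of this __init__.py is that v2 consumers get a single import point for the core helpers, e.g.:

from sibi_dst.v2.df_helper.core import FilterHandler, ParamsConfig, QueryConfig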
sibi_dst/v2/df_helper/core/_filter_handler.py
@@ -0,0 +1,236 @@
+ import datetime
+ import itertools
+ import dask.dataframe as dd
+ import pandas as pd
+ from sqlalchemy import func, cast
+ from sqlalchemy.sql.sqltypes import Date, Time
+ from sibi_dst.v2.utils import Logger
+ import typing
+
+
+ class FilterHandler:
+     """
+     Handles the application of filters to data sources with support for SQLAlchemy, SQLModel, and Dask backends.
+
+     This class abstracts the process of applying filters to various backends, specifically
+     SQLAlchemy/SQLModel queries and Dask DataFrames. It supports multiple filtering operations,
+     including exact matches, comparisons, and string-related operations such as contains and regex.
+     """
+
+     def __init__(self, backend, logger=None, debug=False):
+         """
+         Initialize the FilterHandler.
+
+         Args:
+             backend: The backend to use ('sqlalchemy', 'sqlmodel', or 'dask').
+             logger: Optional logger for debugging purposes.
+         """
+         self.backend = backend
+         self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
+         self.logger.set_level(Logger.DEBUG if debug else Logger.INFO)
+         self.backend_methods = self._get_backend_methods(backend)
+
+     def apply_filters(self, query_or_df, model=None, filters=None):
+         """
+         Apply filters to the data source based on the backend.
+
+         Args:
+             query_or_df: A SQLAlchemy/SQLModel query or Dask DataFrame.
+             model: SQLAlchemy/SQLModel model (required for SQLAlchemy/SQLModel backend).
+             filters: Dictionary of filters.
+
+         Returns:
+             Filtered query or DataFrame.
+         """
+         filters = filters or {}
+         for key, value in filters.items():
+             field_name, casting, operation = self._parse_filter_key(key)
+             parsed_value = self._parse_filter_value(casting, value)
+             # For both SQLAlchemy and SQLModel, use the same backend methods.
+             if self.backend in ("sqlalchemy", "sqlmodel"):
+                 column = self.backend_methods["get_column"](field_name, model, casting)
+                 condition = self.backend_methods["apply_operation"](column, operation, parsed_value)
+                 query_or_df = self.backend_methods["apply_condition"](query_or_df, condition)
+             elif self.backend == "dask":
+                 column = self.backend_methods["get_column"](query_or_df, field_name, casting)
+                 condition = self.backend_methods["apply_operation"](column, operation, parsed_value)
+                 query_or_df = self.backend_methods["apply_condition"](query_or_df, condition)
+             else:
+                 raise ValueError(f"Unsupported backend: {self.backend}")
+
+         return query_or_df
+
+     @staticmethod
+     def _parse_filter_key(key):
+         parts = key.split("__")
+         field_name = parts[0]
+         casting = None
+         operation = "exact"
+
+         if len(parts) == 3:
+             _, casting, operation = parts
+         elif len(parts) == 2:
+             if parts[1] in FilterHandler._comparison_operators():
+                 operation = parts[1]
+             elif parts[1] in FilterHandler._dt_operators() + FilterHandler._date_operators():
+                 casting = parts[1]
+
+         return field_name, casting, operation
+
+     def _parse_filter_value(self, casting, value):
+         """
+         Convert filter value to an appropriate type based on the casting (e.g., date).
+         """
+         if casting == "date":
+             if isinstance(value, str):
+                 return pd.Timestamp(value)  # Convert to datetime64[ns]
+             if isinstance(value, list):
+                 return [pd.Timestamp(v) for v in value]
+         elif casting == "time" and isinstance(value, str):
+             parsed = datetime.time.fromisoformat(value)
+             self.logger.debug(f"Parsed value (time): {parsed}")
+             return parsed
+         return value
+
+     @staticmethod
+     def _get_backend_methods(backend):
+         if backend in ("sqlalchemy", "sqlmodel"):
+             return {
+                 "get_column": FilterHandler._get_sqlalchemy_column,
+                 "apply_operation": FilterHandler._apply_operation_sqlalchemy,
+                 "apply_condition": lambda query, condition: query.filter(condition),
+             }
+         elif backend == "dask":
+             return {
+                 "get_column": FilterHandler._get_dask_column,
+                 "apply_operation": FilterHandler._apply_operation_dask,
+                 "apply_condition": lambda df, condition: df[condition],
+             }
+         else:
+             raise ValueError(f"Unsupported backend: {backend}")
+
+     @staticmethod
+     def _get_sqlalchemy_column(field_name, model, casting):
+         """
+         Retrieve and cast a column for SQLAlchemy/SQLModel based on the field name and casting.
+
+         Args:
+             field_name: The name of the field/column.
+             model: The SQLAlchemy/SQLModel model.
+             casting: The casting type ('date', 'time', etc.).
+
+         Returns:
+             The SQLAlchemy column object, optionally cast or transformed.
+         """
+         column = getattr(model, field_name, None)
+         if not column:
+             raise AttributeError(f"Field '{field_name}' not found in model '{model.__name__}'")
+         if casting == "date":
+             column = cast(column, Date)
+         elif casting == "time":
+             column = cast(column, Time)
+         elif casting in FilterHandler._date_operators():
+             column = func.extract(casting, column)
+         return column
+
+     @staticmethod
+     def _get_dask_column(df, field_name, casting):
+         """
+         Retrieve and optionally cast a column for Dask based on the field name and casting.
+
+         Args:
+             df: The Dask DataFrame.
+             field_name: The name of the field/column.
+             casting: The casting type ('date', 'time', etc.).
+
+         Returns:
+             The Dask Series, optionally cast or transformed.
+         """
+         column = dd.to_datetime(df[field_name], errors="coerce") if casting in FilterHandler._dt_operators() else df[field_name]
+         if casting == "date":
+             column = column.dt.floor("D")
+         elif casting in FilterHandler._date_operators():
+             column = getattr(column.dt, casting)
+         return column
+
+     @staticmethod
+     def _apply_operation_sqlalchemy(column, operation, value):
+         operation_map = FilterHandler._operation_map_sqlalchemy()
+         if operation not in operation_map:
+             raise ValueError(f"Unsupported operation: {operation}")
+         return operation_map[operation](column, value)
+
+     @staticmethod
+     def _apply_operation_dask(column, operation, value):
+         operation_map = FilterHandler._operation_map_dask()
+         if operation not in operation_map:
+             raise ValueError(f"Unsupported operation: {operation}")
+         return operation_map[operation](column, value)
+
+     @staticmethod
+     def _operation_map_sqlalchemy():
+         return {
+             "exact": lambda col, val: col == val,
+             "gt": lambda col, val: col > val,
+             "gte": lambda col, val: col >= val,
+             "lt": lambda col, val: col < val,
+             "lte": lambda col, val: col <= val,
+             "in": lambda col, val: col.in_(val),
+             "range": lambda col, val: col.between(val[0], val[1]),
+             "contains": lambda col, val: col.like(f"%{val}%"),
+             "startswith": lambda col, val: col.like(f"{val}%"),
+             "endswith": lambda col, val: col.like(f"%{val}"),
+             "isnull": lambda col, val: col.is_(None) if val else col.isnot(None),
+             "not_exact": lambda col, val: col != val,
+             "not_contains": lambda col, val: ~col.like(f"%{val}%"),
+             "not_in": lambda col, val: ~col.in_(val),
+             "regex": lambda col, val: col.op("~")(val),
+             "icontains": lambda col, val: col.ilike(f"%{val}%"),
+             "istartswith": lambda col, val: col.ilike(f"{val}%"),
+             "iendswith": lambda col, val: col.ilike(f"%{val}"),
+             "iexact": lambda col, val: col.ilike(val),
+             "iregex": lambda col, val: col.op("~*")(val),
+         }
+
+     @staticmethod
+     def _operation_map_dask():
+         return {
+             "exact": lambda col, val: col == val,
+             "gt": lambda col, val: col > val,
+             "gte": lambda col, val: col >= val,
+             "lt": lambda col, val: col < val,
+             "lte": lambda col, val: col <= val,
+             "in": lambda col, val: col.isin(val),
+             "range": lambda col, val: (col >= val[0]) & (col <= val[1]),
+             "contains": lambda col, val: col.str.contains(val, regex=True),
+             "startswith": lambda col, val: col.str.startswith(val),
+             "endswith": lambda col, val: col.str.endswith(val),
+             "isnull": lambda col, val: col.isnull() if val else col.notnull(),
+             "not_exact": lambda col, val: col != val,
+             "not_contains": lambda col, val: ~col.str.contains(val, regex=True),
+             "not_in": lambda col, val: ~col.isin(val),
+             "regex": lambda col, val: col.str.contains(val, regex=True),
+             "icontains": lambda col, val: col.str.contains(val, case=False, regex=True),
+             "istartswith": lambda col, val: col.str.startswith(val, case=False),
+             "iendswith": lambda col, val: col.str.endswith(val, case=False),
+             "iexact": lambda col, val: col.str.contains(f"^{val}$", case=False, regex=True),
+             "iregex": lambda col, val: col.str.contains(val, case=False, regex=True),
+         }
+
+     @staticmethod
+     def _dt_operators():
+         return ["date", "time"]
+
+     @staticmethod
+     def _date_operators():
+         return ["year", "month", "day", "hour", "minute", "second", "week_day"]
+
+     @staticmethod
+     def _comparison_operators():
+         return [
+             "gte", "lte", "gt", "lt", "exact", "in", "range",
+             "contains", "startswith", "endswith", "isnull",
+             "not_exact", "not_contains", "not_in",
+             "regex", "icontains", "istartswith", "iendswith",
+             "iexact", "iregex"
+         ]
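A short sketch of how the double-underscore filter keys behave on the Dask backend; this is illustrative only, and the DataFrame and column names are invented:

import pandas as pd
import dask.dataframe as dd
from sibi_dst.v2.df_helper.core import FilterHandler

# Tiny in-memory frame standing in for a real dataset.
pdf = pd.DataFrame({
    "status": ["open", "closed", "open"],
    "created_at": pd.to_datetime(["2024-01-05", "2024-02-10", "2024-03-15"]),
})
ddf = dd.from_pandas(pdf, npartitions=1)

handler = FilterHandler(backend="dask")
filtered = handler.apply_filters(ddf, filters={
    "status__in": ["open"],                  # bare comparison operator
    "created_at__date__gte": "2024-02-01",   # casting ("date") plus operator ("gte")
})
print(filtered.compute())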
sibi_dst/v2/df_helper/core/_params_config.py
@@ -0,0 +1,139 @@
+ from typing import Optional, Dict, Union, List
+
+ from pydantic import BaseModel, model_validator, Field
+
+ dataframe_params: Dict[str, Union[None, str, bool, int, None]] = {
+     "fieldnames": None,
+     "index_col": None,
+     "coerce_float": False,
+     "verbose": True,
+     "datetime_index": False,
+     "column_names": None,
+     "chunk_size": 1000,
+ }
+ # dataframe_options is a dictionary that provides additional options for modifying a pandas DataFrame.
+ # These options include parameters for handling duplicate values, sorting, grouping, and other DataFrame operations.
+
+ dataframe_options: Dict[str, Union[bool, str, int, None]] = {
+     "debug": False,  # Whether to print debug information
+     "duplicate_expr": None,  # Expression for identifying duplicate values
+     "duplicate_keep": 'last',  # How to handle duplicate values ('first', 'last', or False)
+     "sort_field": None,  # Field to use for sorting the DataFrame
+     "group_by_expr": None,  # Expression for grouping the DataFrame
+     "group_expr": None  # Expression for aggregating functions to the grouped DataFrame
+ }
+
+ LOOKUP_SEP = "__"
+
+
+ class ParamsConfig(BaseModel):
+     """
+     Defines a configuration model for parameters with functionality for parsing,
+     validation, and conversion of legacy filters.
+
+     This class extends BaseModel from Pydantic and is designed to handle multiple
+     sets of configurations, including field mappings, filters, dataframe parameters,
+     and dataframe options. It allows for flexible parsing of parameters across a
+     variety of supported structures and ensures that legacy filters can be
+     appropriately converted for compatibility.
+
+     :ivar field_map: Maps field names to their equivalent legacy field names.
+     :type field_map: Optional[Dict]
+     :ivar legacy_filters: Indicates whether legacy filters should be processed.
+     :type legacy_filters: bool
+     :ivar sticky_filters: Stores additional filters as key-value pairs that persist
+         across parameter parsing.
+     :type sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]]
+     :ivar filters: Holds all the current filters including sticky and dynamically
+         parsed filters.
+     :type filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]]
+     :ivar df_params: Contains parameters related to dataframe configurations in a
+         structured format.
+     :type df_params: Dict[str, Union[tuple, str, bool, None]]
+     :ivar df_options: Stores optional configurations for a dataframe, allowing for
+         additional behavior customization.
+     :type df_options: Dict[str, Union[bool, str, None]]
+     :ivar params: Dictionary of parameters provided for configuration, supporting
+         both basic and nested structures.
+     :type params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]]
+     """
+     field_map: Optional[Dict] = Field(default_factory=dict)
+     legacy_filters: bool = False
+     sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]] = Field(default_factory=dict)
+     filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]] = Field(default_factory=dict)
+     df_params: Dict[str, Union[tuple, str, bool, None]] = Field(default_factory=dict)
+     df_options: Dict[str, Union[bool, str, None]] = Field(default_factory=dict)
+     params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]] = Field(default_factory=dict)
+
+     @model_validator(mode='after')
+     def check_params(self):
+         if self.params is not None:
+             self.parse_params(self.params)
+         return self
+
+     def parse_params(self, params):
+         """
+         Parses and separates the given parameters into specific categories such as dataframe parameters,
+         dataframe options, and filters. Updates existing class attributes with the parsed values,
+         retaining any sticky filters. Also handles the legacy filters if provided.
+
+         :param params: Dictionary containing parameters to process. These parameters can include specific
+             keys relevant for dataframe configuration (e.g., dataframe parameters, dataframe options)
+             as well as arbitrary filter settings.
+         :type params: dict
+         :return: None
+         """
+         self.legacy_filters = params.pop('legacy_filters', self.legacy_filters)
+         self.field_map = params.pop('field_map', self.field_map)
+         self.sticky_filters = params.pop('params', self.sticky_filters)
+         df_params, df_options, filters = {}, {}, {}
+         for k, v in params.items():
+             if k in dataframe_params.keys():
+                 df_params.update({k: v})
+             elif k in dataframe_options.keys():
+                 df_options.update({k: v})
+             else:
+                 filters.update({k: v})
+         self.filters = {**self.sticky_filters, **filters}
+         self.df_params = {**self.df_params, **df_params}
+         self.df_options = {**self.df_options, **df_options}
+         if self.legacy_filters:
+             self.convert_legacy_filters()
+
+     def convert_legacy_filters(self):
+         """
+         Converts legacy filter fields in the `self.filters` dictionary to their
+         modern equivalents using the mappings provided in `self.field_map`.
+         This method ensures backward compatibility for filters by automatically
+         translating the old field names into the current system.
+
+         The function first verifies that the required dictionaries (`legacy_filters`,
+         `field_map`, `filters`) are valid. It creates a reverse map of `field_map` for
+         efficient lookup, processes the key names within `self.filters`, and updates
+         them to reflect the legacy mapping.
+
+         :raises KeyError: If any required dictionary key is missing during processing.
+
+         :param self.legacy_filters: A boolean flag indicating whether legacy filters
+             are being used.
+         :type self.legacy_filters: bool
+
+         """
+         if not self.legacy_filters or not self.field_map or not self.filters:
+             return
+         # create a reverse map of the field_map
+         reverse_map = {v: k for k, v in self.field_map.items()}
+
+         new_filters = {}
+         for filter_field, value in self.filters.items():
+             # split the filter_field if LOOKUP_SEP exists
+             parts = filter_field.split(LOOKUP_SEP, 1)
+
+             # replace each part with its legacy equivalent if it exists
+             new_parts = [reverse_map.get(part, part) for part in parts]
+
+             # join the parts back together and add to the new filters
+             new_filter_field = LOOKUP_SEP.join(new_parts)
+             new_filters[new_filter_field] = value
+
+         self.filters = new_filters
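To make the routing in parse_params concrete, a small illustrative example follows; the status__in key is invented, while fieldnames and debug are keys recognised by the module's dataframe_params and dataframe_options dictionaries:

from sibi_dst.v2.df_helper.core import ParamsConfig

config = ParamsConfig(params={
    "fieldnames": ["id", "name"],      # recognised dataframe parameter
    "debug": True,                     # recognised dataframe option
    "status__in": ["open", "closed"],  # everything else is treated as a filter
})
print(config.df_params)   # {'fieldnames': ['id', 'name']}
print(config.df_options)  # {'debug': True}
print(config.filters)     # {'status__in': ['open', 'closed']}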