sibi-dst 0.3.25.tar.gz → 0.3.27.tar.gz

This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (44)
  1. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/PKG-INFO +2 -1
  2. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/pyproject.toml +2 -1
  3. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/core/_filter_handler.py +21 -0
  4. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/core/_params_config.py +2 -2
  5. sibi_dst-0.3.27/sibi_dst/utils/__init__.py +31 -0
  6. sibi_dst-0.3.25/sibi_dst/utils/_clickhouse_writer.py → sibi_dst-0.3.27/sibi_dst/utils/clickhouse_writer.py +1 -1
  7. sibi_dst-0.3.25/sibi_dst/utils/_data_utils.py → sibi_dst-0.3.27/sibi_dst/utils/data_utils.py +1 -1
  8. sibi_dst-0.3.25/sibi_dst/utils/_data_wrapper.py → sibi_dst-0.3.27/sibi_dst/utils/data_wrapper.py +0 -26
  9. sibi_dst-0.3.25/sibi_dst/utils/_df_utils.py → sibi_dst-0.3.27/sibi_dst/utils/df_utils.py +1 -1
  10. sibi_dst-0.3.25/sibi_dst/utils/__init__.py +0 -31
  11. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/README.md +0 -0
  12. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/__init__.py +0 -0
  13. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/__init__.py +0 -0
  14. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/_df_helper.py +0 -0
  15. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
  16. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/_parquet_reader.py +0 -0
  17. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/__init__.py +0 -0
  18. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/django/__init__.py +0 -0
  19. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/django/_db_connection.py +0 -0
  20. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/django/_io_dask.py +0 -0
  21. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/django/_load_from_db.py +0 -0
  22. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/django/_sql_model_builder.py +0 -0
  23. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  24. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  25. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  26. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/parquet/_filter_handler.py +0 -0
  27. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
  28. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  29. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  30. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -0
  31. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  32. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  33. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  34. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/core/__init__.py +0 -0
  35. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/core/_defaults.py +0 -0
  36. {sibi_dst-0.3.25 → sibi_dst-0.3.27}/sibi_dst/df_helper/core/_query_config.py +0 -0
  37. sibi_dst-0.3.25/sibi_dst/utils/_airflow_manager.py → sibi_dst-0.3.27/sibi_dst/utils/airflow_manager.py +0 -0
  38. sibi_dst-0.3.25/sibi_dst/utils/_credentials.py → sibi_dst-0.3.27/sibi_dst/utils/credentials.py +0 -0
  39. sibi_dst-0.3.25/sibi_dst/utils/_date_utils.py → sibi_dst-0.3.27/sibi_dst/utils/date_utils.py +0 -0
  40. sibi_dst-0.3.25/sibi_dst/utils/_file_utils.py → sibi_dst-0.3.27/sibi_dst/utils/file_utils.py +0 -0
  41. sibi_dst-0.3.25/sibi_dst/utils/_filepath_generator.py → sibi_dst-0.3.27/sibi_dst/utils/filepath_generator.py +0 -0
  42. sibi_dst-0.3.25/sibi_dst/utils/_log_utils.py → sibi_dst-0.3.27/sibi_dst/utils/log_utils.py +0 -0
  43. sibi_dst-0.3.25/sibi_dst/utils/_parquet_saver.py → sibi_dst-0.3.27/sibi_dst/utils/parquet_saver.py +0 -0
  44. sibi_dst-0.3.25/sibi_dst/utils/_storage_manager.py → sibi_dst-0.3.27/sibi_dst/utils/storage_manager.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 0.3.25
+ Version: 0.3.27
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
@@ -32,6 +32,7 @@ Requires-Dist: pymysql (>=1.1.1,<2.0.0)
  Requires-Dist: pytest (>=8.3.3,<9.0.0)
  Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
+ Requires-Dist: s3fs (>=2024.12.0,<2025.0.0)
  Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0)
  Requires-Dist: tornado (>=6.4.1,<7.0.0)
  Requires-Dist: tqdm (>=4.67.0,<5.0.0)
pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "sibi-dst"
- version = "0.3.25"
+ version = "0.3.27"
  description = "Data Science Toolkit"
  authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
  readme = "README.md"
@@ -36,6 +36,7 @@ dask-expr = "^1.1.20"
  psycopg2 = "^2.9.10"
  uvicorn = "^0.34.0"
  pytest-mock = "^3.14.0"
+ s3fs = "^2024.12.0"


  [build-system]
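
The new s3fs dependency is the fsspec backend for the s3:// protocol, which is what lets the parquet and storage utilities in this package address S3 paths directly. A minimal sketch of what it enables (the bucket name and options are hypothetical, not values from this package):

    import dask.dataframe as dd

    # With s3fs installed, the "s3" protocol resolves like a local path.
    ddf = dd.read_parquet(
        "s3://my-bucket/warehouse/events/",   # hypothetical bucket
        storage_options={"anon": False},      # credentials resolved by s3fs/boto3
    )
    print(ddf.npartitions)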
sibi_dst/df_helper/core/_filter_handler.py
@@ -185,6 +185,15 @@ class FilterHandler:
  "startswith": lambda col, val: col.like(f"{val}%"),
  "endswith": lambda col, val: col.like(f"%{val}"),
  "isnull": lambda col, val: col.is_(None) if val else col.isnot(None),
+ "not_exact": lambda col, val: col != val,
+ "not_contains": lambda col, val: ~col.like(f"%{val}%"),
+ "not_in": lambda col, val: ~col.in_(val),
+ "regex": lambda col, val: col.op("~")(val),
+ "icontains": lambda col, val: col.ilike(f"%{val}%"),
+ "istartswith": lambda col, val: col.ilike(f"{val}%"),
+ "iendswith": lambda col, val: col.ilike(f"%{val}"),
+ "iexact": lambda col, val: col.ilike(val),
+ "iregex": lambda col, val: col.op("~*")(val),
  }

  @staticmethod
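
These SQLAlchemy handlers translate Django-style lookups into column operators: ilike() supplies the case-insensitive variants, while op("~") and op("~*") emit PostgreSQL's regex operators, so regex/iregex will only work against a PostgreSQL dialect. A short illustration of the SQL these lambdas generate (the table and column are invented for the example):

    import sqlalchemy as sa

    users = sa.table("users", sa.column("name"))

    # "icontains" -> ILIKE with wildcards
    print(users.c.name.ilike("%smith%"))      # users.name ILIKE :name_1

    # "iregex" -> PostgreSQL case-insensitive regex match
    print(users.c.name.op("~*")("^sm.*h$"))   # users.name ~* :name_1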
@@ -201,6 +210,15 @@ class FilterHandler:
  "startswith": lambda col, val: col.str.startswith(val),
  "endswith": lambda col, val: col.str.endswith(val),
  "isnull": lambda col, val: col.isnull() if val else col.notnull(),
+ "not_exact": lambda col, val: col != val,
+ "not_contains": lambda col, val: ~col.str.contains(val, regex=True),
+ "not_in": lambda col, val: ~col.isin(val),
+ "regex": lambda col, val: col.str.contains(val, regex=True),
+ "icontains": lambda col, val: col.str.contains(val, case=False, regex=True),
+ "istartswith": lambda col, val: col.str.startswith(val, case=False),
+ "iendswith": lambda col, val: col.str.endswith(val, case=False),
+ "iexact": lambda col, val: col.str.contains(f"^{val}$", case=False, regex=True),
+ "iregex": lambda col, val: col.str.contains(val, case=False, regex=True),
  }

  @staticmethod
@@ -216,4 +234,7 @@ class FilterHandler:
  return [
  "gte", "lte", "gt", "lt", "exact", "in", "range",
  "contains", "startswith", "endswith", "isnull",
+ "not_exact", "not_contains", "not_in",
+ "regex", "icontains", "istartswith", "iendswith",
+ "iexact", "iregex"
  ]
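
The dask-side handlers resolve the same lookups through pandas string methods. One caveat: pandas' Series.str.startswith and str.endswith accept no case argument, so the istartswith/iendswith entries above may raise a TypeError when evaluated; lowercasing both sides is the usual workaround. A runnable sketch of the contains-based operations (the mini-table is illustrative, not this package's API):

    import pandas as pd
    import dask.dataframe as dd

    # Illustrative subset of the dask handler table above.
    DASK_OPS = {
        "icontains": lambda col, val: col.str.contains(val, case=False, regex=True),
        "not_in":    lambda col, val: ~col.isin(val),
        "iexact":    lambda col, val: col.str.contains(f"^{val}$", case=False, regex=True),
    }

    df = dd.from_pandas(pd.DataFrame({"name": ["Ana", "BOB", "carla"]}), npartitions=1)
    field, op = "name__icontains".split("__", 1)         # LOOKUP_SEP is "__"
    print(df[DASK_OPS[op](df[field], "bo")].compute())   # matches "BOB"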
sibi_dst/df_helper/core/_params_config.py
@@ -29,8 +29,8 @@ LOOKUP_SEP = "__"
  class ParamsConfig(BaseModel):
  field_map: Optional[Dict] = Field(default_factory=dict)
  legacy_filters: bool = False
- sticky_filters: Dict[str, Union[str, bool, int, float]] = Field(default_factory=dict)
- filters: Dict[str, Union[str, Dict, bool, int, float]] = Field(default_factory=dict)
+ sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]] = Field(default_factory=dict)
+ filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]] = Field(default_factory=dict)
  df_params: Dict[str, Union[tuple, str, bool, None]] = Field(default_factory=dict)
  df_options: Dict[str, Union[bool, str, None]] = Field(default_factory=dict)
  params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]] = Field(default_factory=dict)
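
Widening the value types with list and tuple is what allows multi-valued lookups such as __in and __range to pass Pydantic validation. A minimal stand-in model showing the effect (not the package's full class):

    from typing import Dict, Union
    from pydantic import BaseModel, Field

    class Params(BaseModel):
        # Same annotation as the widened `filters` field above.
        filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]] = Field(default_factory=dict)

    p = Params(filters={"status__in": [1, 2, 3],
                        "date__range": ("2024-01-01", "2024-12-31")})
    print(p.filters)  # the 0.3.25 annotation rejected these list/tuple values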
sibi_dst-0.3.27/sibi_dst/utils/__init__.py (new file)
@@ -0,0 +1,31 @@
+ from __future__ import annotations
+
+ from .log_utils import Logger
+ from .date_utils import *
+ from .data_utils import DataUtils
+ from .file_utils import FileUtils
+ from .filepath_generator import FilePathGenerator
+ from .df_utils import DfUtils
+ from .storage_manager import StorageManager
+ from .parquet_saver import ParquetSaver
+ from .clickhouse_writer import ClickHouseWriter
+ from .airflow_manager import AirflowDAGManager
+ from .credentials import *
+ from .data_wrapper import DataWrapper
+
+ __all__ = [
+     "Logger",
+     "ConfigManager",
+     "ConfigLoader",
+     "DateUtils",
+     "BusinessDays",
+     "FileUtils",
+     "DataWrapper",
+     "DataUtils",
+     "FilePathGenerator",
+     "ParquetSaver",
+     "StorageManager",
+     "DfUtils",
+     "ClickHouseWriter",
+     "AirflowDAGManager",
+ ]
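
Net effect of the renames: the underscore-prefixed modules under sibi_dst.utils become public names (ConfigManager, ConfigLoader, DateUtils, and BusinessDays in __all__ presumably arrive via the two star imports). For callers this changes the module-level import paths; an illustrative before/after, assuming the package is installed:

    # 0.3.25 (old private module path):
    #   from sibi_dst.utils._parquet_saver import ParquetSaver

    # 0.3.27 (renamed module, or the package-level re-export):
    from sibi_dst.utils.parquet_saver import ParquetSaver
    from sibi_dst.utils import ParquetSaver, Logger, DataWrapper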
sibi_dst-0.3.25/sibi_dst/utils/_clickhouse_writer.py → sibi_dst-0.3.27/sibi_dst/utils/clickhouse_writer.py
@@ -125,7 +125,7 @@ class ClickHouseWriter:
  """
  Writes the Dask DataFrame to a ClickHouse table partition by partition.
  """
- if len(self.df.head().index) == 0:
+ if len(self.df.index) == 0:
  self.logger.debug("No data found. Nothing written.")
  return

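This fix matters for multi-partition Dask frames: df.head() only inspects the first partition, so the old check reported "no data" whenever partition zero happened to be empty, while len(df.index) counts rows across every partition (at the cost of a full computation). A small reproduction of the difference:

    import pandas as pd
    import dask.dataframe as dd

    pdf = pd.DataFrame({"x": range(4)})
    # First partition empty, second holds all four rows.
    ddf = dd.concat([dd.from_pandas(pdf.iloc[:0], npartitions=1),
                     dd.from_pandas(pdf, npartitions=1)])

    print(len(ddf.head().index))  # 0 -- head() reads partition 0 only (dask warns)
    print(len(ddf.index))         # 4 -- counts all partitions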
sibi_dst-0.3.25/sibi_dst/utils/_data_utils.py → sibi_dst-0.3.27/sibi_dst/utils/data_utils.py
@@ -95,7 +95,7 @@ class DataUtils:
  # Get unique IDs from source column
  ids = df[source_col].dropna().unique()
  # Compute if it's a Dask Series
- if isinstance(ids, dd.core.Series):
+ if isinstance(ids, dd.Series):
  ids = ids.compute()

  # Check if any IDs are found
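
dd.core.Series is a private import path that newer Dask releases (notably the dask-expr backend) no longer satisfy in isinstance checks; dd.Series is the stable public class. A quick check of the fixed branch:

    import pandas as pd
    import dask.dataframe as dd

    s = dd.from_pandas(pd.Series([1, 2, 2, None, 3]), npartitions=2)
    ids = s.dropna().unique()        # still a lazy Dask Series
    if isinstance(ids, dd.Series):   # public class; works on old and new backends
        ids = ids.compute()
    print(sorted(ids))               # [1.0, 2.0, 3.0]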
sibi_dst-0.3.25/sibi_dst/utils/_data_wrapper.py → sibi_dst-0.3.27/sibi_dst/utils/data_wrapper.py
@@ -126,32 +126,6 @@ class DataWrapper:
  futures[new_future] = priority
  self.logger.info(f"Resubmitted task for priority {priority} after timeout.")

- # def process(self):
- #     """Execute the update plan following the specified hierarchy."""
- #     update_plan_table = self.generate_update_plan_with_conditions()
- #
- #     # Display the update plan table to the user if show_progress is True
- #     if self.show_progress:
- #         display(update_plan_table)
- #
- #     # Process files according to the hierarchy, considering only `update_required` dates
- #     for category, description in [
- #         ("overwrite", "Processing files due to overwrite=True"),
- #         ("history_days", "Processing files within history_days_threshold"),
- #         ("missing_files", "Processing missing files")
- #     ]:
- #         # Filter dates in the category where `update_required` is True
- #         dates_to_process = update_plan_table[
- #             (update_plan_table["update_category"] == category) & (update_plan_table["update_required"])
- #         ]["date"].tolist()
- #
- #         date_iterator = dates_to_process
- #         if self.show_progress:
- #             date_iterator = tqdm(date_iterator, desc=f"{description}:{self.dataclass.__name__}", unit="date")
- #
- #         for current_date in date_iterator:
- #             self.process_date(current_date)
-
  def is_file_older_than(self, file_path: str) -> bool:
  """
  Check if a file is older than the specified max_age_minutes.
sibi_dst-0.3.25/sibi_dst/utils/_df_utils.py → sibi_dst-0.3.27/sibi_dst/utils/df_utils.py
@@ -1,7 +1,7 @@
  import dask.dataframe as dd
  import pandas as pd

- from ._log_utils import Logger
+ from .log_utils import Logger


  class DfUtils:
sibi_dst-0.3.25/sibi_dst/utils/__init__.py (removed; superseded by the new module above)
@@ -1,31 +0,0 @@
- from __future__ import annotations
-
- from ._log_utils import Logger
- from ._date_utils import *
- from ._data_utils import DataUtils
- from ._file_utils import FileUtils
- from ._filepath_generator import FilePathGenerator
- from ._df_utils import DfUtils
- from ._storage_manager import StorageManager
- from ._parquet_saver import ParquetSaver
- from ._clickhouse_writer import ClickHouseWriter
- from ._airflow_manager import AirflowDAGManager
- from ._credentials import *
- from ._data_wrapper import DataWrapper
-
- __all__ = [
-     "Logger",
-     "ConfigManager",
-     "ConfigLoader",
-     "DateUtils",
-     "BusinessDays",
-     "FileUtils",
-     "DataWrapper",
-     "DataUtils",
-     "FilePathGenerator",
-     "ParquetSaver",
-     "StorageManager",
-     "DfUtils",
-     "ClickHouseWriter",
-     "AirflowDAGManager",
- ]