sibi-dst 0.3.26__py3-none-any.whl → 0.3.27__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -185,6 +185,15 @@ class FilterHandler:
185
185
  "startswith": lambda col, val: col.like(f"{val}%"),
186
186
  "endswith": lambda col, val: col.like(f"%{val}"),
187
187
  "isnull": lambda col, val: col.is_(None) if val else col.isnot(None),
188
+ "not_exact": lambda col, val: col != val,
189
+ "not_contains": lambda col, val: ~col.like(f"%{val}%"),
190
+ "not_in": lambda col, val: ~col.in_(val), # Custom operation
191
+ "regex": lambda col, val: col.op("~")(val), # Custom operation
192
+ "icontains": lambda col, val: col.ilike(f"%{val}%"), # Custom operation
193
+ "istartswith": lambda col, val: col.ilike(f"{val}%"), # Custom operation
194
+ "iendswith": lambda col, val: col.ilike(f"%{val}"), # Custom operation
195
+ "iexact": lambda col, val: col.ilike(val), # Added iexact
196
+ "iregex": lambda col, val: col.op("~*")(val), # Added iregex
188
197
  }
189
198
 
190
199
  @staticmethod
@@ -201,6 +210,15 @@ class FilterHandler:
201
210
  "startswith": lambda col, val: col.str.startswith(val),
202
211
  "endswith": lambda col, val: col.str.endswith(val),
203
212
  "isnull": lambda col, val: col.isnull() if val else col.notnull(),
213
+ "not_exact": lambda col, val: col != val,
214
+ "not_contains": lambda col, val: ~col.str.contains(val, regex=True),
215
+ "not_in": lambda col, val: ~col.isin(val), # Custom operation
216
+ "regex": lambda col, val: col.str.contains(val, regex=True), # Custom operation
217
+ "icontains": lambda col, val: col.str.contains(val, case=False, regex=True), # Custom operation
218
+ "istartswith": lambda col, val: col.str.startswith(val, case=False), # Custom operation
219
+ "iendswith": lambda col, val: col.str.endswith(val, case=False), # Custom operation
220
+ "iexact": lambda col, val: col.str.contains(f"^{val}$", case=False, regex=True), # Added iexact
221
+ "iregex": lambda col, val: col.str.contains(val, case=False, regex=True), # Added iregex
204
222
  }
205
223
 
206
224
  @staticmethod
@@ -216,4 +234,7 @@ class FilterHandler:
216
234
  return [
217
235
  "gte", "lte", "gt", "lt", "exact", "in", "range",
218
236
  "contains", "startswith", "endswith", "isnull",
237
+ "not_exact", "not_contains", "not_in",
238
+ "regex", "icontains", "istartswith", "iendswith",
239
+ "iexact", "iregex"
219
240
  ]
@@ -29,8 +29,8 @@ LOOKUP_SEP = "__"
29
29
  class ParamsConfig(BaseModel):
30
30
  field_map: Optional[Dict] = Field(default_factory=dict)
31
31
  legacy_filters: bool = False
32
- sticky_filters: Dict[str, Union[str, bool, int, float]] = Field(default_factory=dict)
33
- filters: Dict[str, Union[str, Dict, bool, int, float]] = Field(default_factory=dict)
32
+ sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]] = Field(default_factory=dict)
33
+ filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]] = Field(default_factory=dict)
34
34
  df_params: Dict[str, Union[tuple, str, bool, None]] = Field(default_factory=dict)
35
35
  df_options: Dict[str, Union[bool, str, None]] = Field(default_factory=dict)
36
36
  params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]] = Field(default_factory=dict)
@@ -125,7 +125,7 @@ class ClickHouseWriter:
125
125
  """
126
126
  Writes the Dask DataFrame to a ClickHouse table partition by partition.
127
127
  """
128
- if len(self.df.head().index) == 0:
128
+ if len(self.df.index) == 0:
129
129
  self.logger.debug("No data found. Nothing written.")
130
130
  return
131
131
 
@@ -95,7 +95,7 @@ class DataUtils:
95
95
  # Get unique IDs from source column
96
96
  ids = df[source_col].dropna().unique()
97
97
  # Compute if it's a Dask Series
98
- if isinstance(ids, dd.core.Series):
98
+ if isinstance(ids, dd.Series):
99
99
  ids = ids.compute()
100
100
 
101
101
  # Check if any IDs are found
@@ -126,32 +126,6 @@ class DataWrapper:
126
126
  futures[new_future] = priority
127
127
  self.logger.info(f"Resubmitted task for priority {priority} after timeout.")
128
128
 
129
- # def process(self):
130
- # """Execute the update plan following the specified hierarchy."""
131
- # update_plan_table = self.generate_update_plan_with_conditions()
132
- #
133
- # # Display the update plan table to the user if show_progress is True
134
- # if self.show_progress:
135
- # display(update_plan_table)
136
- #
137
- # # Process files according to the hierarchy, considering only `update_required` dates
138
- # for category, description in [
139
- # ("overwrite", "Processing files due to overwrite=True"),
140
- # ("history_days", "Processing files within history_days_threshold"),
141
- # ("missing_files", "Processing missing files")
142
- # ]:
143
- # # Filter dates in the category where `update_required` is True
144
- # dates_to_process = update_plan_table[
145
- # (update_plan_table["update_category"] == category) & (update_plan_table["update_required"])
146
- # ]["date"].tolist()
147
- #
148
- # date_iterator = dates_to_process
149
- # if self.show_progress:
150
- # date_iterator = tqdm(date_iterator, desc=f"{description}:{self.dataclass.__name__}", unit="date")
151
- #
152
- # for current_date in date_iterator:
153
- # self.process_date(current_date)
154
-
155
129
  def is_file_older_than(self, file_path: str) -> bool:
156
130
  """
157
131
  Check if a file is older than the specified max_age_minutes.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 0.3.26
3
+ Version: 0.3.27
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -32,6 +32,7 @@ Requires-Dist: pymysql (>=1.1.1,<2.0.0)
32
32
  Requires-Dist: pytest (>=8.3.3,<9.0.0)
33
33
  Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
34
34
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
35
+ Requires-Dist: s3fs (>=2024.12.0,<2025.0.0)
35
36
  Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0)
36
37
  Requires-Dist: tornado (>=6.4.1,<7.0.0)
37
38
  Requires-Dist: tqdm (>=4.67.0,<5.0.0)
@@ -22,15 +22,15 @@ sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=ML-m_WeTR1_UMgiDR
22
22
  sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=Bmhh6VvmBfNfBA2JpuEdsYD_193yJ768Si2TvkY9HmU,4405
23
23
  sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
24
24
  sibi_dst/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
25
- sibi_dst/df_helper/core/_filter_handler.py,sha256=1-IdviSYi5Hc28KckO4dkYHDfQ8X9SUb6kwfobm16_E,8580
26
- sibi_dst/df_helper/core/_params_config.py,sha256=mM1CnF29zls5LXx7rpKY8uix_GyOG5smO4ry_OX31IU,3477
25
+ sibi_dst/df_helper/core/_filter_handler.py,sha256=g9FMcB_koT724ggcWt98jow2XgUnmupK_fNhF95W5bQ,10217
26
+ sibi_dst/df_helper/core/_params_config.py,sha256=Og3GYth0GVWpcOYWZWRy7CZ5PDsg63Nmqo-W7TUrA_0,3503
27
27
  sibi_dst/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
28
28
  sibi_dst/utils/__init__.py,sha256=z51o5sjIo_gTjnDXk5SBniCxWJIrDBMS7df0dTs8VMk,775
29
29
  sibi_dst/utils/airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
30
- sibi_dst/utils/clickhouse_writer.py,sha256=dL5pixjn4cj0Rwpc3POfCcY2D-aQCMbPSECX0dKATyE,8587
30
+ sibi_dst/utils/clickhouse_writer.py,sha256=xUhFDOuZt0eZDpVJNuLb7pfTHUV06NCYrNUx_a7qrSM,8580
31
31
  sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
32
- sibi_dst/utils/data_utils.py,sha256=ch4j5FEs8ZnniUzpbeLO-b4Yco_6nwCu71xHaVqMGi4,7050
33
- sibi_dst/utils/data_wrapper.py,sha256=4U0sKVXK7qDTObhufO19jxTzJa6ohs2VOh3WAhhzLCU,11982
32
+ sibi_dst/utils/data_utils.py,sha256=Kv87Br78EXlH_MSVzRspqLwrf6sqHIRQc0t3LDI0dSM,7045
33
+ sibi_dst/utils/data_wrapper.py,sha256=DTK4hd_GUUi5lxcbEbMraKwzpmPh2IwX6WNWA4t-vx0,10693
34
34
  sibi_dst/utils/date_utils.py,sha256=CMAZBNwVj7cvERcNiTA8Pf7_5EjV9By9yxkYJpkqz1g,10656
35
35
  sibi_dst/utils/df_utils.py,sha256=OFEtcwVKIilvf9qVf-IfIOHp4jcFAHX5l2IDGudhPZg,10989
36
36
  sibi_dst/utils/file_utils.py,sha256=JpsybYj3XvVJisSBeVU6YSaZnYRm4_6YWTI3TLnnY4Y,1257
@@ -38,6 +38,6 @@ sibi_dst/utils/filepath_generator.py,sha256=hjI7gQwfwRToPeuzoUQDayHKQrr4Ivhi4Chl
38
38
  sibi_dst/utils/log_utils.py,sha256=4eLmoV8VC7wDwPr1mRfDKP24_-laGO6ogE4U0u3DUuA,2315
39
39
  sibi_dst/utils/parquet_saver.py,sha256=hLrWr1G132y94eLopDPPGQGDsAiR1lQ8id4QQtGYPE4,4349
40
40
  sibi_dst/utils/storage_manager.py,sha256=7nkfeBW_2xlF59pGj7V2aY5TLwpJnPQuPVclqjavJOA,3856
41
- sibi_dst-0.3.26.dist-info/METADATA,sha256=kalIXDq4iKvLdvMVXvm96EkKnUiGY5Yn3-FId81Vh6Q,2221
42
- sibi_dst-0.3.26.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
43
- sibi_dst-0.3.26.dist-info/RECORD,,
41
+ sibi_dst-0.3.27.dist-info/METADATA,sha256=YFb0ZGbz2m0-aczvItyKK4Iqf1wn6pSVGE41ZUQ6YI8,2265
42
+ sibi_dst-0.3.27.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
43
+ sibi_dst-0.3.27.dist-info/RECORD,,