sibi-dst 0.3.25__py3-none-any.whl → 0.3.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/df_helper/core/_filter_handler.py +21 -0
- sibi_dst/df_helper/core/_params_config.py +2 -2
- sibi_dst/utils/__init__.py +12 -12
- sibi_dst/utils/{_clickhouse_writer.py → clickhouse_writer.py} +1 -1
- sibi_dst/utils/{_data_utils.py → data_utils.py} +1 -1
- sibi_dst/utils/{_data_wrapper.py → data_wrapper.py} +0 -26
- sibi_dst/utils/{_df_utils.py → df_utils.py} +1 -1
- {sibi_dst-0.3.25.dist-info → sibi_dst-0.3.27.dist-info}/METADATA +2 -1
- {sibi_dst-0.3.25.dist-info → sibi_dst-0.3.27.dist-info}/RECORD +18 -18
- /sibi_dst/utils/{_airflow_manager.py → airflow_manager.py} +0 -0
- /sibi_dst/utils/{_credentials.py → credentials.py} +0 -0
- /sibi_dst/utils/{_date_utils.py → date_utils.py} +0 -0
- /sibi_dst/utils/{_file_utils.py → file_utils.py} +0 -0
- /sibi_dst/utils/{_filepath_generator.py → filepath_generator.py} +0 -0
- /sibi_dst/utils/{_log_utils.py → log_utils.py} +0 -0
- /sibi_dst/utils/{_parquet_saver.py → parquet_saver.py} +0 -0
- /sibi_dst/utils/{_storage_manager.py → storage_manager.py} +0 -0
- {sibi_dst-0.3.25.dist-info → sibi_dst-0.3.27.dist-info}/WHEEL +0 -0
sibi_dst/df_helper/core/_filter_handler.py
CHANGED

```diff
@@ -185,6 +185,15 @@ class FilterHandler:
             "startswith": lambda col, val: col.like(f"{val}%"),
             "endswith": lambda col, val: col.like(f"%{val}"),
             "isnull": lambda col, val: col.is_(None) if val else col.isnot(None),
+            "not_exact": lambda col, val: col != val,
+            "not_contains": lambda col, val: ~col.like(f"%{val}%"),
+            "not_in": lambda col, val: ~col.in_(val),  # Custom operation
+            "regex": lambda col, val: col.op("~")(val),  # Custom operation
+            "icontains": lambda col, val: col.ilike(f"%{val}%"),  # Custom operation
+            "istartswith": lambda col, val: col.ilike(f"{val}%"),  # Custom operation
+            "iendswith": lambda col, val: col.ilike(f"%{val}"),  # Custom operation
+            "iexact": lambda col, val: col.ilike(val),  # Added iexact
+            "iregex": lambda col, val: col.op("~*")(val),  # Added iregex
         }

     @staticmethod
@@ -201,6 +210,15 @@ class FilterHandler:
             "startswith": lambda col, val: col.str.startswith(val),
             "endswith": lambda col, val: col.str.endswith(val),
             "isnull": lambda col, val: col.isnull() if val else col.notnull(),
+            "not_exact": lambda col, val: col != val,
+            "not_contains": lambda col, val: ~col.str.contains(val, regex=True),
+            "not_in": lambda col, val: ~col.isin(val),  # Custom operation
+            "regex": lambda col, val: col.str.contains(val, regex=True),  # Custom operation
+            "icontains": lambda col, val: col.str.contains(val, case=False, regex=True),  # Custom operation
+            "istartswith": lambda col, val: col.str.startswith(val, case=False),  # Custom operation
+            "iendswith": lambda col, val: col.str.endswith(val, case=False),  # Custom operation
+            "iexact": lambda col, val: col.str.contains(f"^{val}$", case=False, regex=True),  # Added iexact
+            "iregex": lambda col, val: col.str.contains(val, case=False, regex=True),  # Added iregex
         }

     @staticmethod
@@ -216,4 +234,7 @@ class FilterHandler:
         return [
             "gte", "lte", "gt", "lt", "exact", "in", "range",
             "contains", "startswith", "endswith", "isnull",
+            "not_exact", "not_contains", "not_in",
+            "regex", "icontains", "istartswith", "iendswith",
+            "iexact", "iregex"
         ]
```
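The new lookups follow the Django-style `field__operation` filter convention. As a rough illustration of how an operation map like the pandas/Dask one above can be applied, here is a minimal sketch; the DataFrame, column names, and `apply_filters` helper are hypothetical and not part of sibi_dst's API:

```python
import pandas as pd

# Hypothetical helper: resolve Django-style "field__operation" keys against a
# pandas DataFrame using a small subset of the operation map shown above.
OPERATIONS = {
    "exact": lambda col, val: col == val,
    "not_exact": lambda col, val: col != val,
    "not_in": lambda col, val: ~col.isin(val),
    "icontains": lambda col, val: col.str.contains(val, case=False, regex=True),
    "iregex": lambda col, val: col.str.contains(val, case=False, regex=True),
}

def apply_filters(df: pd.DataFrame, **filters) -> pd.DataFrame:
    for key, value in filters.items():
        field, _, operation = key.partition("__")
        operation = operation or "exact"          # bare field name -> exact match
        df = df[OPERATIONS[operation](df[field], value)]
    return df

df = pd.DataFrame({"name": ["Alice", "Bob", "carol"], "status": ["open", "closed", "open"]})
print(apply_filters(df, name__icontains="o", status__not_in=["closed"]))
```

The SQLAlchemy map works the same way, except each lambda returns a column expression (for example `col.ilike(...)` or `col.op("~")(...)`) to be passed to a query's filter clause rather than used as a boolean mask.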
sibi_dst/df_helper/core/_params_config.py
CHANGED

```diff
@@ -29,8 +29,8 @@ LOOKUP_SEP = "__"
 class ParamsConfig(BaseModel):
     field_map: Optional[Dict] = Field(default_factory=dict)
     legacy_filters: bool = False
-    sticky_filters: Dict[str, Union[str, bool, int, float]] = Field(default_factory=dict)
-    filters: Dict[str, Union[str, Dict, bool, int, float]] = Field(default_factory=dict)
+    sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]] = Field(default_factory=dict)
+    filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]] = Field(default_factory=dict)
     df_params: Dict[str, Union[tuple, str, bool, None]] = Field(default_factory=dict)
     df_options: Dict[str, Union[bool, str, None]] = Field(default_factory=dict)
     params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]] = Field(default_factory=dict)
```
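Widening the accepted filter value types to include `list` and `tuple` is what lets multi-value lookups such as `__in`, `__not_in`, and `__range` pass Pydantic validation. A minimal sketch of the same idea; the model name and field values below are illustrative, not the package's own:

```python
from typing import Dict, Union
from pydantic import BaseModel, Field

# Illustrative model: filter values may now be scalars or sequences,
# so lookups like "status__in" or "created_at__range" validate cleanly.
class FilterParams(BaseModel):
    filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]] = Field(default_factory=dict)

params = FilterParams(filters={
    "status__in": ["open", "pending"],               # sequence value
    "created_at__range": ("2024-01-01", "2024-06-30"),
    "is_active": True,
})
print(params.filters)
```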
sibi_dst/utils/__init__.py
CHANGED
```diff
@@ -1,17 +1,17 @@
 from __future__ import annotations

-from ._log_utils import Logger
-from ._date_utils import *
-from ._data_utils import DataUtils
-from ._file_utils import FileUtils
-from ._filepath_generator import FilePathGenerator
-from ._df_utils import DfUtils
-from ._storage_manager import StorageManager
-from ._parquet_saver import ParquetSaver
-from ._clickhouse_writer import ClickHouseWriter
-from ._airflow_manager import AirflowDAGManager
-from ._credentials import *
-from ._data_wrapper import DataWrapper
+from .log_utils import Logger
+from .date_utils import *
+from .data_utils import DataUtils
+from .file_utils import FileUtils
+from .filepath_generator import FilePathGenerator
+from .df_utils import DfUtils
+from .storage_manager import StorageManager
+from .parquet_saver import ParquetSaver
+from .clickhouse_writer import ClickHouseWriter
+from .airflow_manager import AirflowDAGManager
+from .credentials import *
+from .data_wrapper import DataWrapper

 __all__ = [
     "Logger",
```
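Because the renamed modules are still re-exported here, imports from the package root keep working; only code that imported the old underscore-prefixed module paths directly needs updating. For example (illustrative):

```python
# Unchanged: public names are still re-exported from sibi_dst.utils.
from sibi_dst.utils import Logger, DataUtils, ParquetSaver

# Direct module imports must drop the leading underscore in 0.3.27:
# from sibi_dst.utils._parquet_saver import ParquetSaver   # 0.3.25 path
from sibi_dst.utils.parquet_saver import ParquetSaver      # 0.3.27 path
```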
sibi_dst/utils/{_clickhouse_writer.py → clickhouse_writer.py}
CHANGED

```diff
@@ -125,7 +125,7 @@ class ClickHouseWriter:
         """
         Writes the Dask DataFrame to a ClickHouse table partition by partition.
         """
-        if len(self.df.
+        if len(self.df.index) == 0:
             self.logger.debug("No data found. Nothing written.")
             return

```
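The emptiness check now uses `len(self.df.index)`, which gives the row count for both pandas and Dask DataFrames; for Dask it triggers a computation of the partition lengths instead of relying on a lazy attribute. A standalone illustration of the check, not sibi_dst code:

```python
import pandas as pd
import dask.dataframe as dd

# Standalone illustration: len(df.index) returns the row count for pandas and
# Dask DataFrames alike; for Dask it computes the partition lengths.
empty = dd.from_pandas(pd.DataFrame({"a": pd.Series([], dtype="int64")}), npartitions=1)
full = dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}), npartitions=2)

for frame in (empty, full):
    if len(frame.index) == 0:
        print("No data found. Nothing written.")
    else:
        print(f"Would write {len(frame.index)} rows.")
```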
sibi_dst/utils/{_data_utils.py → data_utils.py}
CHANGED

```diff
@@ -95,7 +95,7 @@ class DataUtils:
         # Get unique IDs from source column
         ids = df[source_col].dropna().unique()
         # Compute if it's a Dask Series
-        if isinstance(ids, dd.
+        if isinstance(ids, dd.Series):
             ids = ids.compute()

         # Check if any IDs are found
```
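The surrounding pattern, collect the unique non-null keys and materialize them only when the column is Dask-backed, looks roughly like this standalone sketch (the column name and sample data are hypothetical):

```python
import pandas as pd
import dask.dataframe as dd

def unique_ids(df, source_col: str):
    """Unique non-null values of source_col, materialized if df is Dask-backed."""
    ids = df[source_col].dropna().unique()
    if isinstance(ids, dd.Series):   # Dask .unique() returns a lazy Series
        ids = ids.compute()
    return ids

ddf = dd.from_pandas(pd.DataFrame({"id": [1, 2, 2, None, 3]}), npartitions=2)
print(unique_ids(ddf, "id"))   # e.g. 1.0, 2.0, 3.0 (nulls dropped)
```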
sibi_dst/utils/{_data_wrapper.py → data_wrapper.py}
CHANGED

```diff
@@ -126,32 +126,6 @@ class DataWrapper:
             futures[new_future] = priority
             self.logger.info(f"Resubmitted task for priority {priority} after timeout.")

-    # def process(self):
-    #     """Execute the update plan following the specified hierarchy."""
-    #     update_plan_table = self.generate_update_plan_with_conditions()
-    #
-    #     # Display the update plan table to the user if show_progress is True
-    #     if self.show_progress:
-    #         display(update_plan_table)
-    #
-    #     # Process files according to the hierarchy, considering only `update_required` dates
-    #     for category, description in [
-    #         ("overwrite", "Processing files due to overwrite=True"),
-    #         ("history_days", "Processing files within history_days_threshold"),
-    #         ("missing_files", "Processing missing files")
-    #     ]:
-    #         # Filter dates in the category where `update_required` is True
-    #         dates_to_process = update_plan_table[
-    #             (update_plan_table["update_category"] == category) & (update_plan_table["update_required"])
-    #         ]["date"].tolist()
-    #
-    #         date_iterator = dates_to_process
-    #         if self.show_progress:
-    #             date_iterator = tqdm(date_iterator, desc=f"{description}:{self.dataclass.__name__}", unit="date")
-    #
-    #         for current_date in date_iterator:
-    #             self.process_date(current_date)
-
     def is_file_older_than(self, file_path: str) -> bool:
         """
         Check if a file is older than the specified max_age_minutes.
```
{sibi_dst-0.3.25.dist-info → sibi_dst-0.3.27.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sibi-dst
-Version: 0.3.25
+Version: 0.3.27
 Summary: Data Science Toolkit
 Author: Luis Valverde
 Author-email: lvalverdeb@gmail.com
@@ -32,6 +32,7 @@ Requires-Dist: pymysql (>=1.1.1,<2.0.0)
 Requires-Dist: pytest (>=8.3.3,<9.0.0)
 Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
 Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
+Requires-Dist: s3fs (>=2024.12.0,<2025.0.0)
 Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0)
 Requires-Dist: tornado (>=6.4.1,<7.0.0)
 Requires-Dist: tqdm (>=4.67.0,<5.0.0)
```
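The newly declared `s3fs` requirement is the fsspec backend that lets pandas, Dask, and PyArrow resolve `s3://` URLs. A generic, hypothetical example of what that enables; the bucket, paths, and credentials are placeholders and this is not sibi_dst's API:

```python
import dask.dataframe as dd

# Hypothetical usage enabled by the s3fs dependency: fsspec-style s3:// paths.
storage_options = {"key": "YOUR_ACCESS_KEY", "secret": "YOUR_SECRET_KEY"}

ddf = dd.read_parquet("s3://example-bucket/warehouse/table/", storage_options=storage_options)
ddf.to_parquet("s3://example-bucket/warehouse/table_copy/", storage_options=storage_options)
```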
{sibi_dst-0.3.25.dist-info → sibi_dst-0.3.27.dist-info}/RECORD
CHANGED

```diff
@@ -22,22 +22,22 @@ sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py,sha256=ML-m_WeTR1_UMgiDR
 sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py,sha256=Bmhh6VvmBfNfBA2JpuEdsYD_193yJ768Si2TvkY9HmU,4405
 sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
 sibi_dst/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
-sibi_dst/df_helper/core/_filter_handler.py,sha256=
-sibi_dst/df_helper/core/_params_config.py,sha256=
+sibi_dst/df_helper/core/_filter_handler.py,sha256=g9FMcB_koT724ggcWt98jow2XgUnmupK_fNhF95W5bQ,10217
+sibi_dst/df_helper/core/_params_config.py,sha256=Og3GYth0GVWpcOYWZWRy7CZ5PDsg63Nmqo-W7TUrA_0,3503
 sibi_dst/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
-sibi_dst/utils/__init__.py,sha256
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst/utils/
-sibi_dst-0.3.
-sibi_dst-0.3.
-sibi_dst-0.3.
+sibi_dst/utils/__init__.py,sha256=z51o5sjIo_gTjnDXk5SBniCxWJIrDBMS7df0dTs8VMk,775
+sibi_dst/utils/airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
+sibi_dst/utils/clickhouse_writer.py,sha256=xUhFDOuZt0eZDpVJNuLb7pfTHUV06NCYrNUx_a7qrSM,8580
+sibi_dst/utils/credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
+sibi_dst/utils/data_utils.py,sha256=Kv87Br78EXlH_MSVzRspqLwrf6sqHIRQc0t3LDI0dSM,7045
+sibi_dst/utils/data_wrapper.py,sha256=DTK4hd_GUUi5lxcbEbMraKwzpmPh2IwX6WNWA4t-vx0,10693
+sibi_dst/utils/date_utils.py,sha256=CMAZBNwVj7cvERcNiTA8Pf7_5EjV9By9yxkYJpkqz1g,10656
+sibi_dst/utils/df_utils.py,sha256=OFEtcwVKIilvf9qVf-IfIOHp4jcFAHX5l2IDGudhPZg,10989
+sibi_dst/utils/file_utils.py,sha256=JpsybYj3XvVJisSBeVU6YSaZnYRm4_6YWTI3TLnnY4Y,1257
+sibi_dst/utils/filepath_generator.py,sha256=hjI7gQwfwRToPeuzoUQDayHKQrr4Ivhi4Chl1J4Phlk,6689
+sibi_dst/utils/log_utils.py,sha256=4eLmoV8VC7wDwPr1mRfDKP24_-laGO6ogE4U0u3DUuA,2315
+sibi_dst/utils/parquet_saver.py,sha256=hLrWr1G132y94eLopDPPGQGDsAiR1lQ8id4QQtGYPE4,4349
+sibi_dst/utils/storage_manager.py,sha256=7nkfeBW_2xlF59pGj7V2aY5TLwpJnPQuPVclqjavJOA,3856
+sibi_dst-0.3.27.dist-info/METADATA,sha256=YFb0ZGbz2m0-aczvItyKK4Iqf1wn6pSVGE41ZUQ6YI8,2265
+sibi_dst-0.3.27.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+sibi_dst-0.3.27.dist-info/RECORD,,
```
The remaining entries in the summary above, the eight renamed utils modules and the WHEEL file, have no content changes between 0.3.25 and 0.3.27.