sibi-dst 0.3.20__py3-none-any.whl → 0.3.21__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (45)
  1. sibi_dst/__init__.py +1 -1
  2. sibi_dst/df_helper/__init__.py +2 -2
  3. sibi_dst/df_helper/_df_helper.py +34 -33
  4. sibi_dst/df_helper/_parquet_artifact.py +4 -1
  5. sibi_dst/df_helper/_parquet_reader.py +2 -1
  6. sibi_dst/df_helper/backends/django/__init__.py +1 -2
  7. sibi_dst/df_helper/backends/django/_django_db_connection.py +1 -1
  8. sibi_dst/df_helper/backends/django/_django_load_from_db.py +6 -8
  9. sibi_dst/df_helper/backends/django/_django_sql_model_builder.py +5 -5
  10. sibi_dst/df_helper/backends/django/_io_dask.py +0 -1
  11. sibi_dst/df_helper/backends/django/_io_dask_alt.py +5 -4
  12. sibi_dst/df_helper/backends/http/__init__.py +2 -2
  13. sibi_dst/df_helper/backends/http/_http_config.py +6 -3
  14. sibi_dst/df_helper/backends/parquet/__init__.py +3 -3
  15. sibi_dst/df_helper/backends/parquet/_parquet_filter_handler.py +4 -2
  16. sibi_dst/df_helper/backends/parquet/_parquet_options.py +12 -7
  17. sibi_dst/df_helper/backends/sql_alchemy/__init__.py +2 -2
  18. sibi_dst/df_helper/backends/sql_alchemy/_io_sqlalchemy_dask.py +3 -1
  19. sibi_dst/df_helper/backends/sql_alchemy/_sqlachemy_filter_handler.py +2 -3
  20. sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_db_connection.py +3 -3
  21. sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_load_from_db.py +2 -2
  22. sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_model_builder.py +5 -3
  23. sibi_dst/df_helper/backends/sql_model/__init__.py +1 -1
  24. sibi_dst/df_helper/backends/sql_model/_sqlmodel_db_connection.py +5 -4
  25. sibi_dst/df_helper/backends/sql_model/_sqlmodel_load_from_db.py +13 -11
  26. sibi_dst/df_helper/core/_defaults.py +9 -6
  27. sibi_dst/df_helper/core/_filter_handler.py +7 -4
  28. sibi_dst/df_helper/core/_params_config.py +3 -2
  29. sibi_dst/df_helper/core/_query_config.py +0 -2
  30. sibi_dst/utils/__init__.py +10 -9
  31. sibi_dst/utils/_airflow_manager.py +4 -3
  32. sibi_dst/utils/_clickhouse_writer.py +16 -13
  33. sibi_dst/utils/_credentials.py +1 -1
  34. sibi_dst/utils/_data_wrapper.py +7 -4
  35. sibi_dst/utils/_date_utils.py +11 -5
  36. sibi_dst/utils/_df_utils.py +9 -5
  37. sibi_dst/utils/_file_utils.py +3 -1
  38. sibi_dst/utils/_filepath_generator.py +4 -2
  39. sibi_dst/utils/_log_utils.py +1 -1
  40. sibi_dst/utils/_parquet_saver.py +0 -2
  41. sibi_dst/utils/_storage_manager.py +1 -1
  42. {sibi_dst-0.3.20.dist-info → sibi_dst-0.3.21.dist-info}/METADATA +1 -1
  43. sibi_dst-0.3.21.dist-info/RECORD +47 -0
  44. sibi_dst-0.3.20.dist-info/RECORD +0 -47
  45. {sibi_dst-0.3.20.dist-info → sibi_dst-0.3.21.dist-info}/WHEEL +0 -0
@@ -1,19 +1,21 @@
- import dask.dataframe as dd
- from sqlmodel import Session, select, text
- from typing import Any, Dict, Optional
  import logging
+ from typing import Any, Dict, Optional
+
+ import dask.dataframe as dd
  import pandas as pd
+ from sqlmodel import Session, select, text
+

  class SQLModelLoadFromDb:
  df: dd.DataFrame

  def __init__(
- self,
- db_connection,
- db_query: Optional[Dict[str, Any]] = None,
- db_params: Optional[Dict[str, Any]] = None,
- logger=None,
- **kwargs,
+ self,
+ db_connection,
+ db_query: Optional[Dict[str, Any]] = None,
+ db_params: Optional[Dict[str, Any]] = None,
+ logger=None,
+ **kwargs,
  ):
  """
  Initialize the loader with database connection, query, and parameters.
@@ -74,7 +76,7 @@ class SQLModelLoadFromDb:
  results = session.exec(query).fetchall()

  # Convert query results to a Dask DataFrame
- print("results:",results)
+ print("results:", results)
  if results:
  df = dd.from_pandas(pd.DataFrame([r.dict() for r in results]), npartitions=1)
  else:
@@ -96,4 +98,4 @@ class SQLModelLoadFromDb:
  if field_map:
  rename_mapping = {k: v for k, v in field_map.items() if k in self.df.columns}
  if rename_mapping:
- self.df = self.df.rename(columns=rename_mapping, meta={v: "object" for v in rename_mapping.values()})
+ self.df = self.df.rename(columns=rename_mapping, meta={v: "object" for v in rename_mapping.values()})
@@ -54,8 +54,10 @@ django_field_conversion_map_dask: Dict[str, callable] = {
  "BooleanField": lambda x: x.astype(bool),
  "NullBooleanField": lambda x: x.astype(bool),
  "DateTimeField": lambda x: pd.to_datetime(x, errors="coerce"),
- "DateField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.date, meta=("date", "object")),
- "TimeField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.time, meta=("time", "object")),
+ "DateField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.date,
+ meta=("date", "object")),
+ "TimeField": lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.time,
+ meta=("time", "object")),
  "DurationField": lambda x: pd.to_timedelta(x, errors="coerce"),
  "JSONField": lambda x: x.map_partitions(lambda s: s.apply(json.loads), meta=("json", "object")),
  "ArrayField": lambda x: x.map_partitions(lambda s: s.apply(eval), meta=("array", "object")),
@@ -72,12 +74,15 @@ sqlalchemy_field_conversion_map_dask: Dict[str, callable] = {
  Numeric.__name__: lambda x: pd.to_numeric(x, errors="coerce"),
  Boolean.__name__: lambda x: x.astype(bool),
  DateTime.__name__: lambda x: pd.to_datetime(x, errors="coerce"),
- Date.__name__: lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.date, meta=("date", "object")),
- Time.__name__: lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.time, meta=("time", "object")),
+ Date.__name__: lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.date,
+ meta=("date", "object")),
+ Time.__name__: lambda x: pd.to_datetime(x, errors="coerce").map_partitions(lambda x: x.dt.time,
+ meta=("time", "object")),
  JSON.__name__: lambda x: x.map_partitions(lambda s: s.apply(json.loads), meta=("json", "object")),
  UUID.__name__: lambda x: x.astype(str),
  }

+
  # Conversion map with normalized SQLAlchemy field types
  # sqlalchemy_field_conversion_map_dask: Dict[str, callable] = {
  # "String": lambda x: x.map_partitions(lambda s: s.astype(str), meta=("string", "string")),
@@ -129,5 +134,3 @@ def normalize_sqlalchemy_type(field_type):

  # Fallback to raw class name
  return field_type.__class__.__name__
-
-
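
For orientation, the snippet below is a minimal, self-contained sketch of how a conversion map of this shape can be applied column by column to a Dask DataFrame. The sample frame, the `field_types` mapping, and the use of `dd.to_datetime` (rather than the `pd.to_datetime(...).map_partitions(...)` pattern shown above) are assumptions for illustration, not code from the package.

```python
import dask.dataframe as dd
import pandas as pd

# Hypothetical sample data; column names and the field-type mapping below
# are illustrative only.
pdf = pd.DataFrame({"created_at": ["2024-01-02", "not-a-date"], "is_active": [1, 0]})
df = dd.from_pandas(pdf, npartitions=1)

# A reduced conversion map in the spirit of django_field_conversion_map_dask,
# using dd.to_datetime so the conversion stays lazy on the Dask collection.
conversion_map = {
    "DateTimeField": lambda s: dd.to_datetime(s, errors="coerce"),
    "BooleanField": lambda s: s.astype(bool),
}

# field_types would normally come from model introspection (assumed here).
field_types = {"created_at": "DateTimeField", "is_active": "BooleanField"}
for column, field_type in field_types.items():
    converter = conversion_map.get(field_type)
    if converter is not None:
        df[column] = converter(df[column])

print(df.compute())
```
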
@@ -1,10 +1,13 @@
  import datetime
+
  import dask.dataframe as dd
  import pandas as pd
  from sqlalchemy import func, cast
  from sqlalchemy.sql.sqltypes import Date, Time
+
  from sibi_dst.utils import Logger

+
  class FilterHandler:
  def __init__(self, backend, logger=None):
  """
@@ -15,7 +18,8 @@ class FilterHandler:
  logger: Optional logger for debugging purposes.
  """
  self.backend = backend
- self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)  # No-op logger if none provided
+ self.logger = logger or Logger.default_logger(
+ logger_name=self.__class__.__name__)  # No-op logger if none provided
  self.backend_methods = self._get_backend_methods(backend)

  def apply_filters(self, query_or_df, model=None, filters=None):
@@ -34,7 +38,7 @@
  for key, value in filters.items():
  field_name, casting, operation = self._parse_filter_key(key)
  parsed_value = self._parse_filter_value(casting, value)
- #print(field_name, casting, operation, parsed_value)
+ # print(field_name, casting, operation, parsed_value)
  # Get the column and apply backend-specific transformations
  if self.backend == "sqlalchemy":
  column = self.backend_methods["get_column"](field_name, model, casting)
@@ -67,7 +71,6 @@

  return field_name, casting, operation

-
  def _parse_filter_value(self, casting, value):
  """
  Convert filter value to appropriate type based on the casting (e.g., date).
@@ -213,4 +216,4 @@
  return [
  "gte", "lte", "gt", "lt", "exact", "in", "range",
  "contains", "startswith", "endswith", "isnull",
- ]
+ ]
@@ -1,7 +1,7 @@
+ from typing import Optional, Dict, Union, List

  from pydantic import BaseModel, model_validator, Field

- from typing import Optional, Dict, Union, List
  dataframe_params: Dict[str, Union[None, str, bool, int, None]] = {
  "fieldnames": None,
  "index_col": None,
@@ -25,6 +25,7 @@ dataframe_options: Dict[str, Union[bool, str, int, None]] = {

  LOOKUP_SEP = "__"

+
  class ParamsConfig(BaseModel):
  field_map: Optional[Dict] = Field(default_factory=dict)
  legacy_filters: bool = False
@@ -76,4 +77,4 @@ class ParamsConfig(BaseModel):
  new_filter_field = LOOKUP_SEP.join(new_parts)
  new_filters[new_filter_field] = value

- self.filters = new_filters
+ self.filters = new_filters
@@ -1,7 +1,5 @@
  from typing import Optional

- import dask.dataframe as dd
- import pandas as pd
  from pydantic import BaseModel, model_validator

@@ -1,18 +1,19 @@
  from __future__ import annotations
+
+ from ._airflow_manager import AirflowDAGManager
+ from ._clickhouse_writer import ClickHouseWriter
  from ._credentials import *
- from ._log_utils import Logger
- from ._date_utils import *
  from ._data_utils import DataUtils
+ from ._data_wrapper import DataWrapper
+ from ._date_utils import *
+ from ._df_utils import DfUtils
  from ._file_utils import FileUtils
  from ._filepath_generator import FilePathGenerator
- from ._df_utils import DfUtils
- from ._storage_manager import StorageManager
+ from ._log_utils import Logger
  from ._parquet_saver import ParquetSaver
- from ._clickhouse_writer import ClickHouseWriter
- from ._data_wrapper import DataWrapper
- from ._airflow_manager import AirflowDAGManager
+ from ._storage_manager import StorageManager

- __all__=[
+ __all__ = [
  "ConfigManager",
  "ConfigLoader",
  "Logger",
@@ -27,4 +28,4 @@ __all__=[
  "DfUtils",
  "ClickHouseWriter",
  "AirflowDAGManager",
- ]
+ ]
@@ -1,8 +1,9 @@
  import os
- from jinja2 import Template
  from datetime import datetime
+
  import fsspec
  import httpx
+ from jinja2 import Template

  """
  A manager to dynamically generate, save, and upload Airflow DAGs via SSH using fsspec.
@@ -54,8 +55,8 @@ with DAG(
  {% endfor %}
  """

- class AirflowDAGManager:

+ class AirflowDAGManager:

  def __init__(self, output_dir, remote_dags_path, ssh_host, ssh_user, ssh_password, url, auth, wrapper_module_path):
  """
@@ -208,4 +209,4 @@ class AirflowDAGManager:
  return response.json()
  except httpx.RequestError as e:
  print(f"Failed to trigger DAG {dag_id}: {e}")
- raise
+ raise
@@ -1,9 +1,12 @@
+ from concurrent.futures import ThreadPoolExecutor
+
  import clickhouse_connect
+ import pandas as pd
  from clickhouse_driver import Client
  from dask.dataframe import dd
- import pandas as pd
+
  from sibi_dst.utils import Logger
- from concurrent.futures import ThreadPoolExecutor
+

  class ClickHouseWriter:
  dtype_to_clickhouse = {
@@ -19,20 +22,20 @@ class ClickHouseWriter:
  df: dd.DataFrame

  def __init__(self, logger=None, **kwargs):
- self.clickhouse_host = kwargs.setdefault('host',"localhost")
- self.clickhouse_port = kwargs.setdefault('port',8123)
- self.clickhouse_dbname = kwargs.setdefault('database','sibi_data')
- self.clickhouse_user = kwargs.setdefault('user','default')
- self.clickhouse_password = kwargs.setdefault('password','')
- self.clickhouse_table = kwargs.setdefault('table','test_sibi_table')
+ self.clickhouse_host = kwargs.setdefault('host', "localhost")
+ self.clickhouse_port = kwargs.setdefault('port', 8123)
+ self.clickhouse_dbname = kwargs.setdefault('database', 'sibi_data')
+ self.clickhouse_user = kwargs.setdefault('user', 'default')
+ self.clickhouse_password = kwargs.setdefault('password', '')
+ self.clickhouse_table = kwargs.setdefault('table', 'test_sibi_table')

  self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
  self.client = None
- self.order_by=kwargs.setdefault('order_by','id')
+ self.order_by = kwargs.setdefault('order_by', 'id')

  def save_to_clickhouse(self, df, **kwargs):
  self.df = df.copy()
- self.order_by = kwargs.setdefault('order_by',self.order_by)
+ self.order_by = kwargs.setdefault('order_by', self.order_by)
  if len(self.df.head().index) == 0:
  self.logger.debug("Dataframe is empty")
  return
@@ -86,8 +89,8 @@ class ClickHouseWriter:
  if engine is None:
  engine = f"ENGINE = MergeTree() order by {self.order_by}"
  dtypes = self.df.dtypes
- clickhouse_schema = self._generate_clickhouse_schema(dtypes,self.dtype_to_clickhouse)
- create_table_sql= f"CREATE TABLE IF NOT EXISTS {self.clickhouse_table} ({clickhouse_schema}) {engine};"
+ clickhouse_schema = self._generate_clickhouse_schema(dtypes, self.dtype_to_clickhouse)
+ create_table_sql = f"CREATE TABLE IF NOT EXISTS {self.clickhouse_table} ({clickhouse_schema}) {engine};"
  self.logger.debug(f"Creating table SQL:{create_table_sql}")
  if self.client:
  self.client.command(create_table_sql)
@@ -200,4 +203,4 @@ class ClickHouseWriter:
  with ThreadPoolExecutor() as executor:
  executor.map(write_partition, partitions, range(len(partitions)))
  except Exception as e:
- self.logger.error(f"Error during multi-partition write: {e}")
+ self.logger.error(f"Error during multi-partition write: {e}")
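
As a rough sketch of what the table-creation step above amounts to, the following standalone snippet maps pandas dtypes to ClickHouse column types and builds the same style of `CREATE TABLE ... ENGINE = MergeTree()` statement; the dtype map values, the backtick quoting, and the table name are assumptions for illustration.

```python
import pandas as pd

# Reduced dtype map in the spirit of ClickHouseWriter.dtype_to_clickhouse (values assumed).
dtype_to_clickhouse = {
    "int64": "Int64",
    "float64": "Float64",
    "object": "String",
    "datetime64[ns]": "DateTime",
}

df = pd.DataFrame({"id": [1, 2], "amount": [9.5, 3.0], "note": ["a", "b"]})

# Build the column list from the frame's dtypes, falling back to String.
columns = ", ".join(
    f"`{name}` {dtype_to_clickhouse.get(str(dtype), 'String')}"
    for name, dtype in df.dtypes.items()
)
order_by = "id"
create_table_sql = (
    f"CREATE TABLE IF NOT EXISTS test_sibi_table ({columns}) "
    f"ENGINE = MergeTree() order by {order_by};"
)
print(create_table_sql)
```
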
@@ -1,5 +1,6 @@
  import os

+
  class ConfigLoader:
  def __init__(self, prefix, keys, defaults=None):
  """
@@ -54,4 +55,3 @@ class ConfigManager:
  :return: The configuration dictionary.
  """
  return self.configurations.get(name, {})
-
@@ -1,12 +1,15 @@
  import datetime
  from typing import Type, Any, Dict, Optional
+
  import fsspec
  import pandas as pd
  from IPython.display import display
- from sibi_dst.utils import Logger
  from tqdm import tqdm
+
+ from sibi_dst.utils import Logger
  from sibi_dst.utils import ParquetSaver

+
  class DataWrapper:
  DEFAULT_MAX_AGE_MINUTES = 1440
  DEFAULT_HISTORY_DAYS_THRESHOLD = 30
@@ -89,7 +92,7 @@ class DataWrapper:
  # Filter dates in the category where `update_required` is True
  dates_to_process = update_plan_table[
  (update_plan_table["update_category"] == category) & (update_plan_table["update_required"])
- ]["date"].tolist()
+ ]["date"].tolist()

  date_iterator = dates_to_process
  if self.show_progress:
@@ -130,7 +133,7 @@ class DataWrapper:
  data_object = self.dataclass(**self.class_params)
  df = data_object.load_period(dt_field=self.date_field, start=date, end=date)

- if len(df.index)==0:
+ if len(df.index) == 0:
  self.logger.error("No data found for the specified date.")
  return

@@ -194,7 +197,7 @@ class DataWrapper:
  "missing_file": missing_file,
  "update_required": update_required,
  "update_category": category,
- "datawrapper class":self.dataclass.__name__
+ "datawrapper class": self.dataclass.__name__
  })

  update_plan_table = pd.DataFrame(rows)
@@ -1,8 +1,9 @@
  import datetime
- from typing import Union, Tuple, Callable, Dict, Any
+ from typing import Union, Tuple, Callable, Dict

  import numpy as np
  import pandas as pd
+
  from sibi_dst.utils import Logger

@@ -32,7 +33,8 @@ class DateUtils:
  raise ValueError(f"Unsupported date format: {value}")

  @classmethod
- def calc_week_range(cls, reference_date: Union[str, datetime.date, datetime.datetime, pd.Timestamp]) -> Tuple[datetime.date, datetime.date]:
+ def calc_week_range(cls, reference_date: Union[str, datetime.date, datetime.datetime, pd.Timestamp]) -> Tuple[
+ datetime.date, datetime.date]:
  """
  Calculate the start and end of the week for a given reference date.
  """
@@ -49,7 +51,8 @@ class DateUtils:
  return datetime.date(year, 1, 1), datetime.date(year, 12, 31)

  @classmethod
- def get_first_day_of_the_quarter(cls, reference_date: Union[str, datetime.date, datetime.datetime, pd.Timestamp]) -> datetime.date:
+ def get_first_day_of_the_quarter(cls, reference_date: Union[
+ str, datetime.date, datetime.datetime, pd.Timestamp]) -> datetime.date:
  """
  Get the first day of the quarter for a given date.
  """
@@ -58,7 +61,8 @@ class DateUtils:
  return datetime.date(reference_date.year, 3 * quarter - 2, 1)

  @classmethod
- def get_last_day_of_the_quarter(cls, reference_date: Union[str, datetime.date, datetime.datetime, pd.Timestamp]) -> datetime.date:
+ def get_last_day_of_the_quarter(cls, reference_date: Union[
+ str, datetime.date, datetime.datetime, pd.Timestamp]) -> datetime.date:
  """
  Get the last day of the quarter for a given date.
  """
@@ -116,10 +120,12 @@ class DateUtils:
  'current_month': lambda: cls.get_month_range(n=0),
  'last_month': lambda: cls.get_month_range(n=-1),
  'current_year': lambda: cls.get_year_timerange(today().year),
- 'current_quarter': lambda: (cls.get_first_day_of_the_quarter(today()), cls.get_last_day_of_the_quarter(today())),
+ 'current_quarter': lambda: (
+ cls.get_first_day_of_the_quarter(today()), cls.get_last_day_of_the_quarter(today())),
  'ytd': lambda: (datetime.date(today().year, 1, 1), today()),
  }

+
  class BusinessDays:
  def __init__(self, holiday_list, logger):
  """
@@ -1,7 +1,9 @@
- import pandas as pd
  import dask.dataframe as dd
+ import pandas as pd
+
  from ._log_utils import Logger

+
  class DfUtils:
  def __init__(self, logger=None):
  """
@@ -210,7 +212,7 @@ class DfUtils:
  df['Total'] = df.sum(axis=1, numeric_only=True)
  return df

- def summarise_data(self,df, summary_column, values_column, rule='D', agg_func='count'):
+ def summarise_data(self, df, summary_column, values_column, rule='D', agg_func='count'):
  """
  Summarizes data by creating a pivot table and resampling.

@@ -233,10 +235,12 @@ class DfUtils:
  df = df.set_index(dd.to_datetime(df.index))

  # Group by index and summary columns
- df_grouped = df.groupby([dd.to_datetime(df.index)] + [summary_column])[values_column].agg(agg_func).reset_index()
+ df_grouped = df.groupby([dd.to_datetime(df.index)] + [summary_column])[values_column].agg(
+ agg_func).reset_index()

  # Pivot the table
- df_pivot = df_grouped.pivot_table(index='index', columns=summary_column, values=values_column, aggfunc='sum').fillna(0)
+ df_pivot = df_grouped.pivot_table(index='index', columns=summary_column, values=values_column,
+ aggfunc='sum').fillna(0)

  # Resample
  df_pivot.index = dd.to_datetime(df_pivot.index)
@@ -269,4 +273,4 @@ class DfUtils:
  Returns:
  DataFrame: Resampled pivot table.
  """
- return DfUtils.summarise_data(df, summary_columns, value_columns, rule=rule, agg_func=agg_func)
+ return DfUtils.summarise_data(df, summary_columns, value_columns, rule=rule, agg_func=agg_func)
@@ -1,10 +1,12 @@
  import shutil
  from pathlib import Path
  from typing import Optional
+
  import fsspec

  from sibi_dst.utils import Logger

+
  class FileUtils:
  def __init__(self, logger=None):
  self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
@@ -27,7 +29,7 @@ class FileUtils:
  fs.mkdirs(path)

  @staticmethod
- def construct_full_path(storage_path:str, parquet_filename: Optional[str]) -> Path:
+ def construct_full_path(storage_path: str, parquet_filename: Optional[str]) -> Path:
  """Construct and return the full path for the parquet file."""
  fs, base_path = fsspec.core.url_to_fs(storage_path)
  parquet_filename = parquet_filename or "default.parquet"
@@ -1,7 +1,8 @@
  import datetime
- import fsspec
  import re

+ import fsspec
+
  from sibi_dst.utils import Logger


@@ -150,6 +151,7 @@ class FilePathGenerator:
  return datetime.datetime.strptime(date, '%Y-%m-%d')
  return date

+
  """
  Usage:
  # Initialize the generator
@@ -182,4 +184,4 @@ for fp in file_paths:

  df_pandas = pd.concat(dataframes, ignore_index=True)
  print(df_pandas.head())
- """
+ """
@@ -71,4 +71,4 @@ class Logger:
  self.logger.error(msg)

  def critical(self, msg):
- self.logger.critical(msg)
+ self.logger.critical(msg)
@@ -1,7 +1,6 @@
  from pathlib import Path
  from typing import Optional

- import dask_expr
  import fsspec
  import pyarrow as pa

@@ -103,4 +102,3 @@ class ParquetSaver:
  self.df_result.to_parquet(
  str(full_path), engine="pyarrow", schema=schema, write_index=False
  )
-
@@ -1,4 +1,5 @@
  from types import SimpleNamespace
+
  import fsspec


@@ -86,4 +87,3 @@ class StorageManager:
  print("Rebuilding depot structure...")
  self.rebuild_depot_paths(depots, clear_existing=clear_existing)
  print("Rebuild complete.")
-
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 0.3.20
+ Version: 0.3.21
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
@@ -0,0 +1,47 @@
+ sibi_dst/__init__.py,sha256=CLHfzrFNqklNx5uMKAPtbZfkbBbVYR5qsiMro0RTfmA,252
+ sibi_dst/df_helper/__init__.py,sha256=aiAu7j1SWDiw3RVI4UJmvLcADP34OfrJTCYpdupPGII,234
+ sibi_dst/df_helper/_df_helper.py,sha256=3fibDnRAX4R0v-xgfG87BKLR-ZCg8AZWrKDIO7qbBPM,13953
+ sibi_dst/df_helper/_parquet_artifact.py,sha256=nx1wTEyrjARpCCPNwBxYiBROee3CSb6c-u7Cpme_tdk,4978
+ sibi_dst/df_helper/_parquet_reader.py,sha256=sbe8DsScNT2h6huNsz8mUxVnUGpJeRzbaONZ3u2sQeQ,1685
+ sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sibi_dst/df_helper/backends/django/__init__.py,sha256=Py4XGV8OnWv_6qkdlJ0hhc1_xT_NLMS712N00CJOg0w,270
+ sibi_dst/df_helper/backends/django/_django_db_connection.py,sha256=GypF84Ej7ViTID4r3UEReRGmLiyfMtEknPI_NINSm3g,1641
+ sibi_dst/df_helper/backends/django/_django_load_from_db.py,sha256=GLsAsuEQD1cXfEm7BuxofZfR32VwZNEfwR9c-AZn-x0,5555
+ sibi_dst/df_helper/backends/django/_django_sql_model_builder.py,sha256=xyOq0JY0k9380aBeV66RXbeXL-mF22CczbQQoXLDhuo,14884
+ sibi_dst/df_helper/backends/django/_io_dask.py,sha256=jryDojeA62rB3seRaWWMjsAmekKacK5xctwCQGVklPQ,9063
+ sibi_dst/df_helper/backends/django/_io_dask_alt.py,sha256=HUiThJ2hymh95KBN_I5aWiz8z6STd4C48e1y9_vZd6Y,6829
+ sibi_dst/df_helper/backends/http/__init__.py,sha256=d1pfgYxbiYg7E0Iw8RbJ7xfqIfJShqqTBQQGU_S6OOo,105
+ sibi_dst/df_helper/backends/http/_http_config.py,sha256=l6GdzTsknfzyf8LAo_TuIWeiswLRRrLcmqAmirxpH8Q,2132
+ sibi_dst/df_helper/backends/parquet/__init__.py,sha256=ed2H3DrhV6wWNc5e1YiP5ScZErt3Hp1dfTDCcDObrgo,190
+ sibi_dst/df_helper/backends/parquet/_parquet_filter_handler.py,sha256=Q8Ic9PLDGT4L97yqr20mr_NsdEeMMOlFkT7Z12yYCxI,3663
+ sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=5fAv7KzSRvCpW-6ZiXcvrWAyf1KThs1qCgtrzGo3x8A,4503
+ sibi_dst/df_helper/backends/sql_alchemy/__init__.py,sha256=072YxHXqV1wn6xo6clrgvh8kpvlOmboIQW9tyOPHXAY,369
+ sibi_dst/df_helper/backends/sql_alchemy/_io_sqlalchemy_dask.py,sha256=UuAHzZWBADsTwGhwZTJzR66Xdh189OR81C1IITwzls0,5620
+ sibi_dst/df_helper/backends/sql_alchemy/_sqlachemy_filter_handler.py,sha256=58RCda1Hg_nsuJw-2V36IstsT8O84IQFgsdE7FnqvMk,4655
+ sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_db_connection.py,sha256=6Jxj3bhGJadHcJOCXtNo0YZUMO6RNgbtXhlghkfOtW8,1648
+ sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_load_from_db.py,sha256=9z7gUy441T6ww1hpbt3xgxv0jFf8W_zzq0PfkkB126E,2237
+ sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_model_builder.py,sha256=Bmhh6VvmBfNfBA2JpuEdsYD_193yJ768Si2TvkY9HmU,4405
+ sibi_dst/df_helper/backends/sql_model/__init__.py,sha256=sbulOpXqT0O5k1FsSbhr3pPgi5sV_uJ_hPWpoJa_754,226
+ sibi_dst/df_helper/backends/sql_model/_sqlmodel_db_connection.py,sha256=ama9oTLBQ2ATUVrX2OGvMT23ia5RblakIgZ7f2O9ZgA,4267
+ sibi_dst/df_helper/backends/sql_model/_sqlmodel_load_from_db.py,sha256=5PTnXN_ZFvtdLnov1CjEFQE77YvuoGwG1dj0KQzrY_k,3714
+ sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
+ sibi_dst/df_helper/core/_defaults.py,sha256=eNpHD2sZxir-2xO0b3_V16ryw8YP_5FfpIKK0HNuiN4,7011
+ sibi_dst/df_helper/core/_filter_handler.py,sha256=1-IdviSYi5Hc28KckO4dkYHDfQ8X9SUb6kwfobm16_E,8580
+ sibi_dst/df_helper/core/_params_config.py,sha256=mM1CnF29zls5LXx7rpKY8uix_GyOG5smO4ry_OX31IU,3477
+ sibi_dst/df_helper/core/_query_config.py,sha256=Y8LVSyaKuVkrPluRDkQoOwuXHQxner1pFWG3HPfnDHM,441
+ sibi_dst/utils/__init__.py,sha256=5WeBMxhNGB8ZpHUrp1NOQf8Kn0bLOtjrerFjQdFTa7U,787
+ sibi_dst/utils/_airflow_manager.py,sha256=-d44EKUZNYJyp4wuNwRvilRQktunArPOB5fZuWdQv10,7526
+ sibi_dst/utils/_clickhouse_writer.py,sha256=dL5pixjn4cj0Rwpc3POfCcY2D-aQCMbPSECX0dKATyE,8587
+ sibi_dst/utils/_credentials.py,sha256=cHJPPsmVyijqbUQIq7WWPe-lIallA-mI5RAy3YUuRME,1724
+ sibi_dst/utils/_data_utils.py,sha256=ch4j5FEs8ZnniUzpbeLO-b4Yco_6nwCu71xHaVqMGi4,7050
+ sibi_dst/utils/_data_wrapper.py,sha256=BmKFO70xVX3AjpGRzqrc6HS1Uw4xerZDx3IpFrcrIIo,9020
+ sibi_dst/utils/_date_utils.py,sha256=CMAZBNwVj7cvERcNiTA8Pf7_5EjV9By9yxkYJpkqz1g,10656
+ sibi_dst/utils/_df_utils.py,sha256=NHnEJG9KDeRuqfE60kwBOO21B6Hvjh7PzE5B8cQrIXc,10986
+ sibi_dst/utils/_file_utils.py,sha256=JpsybYj3XvVJisSBeVU6YSaZnYRm4_6YWTI3TLnnY4Y,1257
+ sibi_dst/utils/_filepath_generator.py,sha256=hjI7gQwfwRToPeuzoUQDayHKQrr4Ivhi4Chl1J4Phlk,6689
+ sibi_dst/utils/_log_utils.py,sha256=AvKu5Qmi9LXezA7gdkG7yV-MvzZeav8c3PK8s-DwTGE,2314
+ sibi_dst/utils/_parquet_saver.py,sha256=hLrWr1G132y94eLopDPPGQGDsAiR1lQ8id4QQtGYPE4,4349
+ sibi_dst/utils/_storage_manager.py,sha256=7nkfeBW_2xlF59pGj7V2aY5TLwpJnPQuPVclqjavJOA,3856
+ sibi_dst-0.3.21.dist-info/METADATA,sha256=P0GRxm9kh8V1mSjJgfvBSQplvfTuDnPbZZ71uOzwPJQ,2134
+ sibi_dst-0.3.21.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ sibi_dst-0.3.21.dist-info/RECORD,,
@@ -1,47 +0,0 @@
1
- sibi_dst/__init__.py,sha256=1KaC0LYTHxjpENq-NXI325WcEYZ8GCBrHGkLoFxEcu0,251
2
- sibi_dst/df_helper/__init__.py,sha256=rbTr9CqwbJhu8pbZabwfcOqhm-5hm2iXk0vVBtK01bA,231
3
- sibi_dst/df_helper/_df_helper.py,sha256=e6e32CRTCKjFVvYMytWTuBVpwB1VcnVQ1T4Rg8KXWvY,13917
4
- sibi_dst/df_helper/_parquet_artifact.py,sha256=ctISmwxP9icFCXsELBjbPiz-FK3CEojN7yNIlStdOWw,4974
5
- sibi_dst/df_helper/_parquet_reader.py,sha256=A8qWuWQiaiS7pk4sD5EDAvGs-qz7VfziINXpSA7o00U,1683
6
- sibi_dst/df_helper/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- sibi_dst/df_helper/backends/django/__init__.py,sha256=VkvYql-PUyCKOfoKx5aGdUAki8X-kULfhnCvgSQzHDQ,311
8
- sibi_dst/df_helper/backends/django/_django_db_connection.py,sha256=9fGvXXgqPk_TC7BvaSljxTgNDWoXi_LZxKabEgExznM,1654
9
- sibi_dst/df_helper/backends/django/_django_load_from_db.py,sha256=E_6ptiouluyLziXkNy_MztRi36qqW7-3AvlafL78Sug,5592
10
- sibi_dst/df_helper/backends/django/_django_sql_model_builder.py,sha256=GprCh2c6PFlRBUCir4hh8pmf4Eqb-4OIK6Vz_xXQjMw,14864
11
- sibi_dst/df_helper/backends/django/_io_dask.py,sha256=P3WmkuFzmWRzFchjsVD2OElIR3stuevwDH9G6Mu8IWE,9080
12
- sibi_dst/df_helper/backends/django/_io_dask_alt.py,sha256=zDjLyYxBeL0ffn3yfE_7vqMLMpeEEk2o-zMr66sKkDw,6827
13
- sibi_dst/df_helper/backends/http/__init__.py,sha256=AG9JSDRyVna2r1yxCQ9HcY32EaGnzWsfKgNLgPpSXjY,102
14
- sibi_dst/df_helper/backends/http/_http_config.py,sha256=NN3bol7NgBTDv70yOX7hJkazt1-dAAdFWVkYyHdIXsI,2128
15
- sibi_dst/df_helper/backends/parquet/__init__.py,sha256=ClkyIsIh_ovEwqm0dTrkXImbPjLDTVHW2NQqqfQwWAw,187
16
- sibi_dst/df_helper/backends/parquet/_parquet_filter_handler.py,sha256=6iFvblnVq0qj89QvieQuYxe_2RPX5ArKfq5zBcEIj90,3660
17
- sibi_dst/df_helper/backends/parquet/_parquet_options.py,sha256=09b9yLPREvx6ebs62B9qEqJt1cCKJz97plGW82i4630,4414
18
- sibi_dst/df_helper/backends/sql_alchemy/__init__.py,sha256=FHorj40SbHc0OBzQ_ieG6MG-HLbf0tw6I_5eoIjJkOI,369
19
- sibi_dst/df_helper/backends/sql_alchemy/_io_sqlalchemy_dask.py,sha256=YYhjt5rL1yomcrby4i4bD5wPVDzRJpZZbxHp5CM40tQ,5414
20
- sibi_dst/df_helper/backends/sql_alchemy/_sqlachemy_filter_handler.py,sha256=KShsLJYGVxN0ps9Wot7fF0nR0wW9WzcPIcWZ9f5vdBo,4654
21
- sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_db_connection.py,sha256=HtMsfH5com4dLVJxh3wdMUpQI3mz0cKDJz0CmFS2S8U,1648
22
- sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_load_from_db.py,sha256=QkR-_S4zqJpwH9dJ5cqXW8iy9XoAFUXmcsgUSm3PbLo,2251
23
- sibi_dst/df_helper/backends/sql_alchemy/_sqlalchemy_model_builder.py,sha256=RjtKEk-i8EmX98rwqkq1Bg7IgPwYDduL967gsl9T73c,4401
24
- sibi_dst/df_helper/backends/sql_model/__init__.py,sha256=MXd4OOdTqR4cENSV733SGodPO6eQMCexANs-3w0qL5U,226
25
- sibi_dst/df_helper/backends/sql_model/_sqlmodel_db_connection.py,sha256=6jmMjKIv5Btysj3kZMaXQ98IqKQkhnOC-JWtb1B8rus,4265
26
- sibi_dst/df_helper/backends/sql_model/_sqlmodel_load_from_db.py,sha256=jYwkIz7_E9Z6Mqw1a9TCWKWD146Tbx7mcQFxIpmKgKU,3686
27
- sibi_dst/df_helper/core/__init__.py,sha256=o4zDwgVmaijde3oix0ezb6KLxI5QFy-SGUhFTDVFLT4,569
28
- sibi_dst/df_helper/core/_defaults.py,sha256=pJU-lX7w4nrt0Anx35j08mVr_0oMGn1bTA_iCl_p1qI,6700
29
- sibi_dst/df_helper/core/_filter_handler.py,sha256=SYZqpX4Vt6GAGR0L0LohlDOdjLLWQXJDiWWqFG-lSu0,8563
30
- sibi_dst/df_helper/core/_params_config.py,sha256=hO-PddoaGjFebqJFgtn76WwVHcCjzPW3z5i3NyK6mDw,3475
31
- sibi_dst/df_helper/core/_query_config.py,sha256=HEiyR_fBJjIMum-PSQroY3KaefQ2SpW1w1SQS8oT-NU,489
32
- sibi_dst/utils/__init__.py,sha256=TV229dPIIEzU5qCLI1G6fnCZW-VirUwSuffp7z7OTFg,783
33
- sibi_dst/utils/_airflow_manager.py,sha256=rlt3eolR5QvtxWhAtBTCpHXvxftnKM-ibPMv3fVwNZk,7524
34
- sibi_dst/utils/_clickhouse_writer.py,sha256=JcnWN2635ATCOaFiB6NYglNXDwqKw0jC7Urs9WOZE20,8571
35
- sibi_dst/utils/_credentials.py,sha256=8i6z7y3y5S-6mSk4xrT2AwhzCA32mTn1n1iYX9IVyHk,1724
36
- sibi_dst/utils/_data_utils.py,sha256=ch4j5FEs8ZnniUzpbeLO-b4Yco_6nwCu71xHaVqMGi4,7050
37
- sibi_dst/utils/_data_wrapper.py,sha256=cvUkGRiPfCyLD4XcoX7FWLYzM8gnHBGR1pJ08PMneCk,9010
38
- sibi_dst/utils/_date_utils.py,sha256=KYB07puKDrSG8tOm_i1HGX0TjLNUtSWjwfsCYBmW9co,10619
39
- sibi_dst/utils/_df_utils.py,sha256=pjEfkof9hggXQgYerG0p4DXrwBeIRynJFg4IX3Yrb4c,10919
40
- sibi_dst/utils/_file_utils.py,sha256=5EN90c8N1n9d-_xwz2RzaYcXRMQY_rws2Q3EA3pNAog,1254
41
- sibi_dst/utils/_filepath_generator.py,sha256=ytPSZ9GYOnnSP25zwA-0NjFHupPRZyXwixWnn_68_n0,6686
42
- sibi_dst/utils/_log_utils.py,sha256=rPp8z1UglwvqzBOOAvMOct0syQZ-54gGYafnJDRYZN4,2313
43
- sibi_dst/utils/_parquet_saver.py,sha256=3BK0XXgMOOAdIw4OzbwMxmDrzDw3_MKi8RTpulIVUe0,4367
44
- sibi_dst/utils/_storage_manager.py,sha256=KP2HBXnLUMMquqcO30ecfuoU7g1z8RtaV3Dv0TvEXoY,3856
45
- sibi_dst-0.3.20.dist-info/METADATA,sha256=Bum7DGRTuXnOPHglo8OKh1jCt0pITgvoyjSfetGXpO4,2134
46
- sibi_dst-0.3.20.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
47
- sibi_dst-0.3.20.dist-info/RECORD,,