sibi-dst 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,6 +42,7 @@ class DfHelper:
         self.dt_field=kwargs.setdefault("dt_field", None)
         self.as_pandas = kwargs.setdefault("as_pandas", False)
         kwargs.setdefault("live", True)
+        kwargs.setdefault("logger", self.logger)
         self.post_init(**kwargs)


@@ -211,6 +212,7 @@ class DfHelper:
     def save_to_parquet(self, parquet_filename: Optional[str] = None):
         ps = ParquetSaver(self.df, self.parquet_storage_path, self.logger)
         ps.save_to_parquet(parquet_filename)
+        self.logger.info(f"Parquet saved to {parquet_filename} in parquet storage: {self.parquet_storage_path}.")

     def save_to_clickhouse(self, database, table, order_by=None, **credentials):
         click_config ={
@@ -219,13 +221,14 @@ class DfHelper:
             'order_by': order_by or 'id',
         }
         credentials = {**credentials, **click_config}
-        cs=ClickHouseWriter(**credentials)
+        cs=ClickHouseWriter(logger=self.logger, **credentials)
         cs.save_to_clickhouse(self.df)
+        self.logger.info("Save to ClickHouse completed.")

     def _load_from_parquet(self, **options) -> Union[pd.DataFrame, dd.DataFrame]:
         self.df = self.plugin_parquet.load_files()
         if options:
-            self.df = ParquetFilterHandler().apply_filters_dask(self.df, options)
+            self.df = ParquetFilterHandler(logger=self.logger).apply_filters_dask(self.df, options)
         return self.df

     def load_period(self, **kwargs):
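The DfHelper hunks above all serve one change: the helper's own logger is threaded into every component it builds (ParquetSaver, ClickHouseWriter, ParquetFilterHandler) via `kwargs.setdefault("logger", self.logger)`, while a caller-supplied logger still takes precedence. A toy sketch of that pattern using only the standard library; `Widget` and `post_init` are illustrative names, not part of sibi-dst:

```python
import logging

class Widget:
    """Toy illustration of the logger-threading pattern used by DfHelper above."""

    def __init__(self, **kwargs):
        # The helper's own logger becomes the default for everything built later,
        # but a caller-supplied logger still wins because setdefault never overwrites.
        self.logger = kwargs.get("logger") or logging.getLogger(self.__class__.__name__)
        kwargs.setdefault("logger", self.logger)
        self.post_init(**kwargs)

    def post_init(self, **kwargs):
        # Downstream components receive the same logger object.
        self.component_logger = kwargs["logger"]

w = Widget()
assert w.component_logger is w.logger
```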
@@ -13,11 +13,10 @@ class HttpConfig(BaseModel):
     api_key: Optional[SecretStr] = None
     model_config = ConfigDict(arbitrary_types_allowed=True)

-    def __init__(self, **data):
+    def __init__(self, logger=None, **data):
         super().__init__(**data)
         # Initialize the logger if not provided
-        if not self.logger:
-            self.logger = Logger(log_dir='./logs/', logger_name="HttpDataSource", log_file='http_data_source.log')
+        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)

     async def fetch_data(self, **options) -> dd.DataFrame:
         """Asynchronously fetch JSON data from HTTP endpoint, substituting options into the URL path."""
@@ -1,7 +1,11 @@
 import pandas as pd
 import dask.dataframe as dd
+from sibi_dst.utils import Logger

 class ParquetFilterHandler(object):
+    def __init__(self, logger=None):
+        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
+
     @staticmethod
     def apply_filters_dask(df, filters):
         dt_operators = ['date', 'time']
@@ -25,6 +25,8 @@ class ParquetConfig(BaseModel):
     @model_validator(mode='after')
     def check_parquet_params(self):
         # Configure paths based on fsspec
+        if self.logger is None:
+            self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
         self.fs = fsspec.filesystem("file") if "://" not in str(self.parquet_storage_path) else fsspec.filesystem(str(self.parquet_storage_path).split("://")[0])

         # Validation for parquet path
@@ -1,7 +1,7 @@
 from __future__ import annotations
 from ._credentials import ConfigManager, ConfigLoader
 from ._log_utils import Logger
-from ._date_utils import DateUtils
+from ._date_utils import DateUtils, BusinessDays
 from ._data_utils import DataUtils
 from ._file_utils import FileUtils
 from ._filepath_generator import FilePathGenerator
@@ -17,6 +17,7 @@ __all__=[
     "ConfigLoader",
     "Logger",
     "DateUtils",
+    "BusinessDays",
     "FileUtils",
     "DataWrapper",
     "DataUtils",
@@ -7,6 +7,27 @@ class DataUtils:
     def __init__(self, logger=None):
         self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)

+    def transform_numeric_cols(self, df, columns, fill_value=0, dtype=int):
+        if not columns:
+            self.logger.warning('No columns specified')
+
+        columns = [column for column in columns if column in df.columns]
+        for col in columns:
+            if isinstance(df, dd.DataFrame):
+                # Replace NaN with 0, then convert to boolean
+                df[col] = df[col].map_partitions(
+                    lambda s: pd.to_numeric(s, errors='coerce')  # Convert to numeric, invalid to NaN
+                    .fillna(fill_value)  # Replace NaN with 0
+                    .astype(dtype),
+                    meta=(col, dtype)
+                )
+            else:
+                # For Pandas DataFrame, handle mixed types and invalid values
+                df[col] = pd.to_numeric(df[col], errors='coerce')  # Convert to numeric, invalid to NaN
+                df[col] = df[col].fillna(fill_value).astype(dtype)
+
+        return df
+
     @staticmethod
     def transform_numeric_columns(df, columns=None, fill_value=0, transform_func=None):
         """
@@ -14,6 +35,7 @@ class DataUtils:

        Parameters:
        - df (pandas.DataFrame or dask.dataframe.DataFrame): The DataFrame.
+        - columns (list of str, optional): Specific columns to transform. If None, all numeric columns are transformed.
        - fill_value (int or float): The value to replace NA values with.
        - transform_func (callable, optional): The transformation function to apply.
          If None, no additional transformation is applied.
@@ -28,31 +50,28 @@ class DataUtils:
        if not columns:
            return df

+        columns = [column for column in columns if column in df.columns]
        # Default transformation function (identity) if none is provided
        if transform_func is None:
            transform_func = lambda x: x

-        # Apply transformations
-        for col in columns:
-            dtype = df[col].dtype
-            if pd.api.types.is_integer_dtype(dtype):
-                meta_type = 'int64'
-            elif pd.api.types.is_float_dtype(dtype):
-                meta_type = 'float64'
-            else:
-                continue  # Skip non-numeric columns
+        # Batch processing for Dask
+        if isinstance(df, dd.DataFrame):
+            def transform_partition(partition):
+                # Apply transformations for all numeric columns in a single pass
+                partition[columns] = partition[columns].fillna(fill_value).map(transform_func)
+                return partition
+
+            # Apply the transformation function to all specified columns
+            df = df.map_partitions(transform_partition, meta=df)
+        else:
+            # Pandas: Vectorized operations for all specified columns
+            df[columns] = df[columns].fillna(fill_value).map(transform_func)

-            df[col] = df[col].fillna(fill_value).astype(meta_type)
-            if isinstance(df, dd.DataFrame):
-                df[col] = df[col].map_partitions(
-                    lambda s: s.apply(transform_func), meta=(col, meta_type)
-                )
-            else:
-                df[col] = df[col].apply(transform_func)
        return df

    @staticmethod
-    def transform_boolean_columns(df, columns=None, sample_size=100):
+    def transform_boolean_columns(df, columns=None):
        """
        Detect if the provided columns in a DataFrame (Pandas or Dask) contain only 0 and 1
        and convert them to boolean. Detection is performed using a sample.
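The rewritten `transform_numeric_columns` now fills NAs and applies the optional `transform_func` element-wise to all selected columns in one pass via `DataFrame.map` (so pandas 2.1+ is assumed), instead of looping column by column. A small sketch with made-up data:

```python
import pandas as pd
from sibi_dst.utils import DataUtils

df = pd.DataFrame({"amount": [1.0, None, 3.0], "note": ["a", "b", "c"]})

# Fill missing values, then apply the transform to the selected columns only.
df = DataUtils.transform_numeric_columns(
    df, columns=["amount"], fill_value=0, transform_func=lambda x: x * 100
)
print(df["amount"].tolist())  # [100.0, 0.0, 300.0]
```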
@@ -96,73 +115,67 @@ class DataUtils:
        Returns:
        - pandas.DataFrame or dask.dataframe.DataFrame: Updated DataFrame with merged lookup data.
        """
-        # Check if the DataFrame is empty
+        # Return early if the DataFrame is empty
        if self.is_dataframe_empty(df):
            return df

-        # Extract required parameters with default values
-        source_col = kwargs.pop('source_col', None)
-        lookup_col = kwargs.pop('lookup_col', None)
-        lookup_description_col = kwargs.pop('lookup_description_col', None)
-        source_description_alias = kwargs.pop('source_description_alias', None)
-        fillna_source_description_alias = kwargs.pop('fillna_source_description_alias', False)
-        fieldnames = kwargs.get('fieldnames', None)
-        column_names = kwargs.get('column_names', None)
+        # Extract and validate required parameters
+        required_params = ['source_col', 'lookup_col', 'lookup_description_col', 'source_description_alias']
+        missing_params = [param for param in required_params if param not in kwargs]
+        if missing_params:
+            raise ValueError(f"Missing required parameters: {', '.join(missing_params)}")

-        # Validate required parameters
-        if not all([source_col, lookup_col, lookup_description_col, source_description_alias]):
-            raise ValueError(
-                'source_col, lookup_col, lookup_description_col, and source_description_alias must be specified'
-            )
+        source_col = kwargs.pop('source_col')
+        lookup_col = kwargs.pop('lookup_col')
+        lookup_description_col = kwargs.pop('lookup_description_col')
+        source_description_alias = kwargs.pop('source_description_alias')
+
+        # Optional parameters with default values
+        fillna_source_description_alias = kwargs.pop('fillna_source_description_alias', False)
+        fieldnames = kwargs.pop('fieldnames', (lookup_col, lookup_description_col))
+        column_names = kwargs.pop('column_names', ['temp_join_col', source_description_alias])

        if source_col not in df.columns:
-            self.logger.info(f'{source_col} not in DataFrame columns')
+            self.logger.info(f"{source_col} not in DataFrame columns")
            return df

        # Get unique IDs from source column
        ids = df[source_col].dropna().unique()
        if isinstance(ids, dd.Series):
            ids = ids.compute()
-        ids = ids.tolist()
-
-        if not ids:
-            self.logger.info(f'No IDs found in the source column: {source_col}')
+        if not len(ids):
+            self.logger.info(f"No IDs found in the source column: {source_col}")
            return df
-
-        # Set default fieldnames and column_names if not provided
-        if fieldnames is None:
-            kwargs['fieldnames'] = (lookup_col, lookup_description_col)
-        if column_names is None:
-            kwargs['column_names'] = ['temp_join_col', source_description_alias]
-
+        ids = sorted(ids.tolist())
        # Prepare kwargs for loading lookup data
        load_kwargs = kwargs.copy()
-        load_kwargs[f'{lookup_col}__in'] = ids
-
+        load_kwargs.update({
+            'fieldnames': fieldnames,
+            'column_names': column_names,
+            f'{lookup_col}__in': ids
+        })
        # Load lookup data
-        lookup_instance = classname()
+        lookup_instance = classname(debug=True, verbose_debug=True)
        result = lookup_instance.load(**load_kwargs)
-
+        if len(result.index) == 0:
+            self.logger.info(f"No IDs found in the source column: {source_col}")
+            return df
        # Determine the join column on the result DataFrame
-        if 'temp_join_col' in kwargs.get("column_names", []):
-            temp_join_col = 'temp_join_col'
-        else:
-            temp_join_col = lookup_col
+        temp_join_col = 'temp_join_col' if 'temp_join_col' in column_names else lookup_col

        # Merge DataFrames
        df = df.merge(result, how='left', left_on=source_col, right_on=temp_join_col)

        if fillna_source_description_alias and source_description_alias in df.columns:
-            df[source_description_alias] = df[source_description_alias].fillna('')
+            df[source_description_alias]=df[source_description_alias].fillna('')

        # Drop temp_join_col if present
-        if 'temp_join_col' in df.columns:
-            df = df.drop(columns='temp_join_col')
+        df = df.drop(columns='temp_join_col', errors='ignore')

        return df

-    @staticmethod
-    def is_dataframe_empty(df):
+
+    def is_dataframe_empty(self, df):
        """
        Check if a DataFrame (Pandas or Dask) is empty.

@@ -173,14 +186,30 @@ class DataUtils:
        - bool: True if the DataFrame is empty, False otherwise.
        """
        if isinstance(df, dd.DataFrame):
-            df_size = df.map_partitions(len).sum().compute()
-            return df_size == 0
-        else:
+            try:
+                return len(df.index) == 0
+            except Exception as e:
+                self.logger.error(f"Error while processing Dask DataFrame: {e}")
+                return False
+        elif isinstance(df, pd.DataFrame):
            return df.empty
+        else:
+            self.logger.error("Input must be a pandas or dask DataFrame.")
+            return False

    @staticmethod
-    def convert_to_datetime(df, date_fields):
+    def convert_to_datetime_dask(df, date_fields):
+        """
+        Convert specified columns in a Dask DataFrame to datetime, handling errors gracefully.
+
+        Parameters:
+        - df (dask.dataframe.DataFrame): The Dask DataFrame containing the columns.
+        - date_fields (list of str): List of column names to convert to datetime.
+
+        Returns:
+        - dask.dataframe.DataFrame: Updated DataFrame with specified columns converted to datetime.
+        """
        for col in date_fields:
            if col in df.columns:
-                df[col] = pd.to_datetime(df[col], errors='coerce')
-        return df
+                df[col] = df[col].map_partitions(pd.to_datetime, errors="coerce", meta=(col, "datetime64[ns]"))
+        return df
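`convert_to_datetime` becomes `convert_to_datetime_dask` and converts each partition with `pd.to_datetime(..., errors="coerce")`, so unparseable values turn into `NaT` instead of raising. A minimal sketch with made-up data:

```python
import pandas as pd
import dask.dataframe as dd
from sibi_dst.utils import DataUtils

pdf = pd.DataFrame({"created": ["2024-01-05", "not a date", None]})
ddf = dd.from_pandas(pdf, npartitions=1)

ddf = DataUtils.convert_to_datetime_dask(ddf, ["created"])
result = ddf.compute()
print(result["created"].dtype)            # datetime64[ns]
print(result["created"].isna().tolist())  # [False, True, True] -- invalid and missing values become NaT
```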
@@ -164,7 +164,7 @@ class DataWrapper:

        date_range = self.generate_date_range()
        if self.show_progress:
-            date_range = tqdm(date_range, desc="Evaluating update plan", unit="date")
+            date_range = tqdm(date_range, desc=f"Evaluating update plan {self.__class__.__name__}", unit="date")

        for current_date in date_range:
            folder = f'{self.data_path}{current_date.year}/{current_date.month:02d}/{current_date.day:02d}/'
@@ -207,325 +207,7 @@ class DataWrapper:
        return update_plan_table


-# import datetime
-# from typing import Type, Any, Dict, Optional
-# import fsspec
-# import pandas as pd
-# from IPython.display import display
-#
-# from sibi_dst.utils import Logger
-# from tqdm import tqdm
-# from sibi_dst.utils import ParquetSaver
-#
-# class DataWrapper:
-# DEFAULT_MAX_AGE_MINUTES = 1440
-# DEFAULT_HISTORY_DAYS_THRESHOLD = 30
-#
-# def __init__(self,
-# dataclass: Type,
-# date_field: str,
-# data_path: str,
-# parquet_filename: str,
-# start_date: Any,
-# end_date: Any,
-# filesystem_type: str = "file",
-# filesystem_options: Optional[Dict] = None,
-# verbose: bool = False,
-# class_params: Optional[Dict] = None,
-# load_params: Optional[Dict] = None,
-# reverse_order: bool = False,
-# overwrite: bool = False,
-# ignore_missing: bool = False,
-# logger: Optional[Logger] = None,
-# max_age_minutes: int = DEFAULT_MAX_AGE_MINUTES,
-# history_days_threshold: int = DEFAULT_HISTORY_DAYS_THRESHOLD,
-# show_progress: bool = False):
-# self.dataclass = dataclass
-# self.date_field = date_field
-# self.data_path = self.ensure_forward_slash(data_path)
-# self.parquet_filename = parquet_filename
-# self.filesystem_type = filesystem_type
-# self.filesystem_options = filesystem_options or {}
-# self.fs = fsspec.filesystem(filesystem_type, **self.filesystem_options)
-# self.verbose = verbose
-# self.class_params = class_params or {}
-# self.load_params = load_params or {}
-# self.reverse_order = reverse_order
-# self.overwrite = overwrite
-# self.ignore_missing = ignore_missing
-# self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
-# self.max_age_minutes = max_age_minutes
-# self.history_days_threshold = history_days_threshold
-# self.show_progress = show_progress
-#
-# self.start_date = self.convert_to_date(start_date)
-# self.end_date = self.convert_to_date(end_date)
-#
-#
-# def convert_to_date(self, date: Any) -> datetime.date:
-# try:
-# return datetime.datetime.strptime(date, '%Y-%m-%d').date() if isinstance(date, str) else date
-# except ValueError as e:
-# self.logger.error(f"Error converting {date} to datetime: {e}")
-# raise
-#
-# @staticmethod
-# def ensure_forward_slash(path: str) -> str:
-# return path if path.endswith('/') else path + '/'
-#
-# def generate_date_range(self):
-# step = -1 if self.reverse_order else 1
-# start, end = (self.end_date, self.start_date) if self.reverse_order else (self.start_date, self.end_date)
-# current_date = start
-# while current_date != end + datetime.timedelta(days=step):
-# yield current_date
-# current_date += datetime.timedelta(days=step)
-#
-# def process(self):
-# """Execute the update plan following the specified hierarchy."""
-# update_plan, update_plan_table = self.generate_update_plan_with_conditions()
-#
-# # Display the update plan table to the user
-#
-# display(update_plan_table)
-#
-# # Process files according to the hierarchy, considering only `update_required` dates
-# for category, description in [
-# ("overwrite", "Processing files due to overwrite=True"),
-# ("history_days", "Processing files within history_days_threshold"),
-# ("missing_files", "Processing missing files")
-# ]:
-# # Filter dates in the category where `update_required` is True
-# dates_to_process = update_plan_table[
-# (update_plan_table["update_category"] == category) & (update_plan_table["update_required"])
-# ]["date"].tolist()
-#
-# for current_date in tqdm(dates_to_process, desc=description, unit="date"):
-# self.process_date(current_date)
-#
-# def is_file_older_than(self, file_path: str, current_date: datetime.date) -> bool:
-# """
-# Check if a file is older than the specified max_age_minutes.
-# """
-# if not self.fs.exists(file_path):
-# return True  # Treat missing files as old
-#
-# # Get the file modification time
-# file_modification_time = self.fs.info(file_path)['mtime']
-# file_modification_datetime = datetime.datetime.fromtimestamp(file_modification_time, tz=datetime.timezone.utc)
-#
-# # Get the current UTC time as a timezone-aware object
-# current_time = datetime.datetime.now(datetime.timezone.utc)
-#
-# # Calculate file age in seconds and minutes
-# file_age_seconds = (current_time - file_modification_datetime).total_seconds()
-# file_age_minutes = file_age_seconds / 60
-#
-# if self.verbose:
-# self.logger.info(
-# f"File {file_path} is {round(file_age_minutes, 2)} minutes old (threshold: {self.max_age_minutes} minutes)")
-#
-# # Check if the file date is within the history threshold
-# history_start_date = datetime.date.today() - datetime.timedelta(days=self.history_days_threshold)
-# within_history_threshold = current_date >= history_start_date
-#
-# # File is considered old if it exceeds max_age_minutes and is within the history threshold
-# return file_age_minutes > self.max_age_minutes and within_history_threshold
-#
-# def process_date(self, date: datetime.date):
-# """Process a specific date by regenerating data as necessary."""
-# folder = f'{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/'
-# full_parquet_filename = f"{folder}{self.parquet_filename}"
-#
-# start_time = datetime.datetime.now()
-#
-# if self.verbose:
-# self.logger.info(f"Processing {full_parquet_filename}...")
-#
-# data_object = self.dataclass(**self.class_params)
-# #date_filter_params = {
-# # f'{self.date_field}__year': date.year,
-# # f'{self.date_field}__month': date.month,
-# # f'{self.date_field}__day': date.day
-# #}
-# df=data_object.load_period(dt_field=self.date_field, start=date, end=date)
-# #df = data_object.load(**self.load_params, **date_filter_params)
-#
-# if len(df.index) == 0:
-# if self.verbose:
-# self.logger.info("No data found for the specified date.")
-# return
-#
-# parquet_saver = ParquetSaver(df, folder, self.logger)
-# parquet_saver.save_to_parquet(self.parquet_filename, clear_existing=True)
-#
-# end_time = datetime.datetime.now()
-# duration_seconds = (end_time - start_time).total_seconds()
-#
-# if self.verbose:
-# self.logger.info(f"Data saved to {full_parquet_filename}. Processing time: {duration_seconds:.2f} seconds")
-#
-#
-# def remove_empty_directories(self, path: str):
-# if not self.fs.isdir(path) or self.fs.abspath(path) == self.fs.abspath(self.data_path):
-# return
-#
-# if not self.fs.ls(path):  # Check if directory is empty
-# try:
-# self.fs.rmdir(path)
-# if self.verbose:
-# self.logger.info(f"Removed empty directory: {path}")
-# self.remove_empty_directories(self.fs.path.dirname(path))
-# except Exception as e:
-# if self.verbose:
-# self.logger.error(f"Error removing directory {path}: {e}")
-# else:
-# if self.verbose:
-# self.logger.info(f"Directory not empty, stopping: {path}")
-#
-# def generate_update_plan_with_conditions(self):
-# """
-# Generate an update plan that evaluates files based on the specified hierarchy:
-# 1. Overwrite (all files regenerated).
-# 2. History threshold: Files within `history_days_threshold` are evaluated for `max_age_minutes`.
-# 3. Missing files: Detect missing files, ignoring future dates.
-# """
-# update_plan = {
-# "overwrite": [],
-# "history_days": [],
-# "missing_files": []
-# }
-# rows = []
-#
-# today = datetime.date.today()
-# history_start_date = today - datetime.timedelta(
-# days=self.history_days_threshold) if self.history_days_threshold else None
-#
-# for current_date in tqdm(self.generate_date_range(), desc="Evaluating update plan", unit="date"):
-# folder = f'{self.data_path}{current_date.year}/{current_date.month:02d}/{current_date.day:02d}/'
-# full_parquet_filename = f"{folder}{self.parquet_filename}"
-#
-# file_exists = self.fs.exists(full_parquet_filename)
-# file_age_minutes = None  # Initialize file_age_minutes as None
-# file_is_old = False
-# within_history = False
-# missing_file = not file_exists and not self.ignore_missing
-# category = None
-#
-# if file_exists:
-# # Calculate file age in minutes
-# file_modification_time = self.fs.info(full_parquet_filename)['mtime']
-# file_modification_datetime = datetime.datetime.fromtimestamp(file_modification_time,
-# tz=datetime.timezone.utc)
-# current_time = datetime.datetime.now(datetime.timezone.utc)
-# file_age_minutes = (current_time - file_modification_datetime).total_seconds() / 60
-#
-# # Determine if the file is old
-# file_is_old = file_age_minutes > self.max_age_minutes
-#
-# # Determine if the file is within the history threshold
-# if self.history_days_threshold and history_start_date and history_start_date <= current_date <= today:
-# within_history = True
-#
-# # Hierarchy 1: Overwrite (all files are marked for regeneration)
-# if self.overwrite:
-# category = "overwrite"
-#
-# # Hierarchy 2: History threshold evaluation
-# elif within_history and (missing_file or file_is_old):
-# category = "history_days"
-#
-# # Hierarchy 3: Detect missing files, ignoring future dates
-# elif missing_file and current_date <= today:
-# category = "missing_files"
-#
-# # Append to update plan
-# if category:
-# update_plan[category].append(current_date)
-#
-# # Collect condition descriptions for the update plan table
-# rows.append({
-# "date": current_date,
-# "file_exists": file_exists,
-# "file_age_minutes": file_age_minutes,  # Add file age to the table
-# "file_is_old": file_is_old,
-# "within_history": within_history,
-# "missing_file": missing_file,
-# "update_required": category is not None,  # Mark as true if a category is assigned
-# "update_category": category
-# })
-#
-# # Sort dates in descending order if reverse_order is True
-# if self.reverse_order:
-# for key in update_plan:
-# update_plan[key].sort(reverse=True)
-#
-# update_plan_table = pd.DataFrame(rows)
-# return update_plan, update_plan_table
-# # def generate_update_plan_with_conditions(self):
-# # """
-# # Generate an update plan that evaluates files based on the specified hierarchy:
-# # 1. Overwrite (all files regenerated).
-# # 2. History threshold: Files within `history_days_threshold` are evaluated for `max_age_minutes`.
-# # 3. Missing files: Detect missing files, ignoring future dates.
-# # """
-# # update_plan = {
-# # "overwrite": [],
-# # "history_days": [],
-# # "missing_files": []
-# # }
-# # rows = []
-# #
-# # today = datetime.date.today()
-# # history_start_date = today - datetime.timedelta(days=self.history_days_threshold) if self.history_days_threshold else None
-# #
-# # for current_date in tqdm(self.generate_date_range(), desc="Evaluating update plan", unit="date"):
-# # folder = f'{self.data_path}{current_date.year}/{current_date.month:02d}/{current_date.day:02d}/'
-# # full_parquet_filename = f"{folder}{self.parquet_filename}"
-# #
-# # file_exists = self.fs.exists(full_parquet_filename)
-# # file_is_old = file_exists and self.is_file_older_than(full_parquet_filename, current_date)
-# # within_history = False
-# # missing_file = not file_exists and not self.ignore_missing
-# # category = None
-# #
-# # # Hierarchy 1: Overwrite (all files are marked for regeneration)
-# # if self.overwrite:
-# # category = "overwrite"
-# #
-# # # Hierarchy 2: History threshold evaluation
-# # elif self.history_days_threshold and history_start_date and history_start_date <= current_date <= today:
-# # within_history = True
-# # if missing_file or self.is_file_older_than(full_parquet_filename, current_date):
-# # category = "history_days"
-# #
-# # # Hierarchy 3: Detect missing files, ignoring future dates
-# # elif missing_file and current_date <= today:
-# # category = "missing_files"
-# #
-# # # Append to update plan
-# # if category:
-# # update_plan[category].append(current_date)
-# #
-# # # Collect condition descriptions for the update plan table
-# # rows.append({
-# # "date": current_date,
-# # "file_exists": file_exists,
-# # "file_is_old": file_is_old,
-# # "within_history": within_history,
-# # "missing_file": missing_file,
-# # "update_required": category is not None,
-# # "update_category": category
-# # })
-# #
-# # # Sort dates in descending order if reverse_order is True
-# # if self.reverse_order:
-# # for key in update_plan:
-# # update_plan[key].sort(reverse=True)
-# #
-# # update_plan_table = pd.DataFrame(rows)
-# # return update_plan, update_plan_table
-#
+
 # # Usage:
 # # wrapper = DataWrapper(
 # # dataclass=YourDataClass,
@@ -1,5 +1,7 @@
 import datetime
 from typing import Union, Tuple, Callable, Dict, Any
+
+import numpy as np
 import pandas as pd
 from sibi_dst.utils import Logger

@@ -118,6 +120,134 @@ class DateUtils:
        'ytd': lambda: (datetime.date(today().year, 1, 1), today()),
    }

+class BusinessDays:
+    def __init__(self, holiday_list, logger):
+        """
+        Initialize a BusinessDays object with a given holiday list.
+        """
+        self.logger = logger
+        self.HOLIDAY_LIST = holiday_list
+        bd_holidays = [day for year in self.HOLIDAY_LIST for day in self.HOLIDAY_LIST[year]]
+        self.bd_cal = np.busdaycalendar(holidays=bd_holidays, weekmask="1111100")
+        self.holidays = self.bd_cal.holidays
+        self.week_mask = self.bd_cal.weekmask
+
+    def get_business_days_count(self, begin_date, end_date):
+        """
+        Calculate the number of business days between two dates.
+        """
+        try:
+            begin_date = pd.to_datetime(begin_date)
+            end_date = pd.to_datetime(end_date)
+        except Exception as e:
+            raise ValueError(f"Invalid date format: {e}")
+
+        years = [str(year) for year in range(begin_date.year, end_date.year + 1)]
+        if not all(year in self.HOLIDAY_LIST for year in years):
+            raise ValueError("Not all years in date range are in the holiday list")
+
+        return np.busday_count(
+            begin_date.strftime("%Y-%m-%d"),
+            end_date.strftime("%Y-%m-%d"),
+            busdaycal=self.bd_cal,
+        )
+
+    def calc_business_days_from_df(self, df, begin_date_col, end_date_col, result_col="business_days"):
+        """
+        Add a column to a Dask DataFrame with the number of business days between two date columns.
+        """
+        if not all(col in df.columns for col in [begin_date_col, end_date_col]):
+            self.logger.error("Column names not found in DataFrame")
+            raise ValueError("Required columns are missing")
+
+        # Extract holidays and weekmask to recreate the busdaycalendar
+        holidays = self.bd_cal.holidays
+        weekmask = self.bd_cal.weekmask
+
+        # Define a function to calculate business days
+        def calculate_business_days(row, holidays, weekmask):
+            begin_date = pd.to_datetime(row[begin_date_col])
+            end_date = pd.to_datetime(row[end_date_col])
+            busdaycal = np.busdaycalendar(holidays=holidays, weekmask=weekmask)
+            return np.busday_count(
+                begin_date.strftime("%Y-%m-%d"),
+                end_date.strftime("%Y-%m-%d"),
+                busdaycal=busdaycal,
+            )
+
+        # Define a wrapper function for partition-wise operations
+        def apply_business_days(partition, holidays, weekmask):
+            return partition.apply(
+                calculate_business_days, axis=1, holidays=holidays, weekmask=weekmask
+            )
+
+        # Apply the function using map_partitions
+        df[result_col] = df.map_partitions(
+            apply_business_days,
+            holidays,
+            weekmask,
+            meta=(result_col, "int64"),
+        )
+
+        return df
+
+    def add_business_days(self, start_date, n_days):
+        """
+        Add n_days business days to start_date.
+        """
+        try:
+            start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
+        except ValueError:
+            raise ValueError("Date should be a string in the format YYYY-MM-DD")
+
+        if str(start_date.year) not in self.HOLIDAY_LIST:
+            self.logger.warning(f"Year {start_date.year} is not in the holiday list")
+
+        return np.busday_offset(
+            start_date.strftime("%Y-%m-%d"),
+            n_days,
+            roll="forward",
+            busdaycal=self.bd_cal,
+        )
+
+    def calc_sla_end_date(self, df, start_date_col, n_days_col, result_col="sla_end_date"):
+        """
+        Add a column to a Dask DataFrame with SLA end dates based on start date and SLA days.
+        """
+        if not all(col in df.columns for col in [start_date_col, n_days_col]):
+            raise ValueError("Column names not found in DataFrame")
+
+        # Extract holidays and weekmask to recreate the busdaycalendar
+        holidays = self.bd_cal.holidays
+        weekmask = self.bd_cal.weekmask
+
+        # Define a function to calculate SLA end dates
+        def calculate_sla_end_date(row, holidays, weekmask):
+            start_date = pd.to_datetime(row[start_date_col])
+            n_days = row[n_days_col]
+            busdaycal = np.busdaycalendar(holidays=holidays, weekmask=weekmask)
+            return np.busday_offset(
+                start_date.strftime("%Y-%m-%d"),
+                n_days,
+                roll="forward",
+                busdaycal=busdaycal,
+            )
+
+        # Define a wrapper for partition-wise operation
+        def apply_sla_end_date(partition, holidays, weekmask):
+            return partition.apply(
+                calculate_sla_end_date, axis=1, holidays=holidays, weekmask=weekmask
+            )
+
+        # Apply the function using map_partitions
+        df[result_col] = df.map_partitions(
+            apply_sla_end_date,
+            holidays,
+            weekmask,
+            meta=(result_col, "object"),
+        )
+
+        return df
 # Class enhancements
 # DateUtils.register_period('next_week', lambda: (datetime.date.today() + datetime.timedelta(days=7),
 #                                                 datetime.date.today() + datetime.timedelta(days=13)))
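A minimal sketch of the new `BusinessDays` helper, which the `sibi_dst.utils` `__init__` hunk above now exports. The holiday dictionary and logger name are made-up example values; per the code above, the year keys must be strings and must cover every year in the ranges you query:

```python
from sibi_dst.utils import BusinessDays, Logger

holidays = {"2024": ["2024-01-01", "2024-12-25"]}  # hypothetical holiday calendar
bd = BusinessDays(holidays, Logger.default_logger(logger_name="business-days-example"))

# Weekdays between the two dates, minus the New Year holiday (end date exclusive).
print(bd.get_business_days_count("2024-01-01", "2024-01-08"))
# Two business days after Dec 24, skipping the Dec 25 holiday -> numpy datetime64 for 2024-12-27.
print(bd.add_business_days("2024-12-24", 2))
```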
@@ -12,6 +12,97 @@ class DfUtils:
        """
        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)

+    def align_and_merge_by_type(self, df_left, df_right, type_mapping, how='left'):
+        """
+        Align column data types in two DataFrames based on a type mapping dictionary and perform the merge.
+
+        Parameters:
+        - df_left (pd.DataFrame or dd.DataFrame): Left DataFrame
+        - df_right (pd.DataFrame or dd.DataFrame): Right DataFrame
+        - type_mapping (dict): Dictionary mapping target dtypes to column pairs.
+          Example: {
+              'integer': [('customer_id', 'temp1'), ('product_type_id', 'temp2')],
+              'string': [('group2', 'temp4')]
+          }
+
+        Returns:
+        - Merged DataFrame
+        """
+        # Map string keys to actual dtypes
+        dtype_map = {
+            'integer': 'int64',
+            'float': 'float64',
+            'string': 'string',
+            'datetime': 'datetime64[ns]',
+            'boolean': 'bool',
+        }
+
+        # Iterate over each dtype and align the column pairs
+        for target_type, column_pairs in type_mapping.items():
+            if target_type not in dtype_map:
+                self.logger.error(f"Unsupported type: {target_type}")
+
+            for left_col, right_col in column_pairs:
+                # Align dtypes in left and right DataFrames
+                if left_col in df_left.columns and right_col in df_right.columns:
+                    df_left[left_col] = df_left[left_col].astype(dtype_map[target_type])
+                    df_right[right_col] = df_right[right_col].astype(dtype_map[target_type])
+
+        # Flatten all column pairs for the merge operation
+        all_pairs = [pair for pairs in type_mapping.values() for pair in pairs]
+
+        # Perform the merge
+        return df_left.merge(
+            df_right,
+            how=how,
+            left_on=[pair[0] for pair in all_pairs],
+            right_on=[pair[1] for pair in all_pairs]
+        )
+
+    def exclude_from_dataframe(self, df, conditions):
+        """
+        Generic function to filter rows from a DataFrame (Pandas or Dask).
+
+        Parameters:
+        - df (pandas.DataFrame or dask.dataframe.DataFrame): The DataFrame to filter.
+        - conditions (list of tuples): List of conditions to apply for filtering.
+          Each condition is a tuple: (column_name, operator, value).
+
+        Returns:
+        - pandas.DataFrame or dask.dataframe.DataFrame: Filtered DataFrame.
+        """
+        import operator
+
+        # Mapping string operators to actual Python operators
+        ops = {
+            "==": operator.eq,
+            "!=": operator.ne,
+            "<": operator.lt,
+            "<=": operator.le,
+            ">": operator.gt,
+            ">=": operator.ge,
+        }
+        # Ensure all specified columns exist in the DataFrame
+        missing_columns = [col for col, _, _ in conditions if col not in df.columns]
+        if missing_columns:
+            self.logger.info(f"The following columns are missing in the DataFrame: {', '.join(missing_columns)}")
+            return df
+
+        # Build the combined filtering condition
+        combined_condition = None
+        for col, op, value in conditions:
+            if op not in ops:
+                raise ValueError(f"Unsupported operator: {op}")
+
+            # Get the individual condition
+            condition = ops[op](df[col], value)
+
+            # Combine the condition with AND (&)
+            combined_condition = condition if combined_condition is None else (combined_condition & condition)
+
+        # Apply the filtering and return the DataFrame
+        return df[~combined_condition]
+
    def load_grouped_activity(self, df, group_by_expr, group_expr='count', debug=False):
        """
        Groups the DataFrame by the specified expression and computes the size.
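A minimal sketch of the new `DfUtils.exclude_from_dataframe` with made-up data. The import path follows the RECORD section at the end of this diff (`sibi_dst/utils/_df_utils.py`); whether `DfUtils` is also re-exported from `sibi_dst.utils`, and whether its constructor takes anything beyond an optional logger, is not visible here, so both are assumptions:

```python
import pandas as pd
from sibi_dst.utils._df_utils import DfUtils  # module path taken from the RECORD section of this diff

df = pd.DataFrame({"status": ["open", "void", "open"], "total": [10, 99, 0]})
utils = DfUtils()  # assumes the constructor only needs an optional logger, as the hunk above suggests

# Conditions are ANDed together, then the matching rows are excluded.
kept = utils.exclude_from_dataframe(df, [("status", "==", "void"), ("total", ">", 50)])
print(kept["status"].tolist())  # ['open', 'open']
```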
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sibi-dst
-Version: 0.3.11
+Version: 0.3.12
 Summary: Data Science Toolkit
 Author: Luis Valverde
 Author-email: lvalverdeb@gmail.com
@@ -9,6 +9,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
+Requires-Dist: chardet (>=5.2.0,<6.0.0)
+Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
 Requires-Dist: clickhouse-connect (>=0.8.7,<0.9.0)
 Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
 Requires-Dist: dask[complete] (>=2024.11.1,<2025.0.0)
@@ -29,6 +31,7 @@ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: sqlmodel (>=0.0.22,<0.0.23)
 Requires-Dist: tornado (>=6.4.1,<7.0.0)
 Requires-Dist: tqdm (>=4.67.0,<5.0.0)
+Requires-Dist: uvicorn (>=0.32.1,<0.33.0)
 Description-Content-Type: text/markdown

 # sibi-dst
@@ -1,6 +1,6 @@
 sibi_dst/__init__.py,sha256=1KaC0LYTHxjpENq-NXI325WcEYZ8GCBrHGkLoFxEcu0,251
 sibi_dst/df_helper/__init__.py,sha256=JXJBY47G6wOYhzNI646OBl3pSGWIy4282-3qPGYHU7w,167
-sibi_dst/df_helper/_df_helper.py,sha256=e-ptCEDYt5dx8byNiA0ca8Eejl1DG1V5pioZUzabEnY,12747
+sibi_dst/df_helper/_df_helper.py,sha256=43-eY9mDU-j-QFeAtdMjIb3KuC2_hYzLjVi177_EKAo,13006
 sibi_dst/df_helper/_parquet_artifact.py,sha256=f5oHwXtsNW6-ONSFsRB0AniVefA0THzP92J-nugp9vo,4973
 sibi_dst/df_helper/core/__init__.py,sha256=NSYY_evzq6XEkO06Nz6xLH5KznzRGI44cLbrnN3zHXQ,503
 sibi_dst/df_helper/core/_defaults.py,sha256=pJU-lX7w4nrt0Anx35j08mVr_0oMGn1bTA_iCl_p1qI,6700
@@ -14,10 +14,10 @@ sibi_dst/df_helper/plugins/django/_django_sql_model_builder.py,sha256=GprCh2c6PF
 sibi_dst/df_helper/plugins/django/_io_dask.py,sha256=jryDojeA62rB3seRaWWMjsAmekKacK5xctwCQGVklPQ,9063
 sibi_dst/df_helper/plugins/django/_io_dask_alt.py,sha256=zDjLyYxBeL0ffn3yfE_7vqMLMpeEEk2o-zMr66sKkDw,6827
 sibi_dst/df_helper/plugins/http/__init__.py,sha256=AG9JSDRyVna2r1yxCQ9HcY32EaGnzWsfKgNLgPpSXjY,102
-sibi_dst/df_helper/plugins/http/_http_config.py,sha256=TaoI0F5S-Gf9jiWJp3ngQZTw2jlks-_WNDzKX1Wybtc,2165
+sibi_dst/df_helper/plugins/http/_http_config.py,sha256=WH0d4vsxfZRhWrWI4iTVAnhsdY3421SBr9kXYZVfeYQ,2126
 sibi_dst/df_helper/plugins/parquet/__init__.py,sha256=ClkyIsIh_ovEwqm0dTrkXImbPjLDTVHW2NQqqfQwWAw,187
-sibi_dst/df_helper/plugins/parquet/_parquet_filter_handler.py,sha256=45mHID1azAg5PmaYWbuRlghoRd3H2aTLj1XcycfLJo0,3497
-sibi_dst/df_helper/plugins/parquet/_parquet_options.py,sha256=cKyRj0UCby9-iYPPFnlel1H03x8MnAoEv8k1tp7kHXw,4277
+sibi_dst/df_helper/plugins/parquet/_parquet_filter_handler.py,sha256=6iFvblnVq0qj89QvieQuYxe_2RPX5ArKfq5zBcEIj90,3660
+sibi_dst/df_helper/plugins/parquet/_parquet_options.py,sha256=suJC7LfNEWAo-7_R62YTMSRku3k8orysft83VxRUems,4394
 sibi_dst/df_helper/plugins/sql_alchemy/__init__.py,sha256=FHorj40SbHc0OBzQ_ieG6MG-HLbf0tw6I_5eoIjJkOI,369
 sibi_dst/df_helper/plugins/sql_alchemy/_io_sqlalchemy_dask.py,sha256=6IjQEREXqTAzSJE95FKfXjRkTlEjRMS4hJ_yMpyKDTg,5223
 sibi_dst/df_helper/plugins/sql_alchemy/_sqlachemy_filter_handler.py,sha256=H8ypUjLKzYYl9BerfJjX_Uv9qBVkBR-wZiQlh3uRQXg,4669
@@ -27,19 +27,19 @@ sibi_dst/df_helper/plugins/sql_alchemy/_sqlalchemy_model_builder.py,sha256=vrTTe
 sibi_dst/df_helper/plugins/sql_model/__init__.py,sha256=MXd4OOdTqR4cENSV733SGodPO6eQMCexANs-3w0qL5U,226
 sibi_dst/df_helper/plugins/sql_model/_sqlmodel_db_connection.py,sha256=6jmMjKIv5Btysj3kZMaXQ98IqKQkhnOC-JWtb1B8rus,4265
 sibi_dst/df_helper/plugins/sql_model/_sqlmodel_load_from_db.py,sha256=bLD4tEcGDKkJCfSO4b13_89tzVJcpz55I6uw9D4ERnE,3751
-sibi_dst/utils/__init__.py,sha256=jiXJSnmsaGZTRhUThtIo6cssWXBWXNij8ffYmv77QK4,797
+sibi_dst/utils/__init__.py,sha256=nkX7tASNn57kw998YdqQQGY8qXv2J4LC4-g0GoQSiic,831
 sibi_dst/utils/_airflow_manager.py,sha256=rlt3eolR5QvtxWhAtBTCpHXvxftnKM-ibPMv3fVwNZk,7524
 sibi_dst/utils/_clickhouse_writer.py,sha256=mdgszbyVluhGvDmvsHY4XDTZrp42L3xtdmiyn3z2bYM,8534
 sibi_dst/utils/_credentials.py,sha256=8i6z7y3y5S-6mSk4xrT2AwhzCA32mTn1n1iYX9IVyHk,1724
-sibi_dst/utils/_data_utils.py,sha256=3hBMg852ANpS5bOtlU-F4H-Q91WIGga5LrKWWyDvnAA,7354
-sibi_dst/utils/_data_wrapper.py,sha256=pZnylBFTvsLGfYGv2tTyQHzyb6IbIahfaXR-PxHdivk,24099
-sibi_dst/utils/_date_utils.py,sha256=6HCrcTiuYLNsbgrNB3eAVAAgXbfx7Ce1qNc3OJla9nM,5621
-sibi_dst/utils/_df_utils.py,sha256=o2bK5-xMGKqIG4i9xfavYRxIkiHLA0nz5TQTN78998k,7350
+sibi_dst/utils/_data_utils.py,sha256=BvmjMNSkICy671BmjW68RhvDMfN5uAXwhffSV-wEwmk,9185
+sibi_dst/utils/_data_wrapper.py,sha256=SmNv1UoZLq7ovRVy4wipsWLMidKJXcRTp4HtxmaCQdk,9399
+sibi_dst/utils/_date_utils.py,sha256=KYB07puKDrSG8tOm_i1HGX0TjLNUtSWjwfsCYBmW9co,10619
+sibi_dst/utils/_df_utils.py,sha256=9_dNYoZ9_ofU0t_sxMdsXALWCuh02gvqUrei-6Lhr6w,10910
 sibi_dst/utils/_file_utils.py,sha256=5EN90c8N1n9d-_xwz2RzaYcXRMQY_rws2Q3EA3pNAog,1254
 sibi_dst/utils/_filepath_generator.py,sha256=ytPSZ9GYOnnSP25zwA-0NjFHupPRZyXwixWnn_68_n0,6686
 sibi_dst/utils/_log_utils.py,sha256=AAenyubYUjk77WqiaNkjgkxws3dnAMIdaGl2Ryz_cA4,2245
 sibi_dst/utils/_parquet_saver.py,sha256=-A0o_vucyYe7wlwiby_0_yS-ZfT2GHwImyQHrCIBNwk,9051
 sibi_dst/utils/_storage_manager.py,sha256=KP2HBXnLUMMquqcO30ecfuoU7g1z8RtaV3Dv0TvEXoY,3856
-sibi_dst-0.3.11.dist-info/METADATA,sha256=gwl565etE5wLVGk0rqQ7umOyBRtEXpQ_IdCXyEkv2s8,1897
-sibi_dst-0.3.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-sibi_dst-0.3.11.dist-info/RECORD,,
+sibi_dst-0.3.12.dist-info/METADATA,sha256=5mezOBAiUV2pMgNsVqI7iCZZgmxeZpLuYWDYUAZCTVk,2030
+sibi_dst-0.3.12.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+sibi_dst-0.3.12.dist-info/RECORD,,