dragon-ml-toolbox 8.0.0__tar.gz → 8.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. See the package's registry page for more details.

Files changed (41)
  1. {dragon_ml_toolbox-8.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-8.1.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/data_exploration.py +80 -2
  4. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/pyproject.toml +1 -1
  5. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/LICENSE +0 -0
  6. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/LICENSE-THIRD-PARTY.md +0 -0
  7. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/README.md +0 -0
  8. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  9. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  10. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  11. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  12. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ETL_engineering.py +0 -0
  13. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/GUI_tools.py +0 -0
  14. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/MICE_imputation.py +0 -0
  15. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_callbacks.py +0 -0
  16. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_datasetmaster.py +0 -0
  17. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_evaluation.py +0 -0
  18. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_evaluation_multi.py +0 -0
  19. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_inference.py +0 -0
  20. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_models.py +0 -0
  21. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_optimization.py +0 -0
  22. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_scaler.py +0 -0
  23. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ML_trainer.py +0 -0
  24. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/PSO_optimization.py +0 -0
  25. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/RNN_forecast.py +0 -0
  26. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/SQL.py +0 -0
  27. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/VIF_factor.py +0 -0
  28. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/_ML_optimization_multi.py +0 -0
  29. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/__init__.py +0 -0
  30. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/_logger.py +0 -0
  31. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/_script_info.py +0 -0
  32. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/custom_logger.py +0 -0
  33. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ensemble_evaluation.py +0 -0
  34. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ensemble_inference.py +0 -0
  35. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/ensemble_learning.py +0 -0
  36. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/handle_excel.py +0 -0
  37. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/keys.py +0 -0
  38. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-8.0.0 → dragon_ml_toolbox-8.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 8.0.0
3
+ Version: 8.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 8.0.0
3
+ Version: 8.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -5,10 +5,12 @@ import matplotlib.pyplot as plt
5
5
  import seaborn as sns
6
6
  from typing import Union, Literal, Dict, Tuple, List, Optional
7
7
  from pathlib import Path
8
+ import re
9
+
8
10
  from .path_manager import sanitize_filename, make_fullpath
9
11
  from ._script_info import _script_info
10
12
  from ._logger import _LOGGER
11
- import re
13
+ from .utilities import save_dataframe
12
14
 
13
15
 
14
16
  # Keep track of all available tools, show using `info()`
@@ -18,6 +20,7 @@ __all__ = [
18
20
  "drop_rows_with_missing_data",
19
21
  "show_null_columns",
20
22
  "drop_columns_with_missing_data",
23
+ "drop_macro",
21
24
  "split_features_targets",
22
25
  "split_continuous_binary",
23
26
  "plot_correlation_heatmap",
@@ -155,7 +158,7 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
155
158
 
156
159
  def show_null_columns(df: pd.DataFrame, round_digits: int = 2):
157
160
  """
158
- Displays a table of columns with missing values, showing both the count and
161
+ Returns a table of columns with missing values, showing both the count and
159
162
  percentage of missing entries per column.
160
163
 
161
164
  Parameters:
@@ -221,6 +224,81 @@ def drop_columns_with_missing_data(df: pd.DataFrame, threshold: float = 0.7, sho
221
224
  return df
222
225
 
223
226
 
227
+ def drop_macro(df: pd.DataFrame,
228
+ log_directory: Union[str,Path],
229
+ targets: list[str],
230
+ skip_targets: bool=False,
231
+ threshold: float=0.7) -> pd.DataFrame:
232
+ """
233
+ Iteratively removes rows and columns with excessive missing data.
234
+
235
+ This function performs a comprehensive cleaning cycle on a DataFrame. It
236
+ repeatedly drops columns with constant values, followed by rows and columns that exceed
237
+ a specified threshold of missing values. The process continues until the
238
+ DataFrame's dimensions stabilize, ensuring that the interdependency between
239
+ row and column deletions is handled.
240
+
241
+ Initial and final missing data reports are saved to the specified log directory.
242
+
243
+ Args:
244
+ df (pd.DataFrame): The input pandas DataFrame to be cleaned.
245
+ log_directory (Union[str, Path]): Path to the directory where the
246
+ 'Missing_Data_start.csv' and 'Missing_Data_final.csv' logs
247
+ will be saved.
248
+ targets (list[str]): A list of column names to be treated as target
249
+ variables. This list guides the row-dropping logic.
250
+ skip_targets (bool, optional): If True, the columns listed in `targets`
251
+ will be exempt from being dropped, even if they exceed the missing
252
+ data threshold.
253
+ threshold (float, optional): The proportion of missing data required to drop
254
+ a row or column. For example, 0.7 means a row/column will be
255
+ dropped if 70% or more of its data is missing.
256
+
257
+ Returns:
258
+ pd.DataFrame: A new, cleaned DataFrame with offending rows and columns removed.
259
+ """
260
+ # make a deep copy to work with
261
+ df_clean = df.copy()
262
+
263
+ # Log initial state
264
+ missing_data = show_null_columns(df=df_clean)
265
+ save_dataframe(df=missing_data.reset_index(drop=False),
266
+ save_dir=log_directory,
267
+ filename="Missing_Data_start")
268
+
269
+ # Clean cycles for rows and columns
270
+ master = True
271
+ while master:
272
+ # track rows and columns
273
+ initial_rows, initial_columns = df_clean.shape
274
+
275
+ # drop constant columns
276
+ df_clean = drop_constant_columns(df=df_clean)
277
+
278
+ # clean rows
279
+ df_clean = drop_rows_with_missing_data(df=df_clean, targets=targets, threshold=threshold)
280
+
281
+ # clean columns
282
+ if skip_targets:
283
+ df_clean = drop_columns_with_missing_data(df=df_clean, threshold=threshold, show_nulls_after=False, skip_columns=targets)
284
+ else:
285
+ df_clean = drop_columns_with_missing_data(df=df_clean, threshold=threshold, show_nulls_after=False)
286
+
287
+ # cleaned?
288
+ remaining_rows, remaining_columns = df_clean.shape
289
+ if remaining_rows >= initial_rows and remaining_columns >= initial_columns:
290
+ master = False
291
+
292
+ # log final state
293
+ missing_data = show_null_columns(df=df_clean)
294
+ save_dataframe(df=missing_data.reset_index(drop=False),
295
+ save_dir=log_directory,
296
+ filename="Missing_Data_final")
297
+
298
+ # return cleaned dataframe
299
+ return df_clean
300
+
301
+
224
302
  def split_features_targets(df: pd.DataFrame, targets: list[str]):
225
303
  """
226
304
  Splits a DataFrame's columns into features and targets.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "8.0.0"
3
+ version = "8.1.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }