dragon-ml-toolbox 12.1.0__tar.gz → 12.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (46) hide show
  1. {dragon_ml_toolbox-12.1.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.2.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/data_exploration.py +107 -3
  4. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/pyproject.toml +1 -1
  5. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/LICENSE +0 -0
  6. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/LICENSE-THIRD-PARTY.md +0 -0
  7. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/README.md +0 -0
  8. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  9. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  10. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  11. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  12. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ETL_cleaning.py +0 -0
  13. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ETL_engineering.py +0 -0
  14. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/GUI_tools.py +0 -0
  15. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/MICE_imputation.py +0 -0
  16. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_callbacks.py +0 -0
  17. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_datasetmaster.py +0 -0
  18. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_evaluation.py +0 -0
  19. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_evaluation_multi.py +0 -0
  20. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_inference.py +0 -0
  21. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_models.py +0 -0
  22. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_optimization.py +0 -0
  23. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_scaler.py +0 -0
  24. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_simple_optimization.py +0 -0
  25. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_trainer.py +0 -0
  26. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_utilities.py +0 -0
  27. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/PSO_optimization.py +0 -0
  28. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/RNN_forecast.py +0 -0
  29. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/SQL.py +0 -0
  30. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/VIF_factor.py +0 -0
  31. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/__init__.py +0 -0
  32. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/_logger.py +0 -0
  33. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/_script_info.py +0 -0
  34. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/constants.py +0 -0
  35. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/custom_logger.py +0 -0
  36. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ensemble_evaluation.py +0 -0
  37. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ensemble_inference.py +0 -0
  38. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ensemble_learning.py +0 -0
  39. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/handle_excel.py +0 -0
  40. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/keys.py +0 -0
  41. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/math_utilities.py +0 -0
  42. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/optimization_tools.py +0 -0
  43. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/path_manager.py +0 -0
  44. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/serde.py +0 -0
  45. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/utilities.py +0 -0
  46. {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.1.0
3
+ Version: 12.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.1.0
3
+ Version: 12.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -3,7 +3,7 @@ from pandas.api.types import is_numeric_dtype
3
3
  import numpy as np
4
4
  import matplotlib.pyplot as plt
5
5
  import seaborn as sns
6
- from typing import Union, Literal, Dict, Tuple, List, Optional
6
+ from typing import Union, Literal, Dict, Tuple, List, Optional, Any
7
7
  from pathlib import Path
8
8
  import re
9
9
 
@@ -33,7 +33,8 @@ __all__ = [
33
33
  "match_and_filter_columns_by_regex",
34
34
  "standardize_percentages",
35
35
  "create_transformer_categorical_map",
36
- "reconstruct_one_hot"
36
+ "reconstruct_one_hot",
37
+ "reconstruct_binary"
37
38
  ]
38
39
 
39
40
 
@@ -1081,7 +1082,110 @@ def reconstruct_one_hot(
1081
1082
  unique_cols_to_drop = list(set(all_ohe_cols_to_drop))
1082
1083
  new_df.drop(columns=unique_cols_to_drop, inplace=True)
1083
1084
  _LOGGER.info(f"Dropped {len(unique_cols_to_drop)} original one-hot encoded columns.")
1084
-
1085
+
1086
+ _LOGGER.info(f"Successfully reconstructed {reconstructed_count} feature(s).")
1087
+
1088
+ return new_df
1089
+
1090
+
1091
def reconstruct_binary(
    df: pd.DataFrame,
    reconstruction_map: Dict[str, Tuple[str, Any, Any]],
    drop_original: bool = True,
    verbose: bool = True
) -> pd.DataFrame:
    """
    Reconstructs new categorical columns from existing binary (0/1) columns.

    Used to reverse a binary encoding by mapping 0 and 1 back to
    descriptive categorical labels.

    Args:
        df (pd.DataFrame):
            The input DataFrame. It is never modified.
        reconstruction_map (Dict[str, Tuple[str, Any, Any]]):
            A dictionary defining the reconstructions.
            Format:
                { "new_col_name": ("source_col_name", "label_for_0", "label_for_1") }
            Example:
                {
                    "Sex": ("Sex_male", "Female", "Male"),
                    "Smoker": ("Is_Smoker", "No", "Yes")
                }
        drop_original (bool):
            If True, the original binary source columns (e.g., "Sex_male")
            will be dropped from the returned DataFrame. A column that is
            itself one of the reconstructed outputs is never dropped.
        verbose (bool):
            If True, prints the details of each reconstruction and warns
            when an existing column is overwritten.

    Returns:
        pd.DataFrame:
            A new DataFrame with the reconstructed categorical columns.

    Raises:
        TypeError: If `df` is not a pandas DataFrame.
        ValueError: If `reconstruction_map` is not a dictionary, an entry is
            not a 3-item tuple, a source column is missing from `df`, or a
            new column name equals its own source column.

    Notes:
        - The function operates on a copy of the DataFrame.
        - Source values are always read from the input `df`, so the result
          does not depend on the order of the entries in the map.
        - Boolean source columns are treated as 0/1 (False -> label_for_0,
          True -> label_for_1).
        - Rows with `NaN` in the source column will have `NaN` in the
          new column.
        - Values in the source column other than 0 or 1 (e.g., 2) will
          result in `NaN` in the new column.
    """
    if not isinstance(df, pd.DataFrame):
        message = "Input must be a pandas DataFrame."
        _LOGGER.error(message)
        raise TypeError(message)

    if not isinstance(reconstruction_map, dict):
        message = "`reconstruction_map` must be a dictionary with the required format."
        _LOGGER.error(message)
        raise ValueError(message)

    new_df = df.copy()
    source_cols_to_drop: List[str] = []
    reconstructed_count = 0

    _LOGGER.info(f"Attempting to reconstruct {len(reconstruction_map)} binary feature(s).")

    for new_col_name, config in reconstruction_map.items():

        # --- 1. Validation ---
        # Every invalid configuration aborts the whole call; nothing is skipped.
        if not (isinstance(config, tuple) and len(config) == 3):
            message = f"Config for '{new_col_name}' is invalid. Must be a 3-item tuple."
            _LOGGER.error(message)
            raise ValueError(message)

        source_col, label_for_0, label_for_1 = config

        # Check against the untouched input so a column created by an earlier
        # entry is never mistaken for a genuine binary source.
        if source_col not in df.columns:
            message = f"Source column '{source_col}' for new column '{new_col_name}' not found."
            _LOGGER.error(message)
            raise ValueError(message)

        # Reject the fatal collision before emitting the (non-fatal) overwrite warning.
        if new_col_name == source_col:
            message = f"New column name '{new_col_name}' cannot be the same as source column '{source_col}'."
            _LOGGER.error(message)
            raise ValueError(message)

        if new_col_name in new_df.columns and verbose:
            _LOGGER.warning(f"New column '{new_col_name}' already exists and will be overwritten.")

        # --- 2. Reconstruction ---
        source_values = df[source_col]
        # Boolean values are not reliably matched against the integer keys 0/1 by
        # the dict lookup in Series.map, which would silently yield an all-NaN
        # column. Casting to float keeps missing values (as NaN) for nullable booleans.
        if pd.api.types.is_bool_dtype(source_values.dtype):
            source_values = source_values.astype("float64")

        # .map() handles 0, 1, preserves NaNs, and converts any other value to NaN.
        mapping_dict = {0: label_for_0, 1: label_for_1}
        new_df[new_col_name] = source_values.map(mapping_dict)

        # --- 3. Logging/Tracking ---
        source_cols_to_drop.append(source_col)
        reconstructed_count += 1
        if verbose:
            print(f"  - Reconstructed '{new_col_name}' from '{source_col}' (0='{label_for_0}', 1='{label_for_1}').")

    # --- 4. Cleanup ---
    if drop_original and source_cols_to_drop:
        # De-duplicate (the same source may feed several new columns) and keep any
        # column that was just written as a reconstructed output.
        reconstructed_names = set(reconstruction_map)
        unique_cols_to_drop = [
            col for col in dict.fromkeys(source_cols_to_drop)
            if col not in reconstructed_names
        ]
        new_df.drop(columns=unique_cols_to_drop, inplace=True)
        _LOGGER.info(f"Dropped {len(unique_cols_to_drop)} original binary source column(s).")

    _LOGGER.info(f"Successfully reconstructed {reconstructed_count} feature(s).")

    return new_df
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "12.1.0"
3
+ version = "12.2.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }