dragon-ml-toolbox 12.1.0__tar.gz → 12.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-12.1.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.2.0}/PKG-INFO +1 -1
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/data_exploration.py +107 -3
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/LICENSE +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/README.md +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ETL_cleaning.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_evaluation_multi.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_scaler.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_simple_optimization.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ML_utilities.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/constants.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/math_utilities.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/serde.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/setup.cfg +0 -0
|
@@ -3,7 +3,7 @@ from pandas.api.types import is_numeric_dtype
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import matplotlib.pyplot as plt
|
|
5
5
|
import seaborn as sns
|
|
6
|
-
from typing import Union, Literal, Dict, Tuple, List, Optional
|
|
6
|
+
from typing import Union, Literal, Dict, Tuple, List, Optional, Any
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
import re
|
|
9
9
|
|
|
@@ -33,7 +33,8 @@ __all__ = [
|
|
|
33
33
|
"match_and_filter_columns_by_regex",
|
|
34
34
|
"standardize_percentages",
|
|
35
35
|
"create_transformer_categorical_map",
|
|
36
|
-
"reconstruct_one_hot"
|
|
36
|
+
"reconstruct_one_hot",
|
|
37
|
+
"reconstruct_binary"
|
|
37
38
|
]
|
|
38
39
|
|
|
39
40
|
|
|
@@ -1081,7 +1082,110 @@ def reconstruct_one_hot(
|
|
|
1081
1082
|
unique_cols_to_drop = list(set(all_ohe_cols_to_drop))
|
|
1082
1083
|
new_df.drop(columns=unique_cols_to_drop, inplace=True)
|
|
1083
1084
|
_LOGGER.info(f"Dropped {len(unique_cols_to_drop)} original one-hot encoded columns.")
|
|
1084
|
-
|
|
1085
|
+
|
|
1086
|
+
_LOGGER.info(f"Successfully reconstructed {reconstructed_count} feature(s).")
|
|
1087
|
+
|
|
1088
|
+
return new_df
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
def reconstruct_binary(
    df: pd.DataFrame,
    reconstruction_map: Dict[str, Tuple[str, Any, Any]],
    drop_original: bool = True,
    verbose: bool = True
) -> pd.DataFrame:
    """
    Reconstructs new categorical columns from existing binary (0/1) columns.

    Used to reverse a binary encoding by mapping 0 and 1 back to
    descriptive categorical labels.

    Args:
        df (pd.DataFrame):
            The input DataFrame.
        reconstruction_map (Dict[str, Tuple[str, Any, Any]]):
            A dictionary defining the reconstructions.
            Format:
                { "new_col_name": ("source_col_name", "label_for_0", "label_for_1") }
            Example:
                {
                    "Sex": ("Sex_male", "Female", "Male"),
                    "Smoker": ("Is_Smoker", "No", "Yes")
                }
        drop_original (bool):
            If True, the original binary source columns (e.g., "Sex_male")
            will be dropped from the returned DataFrame.
        verbose (bool):
            If True, prints the details of each reconstruction and warns
            when an existing column is about to be overwritten.

    Returns:
        pd.DataFrame:
            A new DataFrame with the reconstructed categorical columns.

    Raises:
        TypeError: If `df` is not a pandas DataFrame.
        ValueError: If `reconstruction_map` is not a dictionary, a config
            entry is not a 3-item tuple, a source column is missing, or a
            new column has the same name as its own source column.

    Notes:
        - The function operates on a copy of the DataFrame.
        - Rows with `NaN` in the source column will have `NaN` in the
          new column.
        - Values in the source column other than 0 or 1 (e.g., 2) will
          result in `NaN` in the new column.
        - A source column that is itself the target of another entry in
          `reconstruction_map` is never dropped, so a reconstructed
          feature cannot be removed by the cleanup step.
    """
    if not isinstance(df, pd.DataFrame):
        message = "Input must be a pandas DataFrame."
        _LOGGER.error(message)
        raise TypeError(message)

    if not isinstance(reconstruction_map, dict):
        message = "`reconstruction_map` must be a dictionary with the required format."
        _LOGGER.error(message)
        raise ValueError(message)

    new_df = df.copy()
    source_cols_to_drop: List[str] = []
    reconstructed_count = 0

    _LOGGER.info(f"Attempting to reconstruct {len(reconstruction_map)} binary feature(s).")

    for new_col_name, config in reconstruction_map.items():

        # --- 1. Validation ---
        # Every failure below aborts the whole call, so the messages must not
        # claim the entry is merely skipped.
        if not (isinstance(config, tuple) and len(config) == 3):
            message = f"Config for '{new_col_name}' is invalid. Must be a 3-item tuple."
            _LOGGER.error(message)
            raise ValueError(message)

        source_col, label_for_0, label_for_1 = config

        if source_col not in new_df.columns:
            message = f"Source column '{source_col}' for new column '{new_col_name}' not found."
            _LOGGER.error(message)
            raise ValueError(message)

        # Checked before the overwrite warning so that a self-referencing
        # entry reports only the actual error.
        if new_col_name == source_col:
            message = f"New column name '{new_col_name}' cannot be the same as source column '{source_col}'."
            _LOGGER.error(message)
            raise ValueError(message)

        if new_col_name in new_df.columns and verbose:
            _LOGGER.warning(f"New column '{new_col_name}' already exists and will be overwritten.")

        # --- 2. Reconstruction ---
        # .map() handles 0, 1, preserves NaNs, and converts any other value to NaN.
        mapping_dict = {0: label_for_0, 1: label_for_1}
        new_df[new_col_name] = new_df[source_col].map(mapping_dict)

        # --- 3. Logging/Tracking ---
        source_cols_to_drop.append(source_col)
        reconstructed_count += 1
        if verbose:
            print(f"  - Reconstructed '{new_col_name}' from '{source_col}' (0='{label_for_0}', 1='{label_for_1}').")

    # --- 4. Cleanup ---
    if drop_original and source_cols_to_drop:
        # dict.fromkeys() removes duplicates (same source column used twice)
        # while keeping a deterministic order. Columns that are also
        # reconstruction targets now hold reconstructed labels and must survive.
        unique_cols_to_drop = [
            col for col in dict.fromkeys(source_cols_to_drop)
            if col not in reconstruction_map
        ]
        if unique_cols_to_drop:
            new_df.drop(columns=unique_cols_to_drop, inplace=True)
            _LOGGER.info(f"Dropped {len(unique_cols_to_drop)} original binary source column(s).")

    _LOGGER.info(f"Successfully reconstructed {reconstructed_count} feature(s).")

    return new_df
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/requires.txt
RENAMED
|
File without changes
|
{dragon_ml_toolbox-12.1.0 → dragon_ml_toolbox-12.2.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|