dragon-ml-toolbox 12.9.0__py3-none-any.whl → 12.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/RECORD +7 -7
- ml_tools/data_exploration.py +13 -6
- {dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
dragon_ml_toolbox-12.9.
|
|
2
|
-
dragon_ml_toolbox-12.9.
|
|
1
|
+
dragon_ml_toolbox-12.9.2.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
|
|
2
|
+
dragon_ml_toolbox-12.9.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
|
|
3
3
|
ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
|
|
4
4
|
ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
|
|
5
5
|
ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
|
|
@@ -24,7 +24,7 @@ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
|
|
|
24
24
|
ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
|
|
25
25
|
ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
|
|
26
26
|
ml_tools/custom_logger.py,sha256=xot-VeZFigKjcVxADgzvI54vZO_MqMMejo7JmDED8Xo,5892
|
|
27
|
-
ml_tools/data_exploration.py,sha256=
|
|
27
|
+
ml_tools/data_exploration.py,sha256=haddQFsXAWzuf84NLItcZ4Q7vzN3YWjFoh7lPlWUczo,50679
|
|
28
28
|
ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
|
|
29
29
|
ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
|
|
30
30
|
ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
|
|
@@ -35,7 +35,7 @@ ml_tools/optimization_tools.py,sha256=P074YCuZzkqkONnAsM-Zb9DTX_i8cRkkJLpwAWz6CR
|
|
|
35
35
|
ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
|
|
36
36
|
ml_tools/serde.py,sha256=UIshIesHRFmxr8F6B3LxGG8bYc1HHK-nlE3kENSZL18,5288
|
|
37
37
|
ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
|
|
38
|
-
dragon_ml_toolbox-12.9.
|
|
39
|
-
dragon_ml_toolbox-12.9.
|
|
40
|
-
dragon_ml_toolbox-12.9.
|
|
41
|
-
dragon_ml_toolbox-12.9.
|
|
38
|
+
dragon_ml_toolbox-12.9.2.dist-info/METADATA,sha256=vwKDioQfPVheuLmZasMsZGFynib5C8FMc52Tn1Ql7k0,6166
|
|
39
|
+
dragon_ml_toolbox-12.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
dragon_ml_toolbox-12.9.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
41
|
+
dragon_ml_toolbox-12.9.2.dist-info/RECORD,,
|
ml_tools/data_exploration.py
CHANGED
|
@@ -364,7 +364,6 @@ def encode_categorical_features(
|
|
|
364
364
|
- If True, encodes Null values as a distinct category 'null_label' with a value of 0. Other categories start from 1.
|
|
365
365
|
- If False, Nulls are ignored and categories start from 0.
|
|
366
366
|
|
|
367
|
-
|
|
368
367
|
null_label (str): Category to encode Nulls to if `encode_nulls` is True. If a name collision with `null_label` occurs, the fallback key will be "__NULL__".
|
|
369
368
|
split_resulting_dataset (bool):
|
|
370
369
|
- If True, returns two separate DataFrames, one with non-categorical columns and one with the encoded columns.
|
|
@@ -1025,6 +1024,7 @@ def reconstruct_one_hot(
|
|
|
1025
1024
|
df: pd.DataFrame,
|
|
1026
1025
|
features_to_reconstruct: List[Union[str, Tuple[str, Optional[str]]]],
|
|
1027
1026
|
separator: str = '_',
|
|
1027
|
+
baseline_category_name: Optional[str] = "Other",
|
|
1028
1028
|
drop_original: bool = True,
|
|
1029
1029
|
verbose: bool = True
|
|
1030
1030
|
) -> pd.DataFrame:
|
|
@@ -1042,19 +1042,22 @@ def reconstruct_one_hot(
|
|
|
1042
1042
|
A list defining the features to reconstruct. This list can contain:
|
|
1043
1043
|
|
|
1044
1044
|
- A string: (e.g., "Color")
|
|
1045
|
-
This reconstructs the feature 'Color' and assumes all-zero rows represent
|
|
1045
|
+
This reconstructs the feature 'Color' and assumes all-zero rows represent the baseline category ("Other" by default).
|
|
1046
1046
|
- A tuple: (e.g., ("Pet", "Dog"))
|
|
1047
|
-
This reconstructs 'Pet' and maps all-zero rows to the baseline category "Dog"
|
|
1047
|
+
This reconstructs 'Pet' and maps all-zero rows to the baseline category "Dog".
|
|
1048
1048
|
- A tuple with None: (e.g., ("Size", None))
|
|
1049
|
-
This
|
|
1049
|
+
This reconstructs 'Size' and maps all-zero rows to NaN.
|
|
1050
1050
|
Example:
|
|
1051
1051
|
[
|
|
1052
|
-
"Mood", # All-zeros ->
|
|
1052
|
+
"Mood", # All-zeros -> "Other"
|
|
1053
1053
|
("Color", "Red"), # All-zeros -> "Red"
|
|
1054
|
+
("Size", None) # All-zeros -> NaN
|
|
1054
1055
|
]
|
|
1055
1056
|
separator (str):
|
|
1056
1057
|
The character separating the base name from the categorical value in
|
|
1057
1058
|
the column names (e.g., '_' in 'B_a').
|
|
1059
|
+
baseline_category_name (str | None):
|
|
1060
|
+
The baseline category assigned to all-zero rows for features listed as a plain string (i.e., without an explicit baseline in a tuple). May be None.
|
|
1058
1061
|
drop_original (bool):
|
|
1059
1062
|
If True, the original one-hot encoded columns will be dropped from
|
|
1060
1063
|
the returned DataFrame.
|
|
@@ -1077,6 +1080,10 @@ def reconstruct_one_hot(
|
|
|
1077
1080
|
if not isinstance(df, pd.DataFrame):
|
|
1078
1081
|
_LOGGER.error("Input must be a pandas DataFrame.")
|
|
1079
1082
|
raise TypeError()
|
|
1083
|
+
|
|
1084
|
+
if not (baseline_category_name is None or isinstance(baseline_category_name, str)):
|
|
1085
|
+
_LOGGER.error("The baseline_category must be None or a string.")
|
|
1086
|
+
raise TypeError()
|
|
1080
1087
|
|
|
1081
1088
|
new_df = df.copy()
|
|
1082
1089
|
all_ohe_cols_to_drop = []
|
|
@@ -1090,7 +1097,7 @@ def reconstruct_one_hot(
|
|
|
1090
1097
|
if isinstance(item, str):
|
|
1091
1098
|
# Case 1: "Color"
|
|
1092
1099
|
base_name = item
|
|
1093
|
-
baseline_val =
|
|
1100
|
+
baseline_val = baseline_category_name
|
|
1094
1101
|
elif isinstance(item, tuple) and len(item) == 2:
|
|
1095
1102
|
# Case 2: ("Pet", "dog") or ("Size", None)
|
|
1096
1103
|
base_name, baseline_val = item
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|