dragon-ml-toolbox 20.7.0__tar.gz → 20.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-20.7.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-20.8.0}/PKG-INFO +3 -1
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/README.md +2 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO +3 -1
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +4 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_metrics.py +17 -10
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_classification.py +79 -7
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/__init__.py +5 -1
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_analysis.py +149 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_features.py +76 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/keys/_keys.py +1 -0
- dragon_ml_toolbox-20.8.0/ml_tools/resampling/__init__.py +19 -0
- dragon_ml_toolbox-20.8.0/ml_tools/resampling/_base_resampler.py +49 -0
- dragon_ml_toolbox-20.8.0/ml_tools/resampling/_multi_resampling.py +184 -0
- dragon_ml_toolbox-20.8.0/ml_tools/resampling/_single_resampling.py +113 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/_translate.py +10 -9
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/LICENSE +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/_basic_clean.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/_clean_tools.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/_dragon_cleaner.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_engineering/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_engineering/_dragon_engineering.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_engineering/_transforms.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/GUI_tools/_GUI_tools.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/GUI_tools/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/_IO_loggers.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/_IO_save_load.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/_IO_utils.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/MICE/_MICE_imputation.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/MICE/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/MICE/_dragon_mice.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_base.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_checkpoint.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_early_stop.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_scheduler.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/_chaining_tools.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/_dragon_chain.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/_update_schema.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_base_model_config.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_finalize.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_models.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_training.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_base_datasetmaster.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_datasetmaster.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_sequence_datasetmaster.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_vision_datasetmaster.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_feature_importance.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_loss.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_regression.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_sequence.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_vision.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation_captum/_ML_evaluation_captum.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation_captum/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_finalize_handler/_ML_finalize_handler.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_finalize_handler/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_base_inference.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_chain_inference.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_dragon_inference.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_multi_inference.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_sequence/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_sequence/_sequence_inference.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_vision/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_vision/_vision_inference.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_base_mlp_attention.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_base_save_load.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_autoint.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_gate.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_node.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_tabnet.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_tabular.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_mlp_attention.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_models_advanced_helpers.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_sequence/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_sequence/_sequence_models.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_base_wrapper.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_image_classification.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_image_segmentation.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_object_detection.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/_multi_dragon.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/_single_dragon.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/_single_manual.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_scaler/_ML_scaler.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_scaler/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_base_trainer.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_dragon_detection_trainer.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_dragon_sequence_trainer.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_dragon_trainer.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/_artifact_finder.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/_inspection.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/_train_tools.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_vision_transformers/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_vision_transformers/_core_transforms.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_vision_transformers/_offline_augmentation.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/PSO_optimization/_PSO.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/PSO_optimization/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/SQL/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/SQL/_dragon_SQL.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/VIF/_VIF_factor.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/VIF/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/_logger.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/_schema_load_ops.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/_script_info.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/constants.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_cleaning.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_plotting.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_schema_ops.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_evaluation/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_evaluation/_ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_inference/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_inference/_ensemble_inference.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_learning/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_learning/_ensemble_learning.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/excel_handler/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/excel_handler/_excel_handler.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/keys/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/math_utilities/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/math_utilities/_math_utilities.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/optimization_tools/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/optimization_tools/_optimization_bounds.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/optimization_tools/_optimization_plots.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/path_manager/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/path_manager/_dragonmanager.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/path_manager/_path_tools.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/plot_fonts/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/plot_fonts/_plot_fonts.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/schema/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/schema/_feature_schema.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/schema/_gui_schema.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/serde/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/serde/_serde.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/__init__.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/_utility_save_load.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/_utility_tools.py +0 -0
- {dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version: 20.7.0
|
|
3
|
+
Version: 20.8.0
|
|
4
4
|
Summary: Complete pipelines and helper tools for data science and machine learning projects.
|
|
5
5
|
Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -174,6 +174,7 @@ ML_vision_transformers
|
|
|
174
174
|
optimization_tools
|
|
175
175
|
path_manager
|
|
176
176
|
plot_fonts
|
|
177
|
+
resampling
|
|
177
178
|
schema
|
|
178
179
|
serde
|
|
179
180
|
SQL
|
|
@@ -206,6 +207,7 @@ optimization_tools
|
|
|
206
207
|
path_manager
|
|
207
208
|
plot_fonts
|
|
208
209
|
PSO_optimization
|
|
210
|
+
resampling
|
|
209
211
|
schema
|
|
210
212
|
serde
|
|
211
213
|
SQL
|
|
@@ -81,6 +81,7 @@ ML_vision_transformers
|
|
|
81
81
|
optimization_tools
|
|
82
82
|
path_manager
|
|
83
83
|
plot_fonts
|
|
84
|
+
resampling
|
|
84
85
|
schema
|
|
85
86
|
serde
|
|
86
87
|
SQL
|
|
@@ -113,6 +114,7 @@ optimization_tools
|
|
|
113
114
|
path_manager
|
|
114
115
|
plot_fonts
|
|
115
116
|
PSO_optimization
|
|
117
|
+
resampling
|
|
116
118
|
schema
|
|
117
119
|
serde
|
|
118
120
|
SQL
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version: 20.7.0
|
|
3
|
+
Version: 20.8.0
|
|
4
4
|
Summary: Complete pipelines and helper tools for data science and machine learning projects.
|
|
5
5
|
Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -174,6 +174,7 @@ ML_vision_transformers
|
|
|
174
174
|
optimization_tools
|
|
175
175
|
path_manager
|
|
176
176
|
plot_fonts
|
|
177
|
+
resampling
|
|
177
178
|
schema
|
|
178
179
|
serde
|
|
179
180
|
SQL
|
|
@@ -206,6 +207,7 @@ optimization_tools
|
|
|
206
207
|
path_manager
|
|
207
208
|
plot_fonts
|
|
208
209
|
PSO_optimization
|
|
210
|
+
resampling
|
|
209
211
|
schema
|
|
210
212
|
serde
|
|
211
213
|
SQL
|
{dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt
RENAMED
|
@@ -136,6 +136,10 @@ ml_tools/path_manager/_dragonmanager.py
|
|
|
136
136
|
ml_tools/path_manager/_path_tools.py
|
|
137
137
|
ml_tools/plot_fonts/__init__.py
|
|
138
138
|
ml_tools/plot_fonts/_plot_fonts.py
|
|
139
|
+
ml_tools/resampling/__init__.py
|
|
140
|
+
ml_tools/resampling/_base_resampler.py
|
|
141
|
+
ml_tools/resampling/_multi_resampling.py
|
|
142
|
+
ml_tools/resampling/_single_resampling.py
|
|
139
143
|
ml_tools/schema/__init__.py
|
|
140
144
|
ml_tools/schema/_feature_schema.py
|
|
141
145
|
ml_tools/schema/_gui_schema.py
|
|
@@ -98,10 +98,11 @@ class _BaseMultiLabelFormat:
|
|
|
98
98
|
cmap: str = "BuGn",
|
|
99
99
|
ROC_PR_line: str='darkorange',
|
|
100
100
|
calibration_bins: Union[int, Literal['auto']]='auto',
|
|
101
|
-
font_size: int =
|
|
102
|
-
xtick_size: int=
|
|
103
|
-
ytick_size: int=
|
|
104
|
-
legend_size: int=
|
|
101
|
+
font_size: int = 26,
|
|
102
|
+
xtick_size: int=22,
|
|
103
|
+
ytick_size: int=22,
|
|
104
|
+
legend_size: int=26,
|
|
105
|
+
cm_font_size: int=26) -> None:
|
|
105
106
|
"""
|
|
106
107
|
Initializes the formatting configuration for multi-label classification metrics.
|
|
107
108
|
|
|
@@ -127,6 +128,8 @@ class _BaseMultiLabelFormat:
|
|
|
127
128
|
|
|
128
129
|
legend_size (int): Font size for plot legends.
|
|
129
130
|
|
|
131
|
+
cm_font_size (int): Font size for the confusion matrix.
|
|
132
|
+
|
|
130
133
|
<br>
|
|
131
134
|
|
|
132
135
|
### [Matplotlib Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html)
|
|
@@ -142,6 +145,7 @@ class _BaseMultiLabelFormat:
|
|
|
142
145
|
self.xtick_size = xtick_size
|
|
143
146
|
self.ytick_size = ytick_size
|
|
144
147
|
self.legend_size = legend_size
|
|
148
|
+
self.cm_font_size = cm_font_size
|
|
145
149
|
|
|
146
150
|
def __repr__(self) -> str:
|
|
147
151
|
parts = [
|
|
@@ -151,7 +155,8 @@ class _BaseMultiLabelFormat:
|
|
|
151
155
|
f"font_size={self.font_size}",
|
|
152
156
|
f"xtick_size={self.xtick_size}",
|
|
153
157
|
f"ytick_size={self.ytick_size}",
|
|
154
|
-
f"legend_size={self.legend_size}"
|
|
158
|
+
f"legend_size={self.legend_size}",
|
|
159
|
+
f"cm_font_size={self.cm_font_size}"
|
|
155
160
|
]
|
|
156
161
|
return f"{self.__class__.__name__}({', '.join(parts)})"
|
|
157
162
|
|
|
@@ -520,10 +525,11 @@ class FormatMultiLabelBinaryClassificationMetrics(_BaseMultiLabelFormat):
|
|
|
520
525
|
cmap: str = "BuGn",
|
|
521
526
|
ROC_PR_line: str='darkorange',
|
|
522
527
|
calibration_bins: Union[int, Literal['auto']]='auto',
|
|
523
|
-
font_size: int =
|
|
524
|
-
xtick_size: int=
|
|
525
|
-
ytick_size: int=
|
|
526
|
-
legend_size: int=
|
|
528
|
+
font_size: int = 26,
|
|
529
|
+
xtick_size: int=22,
|
|
530
|
+
ytick_size: int=22,
|
|
531
|
+
legend_size: int=26,
|
|
532
|
+
cm_font_size: int=26
|
|
527
533
|
) -> None:
|
|
528
534
|
super().__init__(cmap=cmap,
|
|
529
535
|
ROC_PR_line=ROC_PR_line,
|
|
@@ -531,7 +537,8 @@ class FormatMultiLabelBinaryClassificationMetrics(_BaseMultiLabelFormat):
|
|
|
531
537
|
font_size=font_size,
|
|
532
538
|
xtick_size=xtick_size,
|
|
533
539
|
ytick_size=ytick_size,
|
|
534
|
-
legend_size=legend_size
|
|
540
|
+
legend_size=legend_size,
|
|
541
|
+
cm_font_size=cm_font_size)
|
|
535
542
|
|
|
536
543
|
|
|
537
544
|
# Segmentation
|
{dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_classification.py
RENAMED
|
@@ -481,6 +481,10 @@ def multi_label_classification_metrics(
|
|
|
481
481
|
ytick_size = format_config.ytick_size
|
|
482
482
|
legend_size = format_config.legend_size
|
|
483
483
|
base_font_size = format_config.font_size
|
|
484
|
+
|
|
485
|
+
# config font size for heatmap
|
|
486
|
+
cm_font_size = format_config.cm_font_size
|
|
487
|
+
cm_tick_size = cm_font_size - 4
|
|
484
488
|
|
|
485
489
|
# --- Calculate and Save Overall Metrics (using y_pred) ---
|
|
486
490
|
h_loss = hamming_loss(y_true, y_pred)
|
|
@@ -488,7 +492,7 @@ def multi_label_classification_metrics(
|
|
|
488
492
|
j_score_macro = jaccard_score(y_true, y_pred, average='macro')
|
|
489
493
|
|
|
490
494
|
overall_report = (
|
|
491
|
-
f"Overall Multi-Label Metrics:\n"
|
|
495
|
+
f"Overall Multi-Label Metrics:\n"
|
|
492
496
|
f"--------------------------------------------------\n"
|
|
493
497
|
f"Hamming Loss: {h_loss:.4f}\n"
|
|
494
498
|
f"Jaccard Score (micro): {j_score_micro:.4f}\n"
|
|
@@ -498,14 +502,82 @@ def multi_label_classification_metrics(
|
|
|
498
502
|
# print(overall_report)
|
|
499
503
|
overall_report_path = save_dir_path / "classification_report.txt"
|
|
500
504
|
overall_report_path.write_text(overall_report)
|
|
505
|
+
|
|
506
|
+
# --- Save Classification Report Heatmap (Multi-label) ---
|
|
507
|
+
try:
|
|
508
|
+
# Generate full report as dict
|
|
509
|
+
full_report_dict = classification_report(y_true, y_pred, target_names=target_names, output_dict=True)
|
|
510
|
+
report_df = pd.DataFrame(full_report_dict)
|
|
511
|
+
|
|
512
|
+
# Cleanup
|
|
513
|
+
# Remove 'accuracy' column if it exists
|
|
514
|
+
report_df = report_df.drop(columns=['accuracy'], errors='ignore')
|
|
515
|
+
|
|
516
|
+
# Remove 'support' row explicitly
|
|
517
|
+
if 'support' in report_df.index:
|
|
518
|
+
report_df = report_df.drop(index='support')
|
|
519
|
+
|
|
520
|
+
# Transpose: Rows = Classes/Averages, Cols = Metrics
|
|
521
|
+
plot_df = report_df.T
|
|
522
|
+
|
|
523
|
+
# Dynamic Height
|
|
524
|
+
fig_height = max(5.0, len(plot_df.index) * 0.5 + 4.0)
|
|
525
|
+
fig_width = 8.0
|
|
526
|
+
|
|
527
|
+
fig_heat, ax_heat = plt.subplots(figsize=(fig_width, fig_height), dpi=_EvaluationConfig.DPI)
|
|
528
|
+
|
|
529
|
+
# Plot
|
|
530
|
+
sns.heatmap(plot_df,
|
|
531
|
+
annot=True,
|
|
532
|
+
cmap=format_config.cmap,
|
|
533
|
+
fmt='.2f',
|
|
534
|
+
vmin=0.0,
|
|
535
|
+
vmax=1.0,
|
|
536
|
+
cbar_kws={'shrink': 0.9})
|
|
537
|
+
|
|
538
|
+
ax_heat.set_title("Classification Report Heatmap", pad=_EvaluationConfig.LABEL_PADDING, fontsize=cm_font_size)
|
|
539
|
+
|
|
540
|
+
# manually increase the font size of the elements
|
|
541
|
+
for text in ax_heat.texts:
|
|
542
|
+
text.set_fontsize(cm_tick_size)
|
|
543
|
+
|
|
544
|
+
cbar = ax_heat.collections[0].colorbar
|
|
545
|
+
cbar.ax.tick_params(labelsize=cm_tick_size - 4) # type: ignore
|
|
546
|
+
|
|
547
|
+
ax_heat.tick_params(axis='x', labelsize=cm_tick_size, pad=_EvaluationConfig.LABEL_PADDING)
|
|
548
|
+
ax_heat.tick_params(axis='y', labelsize=cm_tick_size, pad=_EvaluationConfig.LABEL_PADDING, rotation=0)
|
|
549
|
+
|
|
550
|
+
plt.tight_layout()
|
|
551
|
+
heatmap_path = save_dir_path / "classification_report_heatmap.svg"
|
|
552
|
+
plt.savefig(heatmap_path)
|
|
553
|
+
_LOGGER.info(f"📊 Report heatmap saved as '{heatmap_path.name}'")
|
|
554
|
+
plt.close(fig_heat)
|
|
555
|
+
|
|
556
|
+
except Exception as e:
|
|
557
|
+
_LOGGER.error(f"Could not generate multi-label classification report heatmap: {e}")
|
|
501
558
|
|
|
502
559
|
# --- Per-Label Metrics and Plots ---
|
|
503
560
|
for i, name in enumerate(target_names):
|
|
504
|
-
|
|
561
|
+
# strip whitespace from name
|
|
562
|
+
name = name.strip()
|
|
563
|
+
|
|
564
|
+
# print(f" -> Evaluating label: '{name}'")
|
|
505
565
|
true_i = y_true[:, i]
|
|
506
566
|
pred_i = y_pred[:, i] # Use passed-in y_pred
|
|
507
567
|
prob_i = y_prob[:, i] # Use passed-in y_prob
|
|
508
568
|
sanitized_name = sanitize_filename(name)
|
|
569
|
+
|
|
570
|
+
# if name is too long, just take the first letter of each word. Each word might be separated by space or underscore
|
|
571
|
+
if len(name) >= _EvaluationConfig.NAME_LIMIT:
|
|
572
|
+
parts = [w for w in name.replace("_", " ").split() if w]
|
|
573
|
+
abbr = "".join(p[0].upper() for p in parts)
|
|
574
|
+
# keep only alpha numeric chars
|
|
575
|
+
abbr = "".join(ch for ch in abbr if ch.isalnum())
|
|
576
|
+
if not abbr:
|
|
577
|
+
# fallback to a sanitized, truncated version of the original name
|
|
578
|
+
abbr = sanitize_filename(name)[: _EvaluationConfig.NAME_LIMIT]
|
|
579
|
+
_LOGGER.warning(f"Using abbreviated name '{abbr}' for '{name}' plots.")
|
|
580
|
+
name = abbr
|
|
509
581
|
|
|
510
582
|
# --- Save Classification Report for the label (uses y_pred) ---
|
|
511
583
|
report_text = classification_report(true_i, pred_i)
|
|
@@ -537,7 +609,7 @@ def multi_label_classification_metrics(
|
|
|
537
609
|
ax_cm.tick_params(axis='y', labelsize=ytick_size)
|
|
538
610
|
|
|
539
611
|
# Set titles and labels with padding
|
|
540
|
-
ax_cm.set_title(f"Confusion Matrix
|
|
612
|
+
ax_cm.set_title(f"Confusion Matrix - {name}", pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
|
|
541
613
|
ax_cm.set_xlabel(ax_cm.get_xlabel(), labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
542
614
|
ax_cm.set_ylabel(ax_cm.get_ylabel(), labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
543
615
|
|
|
@@ -594,7 +666,7 @@ def multi_label_classification_metrics(
|
|
|
594
666
|
ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}', color=format_config.ROC_PR_line) # Use config color
|
|
595
667
|
ax_roc.plot([0, 1], [0, 1], 'k--')
|
|
596
668
|
|
|
597
|
-
ax_roc.set_title(f'ROC Curve
|
|
669
|
+
ax_roc.set_title(f'ROC Curve - {name}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
|
|
598
670
|
ax_roc.set_xlabel('False Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
599
671
|
ax_roc.set_ylabel('True Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
600
672
|
|
|
@@ -616,7 +688,7 @@ def multi_label_classification_metrics(
|
|
|
616
688
|
ap_score = average_precision_score(true_i, prob_i)
|
|
617
689
|
fig_pr, ax_pr = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
|
|
618
690
|
ax_pr.plot(recall, precision, label=f'AP = {ap_score:.2f}', color=format_config.ROC_PR_line) # Use config color
|
|
619
|
-
ax_pr.set_title(f'
|
|
691
|
+
ax_pr.set_title(f'PR Curve - {name}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
|
|
620
692
|
ax_pr.set_xlabel('Recall', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
621
693
|
ax_pr.set_ylabel('Precision', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
622
694
|
|
|
@@ -659,10 +731,10 @@ def multi_label_classification_metrics(
|
|
|
659
731
|
prob_true,
|
|
660
732
|
marker='o',
|
|
661
733
|
linewidth=2,
|
|
662
|
-
label=f"Calibration
|
|
734
|
+
label=f"Model Calibration",
|
|
663
735
|
color=format_config.ROC_PR_line)
|
|
664
736
|
|
|
665
|
-
ax_cal.set_title(f'
|
|
737
|
+
ax_cal.set_title(f'Calibration - {name}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
|
|
666
738
|
ax_cal.set_xlabel('Mean Predicted Probability', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
667
739
|
ax_cal.set_ylabel('Fraction of Positives', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
|
|
668
740
|
|
|
@@ -2,6 +2,7 @@ from ._analysis import (
|
|
|
2
2
|
summarize_dataframe,
|
|
3
3
|
show_null_columns,
|
|
4
4
|
match_and_filter_columns_by_regex,
|
|
5
|
+
check_class_balance,
|
|
5
6
|
)
|
|
6
7
|
|
|
7
8
|
from ._cleaning import (
|
|
@@ -28,6 +29,7 @@ from ._features import (
|
|
|
28
29
|
split_continuous_binary,
|
|
29
30
|
split_continuous_categorical_targets,
|
|
30
31
|
encode_categorical_features,
|
|
32
|
+
encode_classification_target,
|
|
31
33
|
reconstruct_one_hot,
|
|
32
34
|
reconstruct_binary,
|
|
33
35
|
reconstruct_multibinary,
|
|
@@ -44,7 +46,6 @@ from .._core import _imprimir_disponibles
|
|
|
44
46
|
|
|
45
47
|
__all__ = [
|
|
46
48
|
"summarize_dataframe",
|
|
47
|
-
"show_null_columns",
|
|
48
49
|
"drop_constant_columns",
|
|
49
50
|
"drop_rows_with_missing_data",
|
|
50
51
|
"drop_columns_with_missing_data",
|
|
@@ -61,10 +62,13 @@ __all__ = [
|
|
|
61
62
|
"plot_categorical_vs_target",
|
|
62
63
|
"plot_correlation_heatmap",
|
|
63
64
|
"encode_categorical_features",
|
|
65
|
+
"encode_classification_target",
|
|
64
66
|
"finalize_feature_schema",
|
|
65
67
|
"apply_feature_schema",
|
|
66
68
|
"reconstruct_from_schema",
|
|
67
69
|
"match_and_filter_columns_by_regex",
|
|
70
|
+
"show_null_columns",
|
|
71
|
+
"check_class_balance",
|
|
68
72
|
"standardize_percentages",
|
|
69
73
|
"reconstruct_one_hot",
|
|
70
74
|
"reconstruct_binary",
|
{dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_analysis.py
RENAMED
|
@@ -16,6 +16,7 @@ __all__ = [
|
|
|
16
16
|
"summarize_dataframe",
|
|
17
17
|
"show_null_columns",
|
|
18
18
|
"match_and_filter_columns_by_regex",
|
|
19
|
+
"check_class_balance",
|
|
19
20
|
]
|
|
20
21
|
|
|
21
22
|
|
|
@@ -212,3 +213,151 @@ def match_and_filter_columns_by_regex(
|
|
|
212
213
|
|
|
213
214
|
return filtered_df, matched_columns
|
|
214
215
|
|
|
216
|
+
|
|
217
|
+
def check_class_balance(
    df: pd.DataFrame,
    target: Union[str, list[str]],
    plot_to_dir: Optional[Union[str, Path]] = None,
    plot_filename: str = "Class_Balance"
) -> pd.DataFrame:
    """
    Analyzes the class balance for classification targets.

    Handles two cases:
    1. Single column (binary/multi-class): counts the frequency of each unique value.
       Missing values are counted as their own class.
    2. Several columns (multi-label binary): counts, per column, the rows holding a
       positive value (a numeric value greater than 0). Entries that cannot be
       converted to a number are treated as not positive.

    A list (or tuple) containing exactly one column name is treated as the single column case.

    Args:
        df (pd.DataFrame): The input DataFrame.
        target (str | list[str]): The target column name (for single/multi-class classification)
            or list of column names (for multi-label-binary classification).
        plot_to_dir (str | Path | None): Directory to save the balance plot. No plot is made if None.
        plot_filename (str): Filename for the plot (without extension). Saved as SVG.

    Returns:
        pd.DataFrame: Summary table of counts and percentages.
            - Single column: index "Class" (sorted), columns "Count" and "Percentage".
            - Multi-label: index "Label", columns "Positive_Count" and "Positive_Percentage"
              (relative to the number of rows), sorted by descending percentage.

    Raises:
        ValueError: If the DataFrame is empty, the target list is empty, or a target column is missing.
        TypeError: If `target` is neither a string nor a list of strings.

    Note:
        Plotting is best-effort: a failure while drawing or saving the figure is logged
        and the summary table is still returned.
    """
    # Early fail for empty DataFrame
    if df.empty:
        message = "Input DataFrame is empty."
        _LOGGER.error(message)
        raise ValueError(message)

    # Accept tuples the same way as lists
    if isinstance(target, tuple):
        target = list(target)

    if isinstance(target, list):
        if not target:
            message = "Target list is empty."
            _LOGGER.error(message)
            raise ValueError(message)
        if len(target) == 1:
            target = target[0]  # Simplify to single column case

    # Case 1: Single Target (Binary or Multi-class)
    if isinstance(target, str):
        if target not in df.columns:
            message = f"Target column '{target}' not found in DataFrame."
            _LOGGER.error(message)
            raise ValueError(message)

        # Calculate stats (percentages are derived from the same counts)
        counts = df[target].value_counts(dropna=False).sort_index()
        percents = counts / counts.sum() * 100

        summary = pd.DataFrame({
            'Count': counts,
            'Percentage': percents.round(2)
        })
        summary.index.name = "Class"

        # Plotting
        if plot_to_dir:
            try:
                save_path = make_fullpath(plot_to_dir, make=True, enforce="directory")

                plt.figure(figsize=(10, 6))
                # Convert index to str to handle numeric classes cleanly on x-axis
                x_labels = summary.index.astype(str)
                bars = plt.bar(x_labels, summary['Count'], color='lightgreen', edgecolor='black', alpha=0.7)

                plt.title(f"Class Balance: {target}")
                plt.xlabel(target)
                plt.ylabel("Count")
                plt.grid(axis='y', linestyle='--', alpha=0.5)

                # Add percentage labels on top of bars
                for bar, pct in zip(bars, summary['Percentage']):
                    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                             f'{pct:.1f}%', ha='center', va='bottom', fontsize=10)

                plt.tight_layout()
                full_filename = sanitize_filename(plot_filename) + ".svg"
                plt.savefig(save_path / full_filename, format='svg', bbox_inches="tight")
                plt.close()
                _LOGGER.info(f"Saved class balance plot: '{full_filename}'")
            except Exception as e:
                # Best-effort plot: report the problem but still return the summary
                _LOGGER.error(f"Failed to plot class balance. Error: {e}")
                plt.close()

        return summary

    # Case 2: Multi-label (List of binary columns)
    elif isinstance(target, list):
        missing_cols = [t for t in target if t not in df.columns]
        if missing_cols:
            message = f"Target columns not found: {missing_cols}"
            _LOGGER.error(message)
            raise ValueError(message)

        total_count = len(df)
        stats = []
        for col in target:
            try:
                # Coerce to numeric; unparsable/missing entries become 0 (not positive).
                numeric_series = pd.to_numeric(df[col], errors='coerce').fillna(0)
                # Count positive rows instead of summing values, so columns that are
                # not strictly 0/1 (e.g. -1/1 or a stray 2) are not miscounted.
                pos_count = int(numeric_series.gt(0).sum())
            except (TypeError, ValueError):
                _LOGGER.warning(f"Column '{col}' could not be processed as numeric. Assuming 0 positives.")
                pos_count = 0

            stats.append({
                'Label': col,
                'Positive_Count': pos_count,
                'Positive_Percentage': round(pos_count / total_count * 100, 2)
            })

        # Ascending order so that the largest bar ends up at the top of the horizontal plot
        summary = pd.DataFrame(stats).set_index("Label").sort_values("Positive_Percentage", ascending=True)

        # Plotting
        if plot_to_dir:
            try:
                save_path = make_fullpath(plot_to_dir, make=True, enforce="directory")

                # Dynamic height for many labels
                height = max(6, len(target) * 0.4)
                plt.figure(figsize=(10, height))

                bars = plt.barh(summary.index, summary['Positive_Percentage'], color='lightgreen', edgecolor='black', alpha=0.7)

                plt.title("Multi-label Binary Class Balance")
                plt.xlabel("Positive Class Percentage (%)")
                plt.xlim(0, 100)
                plt.grid(axis='x', linestyle='--', alpha=0.5)

                # Add percentage labels at the end of bars
                for bar in bars:
                    width = bar.get_width()
                    plt.text(width + 1, bar.get_y() + bar.get_height() / 2, f'{width:.1f}%', ha='left', va='center', fontsize=9)

                plt.tight_layout()
                full_filename = sanitize_filename(plot_filename) + ".svg"
                plt.savefig(save_path / full_filename, format='svg', bbox_inches="tight")
                plt.close()
                _LOGGER.info(f"Saved multi-label balance plot: '{full_filename}'")
            except Exception as e:
                # Best-effort plot: report the problem but still return the summary
                _LOGGER.error(f"Failed to plot class balance. Error: {e}")
                plt.close()

        return summary.sort_values("Positive_Percentage", ascending=False)

    else:
        message = "Target must be a string or a list of strings."
        _LOGGER.error(message)
        raise TypeError(message)
|
{dragon_ml_toolbox-20.7.0 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_features.py
RENAMED
|
@@ -3,7 +3,10 @@ from pandas.api.types import is_numeric_dtype, is_object_dtype
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from typing import Any, Optional, Union
|
|
5
5
|
import re
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
6
8
|
|
|
9
|
+
from ..path_manager import make_fullpath
|
|
7
10
|
from .._core import get_logger
|
|
8
11
|
|
|
9
12
|
|
|
@@ -15,6 +18,7 @@ __all__ = [
|
|
|
15
18
|
"split_continuous_binary",
|
|
16
19
|
"split_continuous_categorical_targets",
|
|
17
20
|
"encode_categorical_features",
|
|
21
|
+
"encode_classification_target",
|
|
18
22
|
"reconstruct_one_hot",
|
|
19
23
|
"reconstruct_binary",
|
|
20
24
|
"reconstruct_multibinary",
|
|
@@ -263,6 +267,78 @@ def encode_categorical_features(
|
|
|
263
267
|
return df_encoded, mappings
|
|
264
268
|
|
|
265
269
|
|
|
270
|
+
def encode_classification_target(
    df: pd.DataFrame,
    target_col: str,
    save_dir: Union[str, Path],
    verbose: int = 2
) -> tuple[pd.DataFrame, dict[str, int]]:
    """
    Encodes a target classification column into integers (0, 1, 2...) and saves the mapping to a JSON file.

    This ensures that the target variable is in the correct numeric format for training
    and provides a persistent artifact (the JSON file) to map predictions back to labels later.

    Labels are stored as strings. Their integer codes follow a deterministic order:
    numeric columns are ordered by numeric value (so 2 comes before 10), every other
    column is ordered by the string form of its labels.

    Args:
        df (pd.DataFrame): Input DataFrame. It is not modified.
        target_col (str): Name of the target column to encode.
        save_dir (str | Path): Directory where the class map JSON ("class_map.json") will be saved.
        verbose (int): Verbosity level for logging (>= 2 logs the saved path, >= 3 also previews the mapping).

    Returns:
        Tuple (Dataframe, Dict):
            - A new DataFrame with the target column encoded as integers.
            - The dictionary mapping original labels (str) to integers (int).

    Raises:
        ValueError: If the target column is missing or contains missing values.
        IOError: If the class map JSON cannot be written.
    """
    if target_col not in df.columns:
        message = f"Target column '{target_col}' not found in DataFrame."
        _LOGGER.error(message)
        raise ValueError(message)

    # Validation: Check for missing values in target
    n_missing = int(df[target_col].isnull().sum())
    if n_missing > 0:
        message = f"Target column '{target_col}' contains {n_missing} missing values. Please handle them before encoding."
        _LOGGER.error(message)
        raise ValueError(message)

    # Ensure directory exists
    save_path = make_fullpath(save_dir, make=True, enforce="directory")
    file_path = save_path / "class_map.json"

    # String form of every label: JSON keys must be strings
    labels_as_str = df[target_col].astype(str)

    if pd.api.types.is_numeric_dtype(df[target_col]):
        # Order by numeric value; sorting the strings would place "10" before "2"
        # and silently re-number integer classes.
        label_table = pd.DataFrame({"value": df[target_col], "label": labels_as_str})
        label_table = label_table.drop_duplicates(subset="label").sort_values("value", kind="stable")
        unique_labels = label_table["label"].tolist()
    else:
        unique_labels = sorted(labels_as_str.unique())

    # Create mapping: { Label -> Integer }
    class_map = {label: idx for idx, label in enumerate(unique_labels)}

    # Apply mapping on a copy, using the same string form as the keys in class_map
    df_encoded = df.copy()
    df_encoded[target_col] = labels_as_str.map(class_map)

    # Save to JSON (only the write itself is guarded)
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(class_map, f, indent=4)
    except (OSError, TypeError, ValueError) as e:
        message = f"Failed to save class map JSON. Error: {e}"
        _LOGGER.error(message)
        raise IOError(message) from e

    if verbose >= 2:
        _LOGGER.info(f"Class mapping saved to: '{file_path}'")

    if verbose >= 3:
        _LOGGER.info(f"Target '{target_col}' encoded with {len(class_map)} classes.")
        # Print a preview
        if len(class_map) <= 10:
            print(f"   Mapping: {class_map}")
        else:
            print(f"   Mapping (first 5): {dict(list(class_map.items())[:5])} ...")

    return df_encoded, class_map
|
|
340
|
+
|
|
341
|
+
|
|
266
342
|
def reconstruct_one_hot(
|
|
267
343
|
df: pd.DataFrame,
|
|
268
344
|
features_to_reconstruct: list[Union[str, tuple[str, Optional[str]]]],
|
|
@@ -306,6 +306,7 @@ class _EvaluationConfig:
|
|
|
306
306
|
LOSS_PLOT_LEGEND_SIZE = 24
|
|
307
307
|
# CM settings
|
|
308
308
|
CM_SIZE = (9, 8) # used for multi label binary classification confusion matrix
|
|
309
|
+
NAME_LIMIT = 20 # max number of characters for feature/label names in plots
|
|
309
310
|
|
|
310
311
|
class _OneHotOtherPlaceholder:
|
|
311
312
|
"""Used internally by GUI_tools."""
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from ._single_resampling import (
|
|
2
|
+
DragonResampler,
|
|
3
|
+
)
|
|
4
|
+
|
|
5
|
+
from ._multi_resampling import (
|
|
6
|
+
DragonMultiResampler,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
from .._core import _imprimir_disponibles
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"DragonResampler",
|
|
14
|
+
"DragonMultiResampler",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def info() -> None:
    """
    Hands this package's public names (`__all__`) to `_imprimir_disponibles`.

    NOTE(review): the helper presumably prints the available names; confirm in `_core`.
    """
    _imprimir_disponibles(__all__)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from typing import Union
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
__all__ = ["_DragonBaseResampler"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class _DragonBaseResampler(ABC):
    """
    Base class for Dragon resamplers handling common I/O and state.

    Subclasses must implement `describe_balance`.
    """
    def __init__(self,
                 return_pandas: bool = False,
                 seed: int = 42):
        """
        Args:
            return_pandas (bool): If True, `_process_return` converts its result to a Pandas DataFrame.
            seed (int): Random seed used for the final shuffle in `_process_return`.
        """
        self.return_pandas = return_pandas
        self.seed = seed

    def _convert_to_polars(self, df: Union[pd.DataFrame, pl.DataFrame]) -> pl.DataFrame:
        """Standardizes input to Polars DataFrame. Non-Pandas input is returned unchanged."""
        if isinstance(df, pd.DataFrame):
            return pl.from_pandas(df)
        return df

    def _convert_to_pandas(self, df: pl.DataFrame) -> pd.DataFrame:
        """Converts Polars DataFrame back to Pandas (without pyarrow extension arrays)."""
        return df.to_pandas(use_pyarrow_extension_array=False)

    def _process_return(self, df: pl.DataFrame, shuffle: bool = True) -> Union[pd.DataFrame, pl.DataFrame]:
        """
        Finalizes the DataFrame:
        1. Global Shuffle (optional but recommended for ML).
        2. Conversion to Pandas (if requested).

        Args:
            df (pl.DataFrame): The resampled data.
            shuffle (bool): Whether to randomly reorder all rows using `self.seed`.

        Returns:
            A Pandas DataFrame if `self.return_pandas` is True, otherwise a Polars DataFrame.
        """
        if shuffle:
            # Random shuffle of the final dataset.
            # `shuffle=True` is required: polars only randomizes the row order of the
            # sample when asked to, the default (`shuffle=False`) does not guarantee it.
            df = df.sample(fraction=1.0, seed=self.seed, with_replacement=False, shuffle=True)

        if self.return_pandas:
            return self._convert_to_pandas(df)
        return df

    @abstractmethod
    def describe_balance(self, df: Union[pd.DataFrame, pl.DataFrame], top_n: int = 10) -> None:
        """
        Prints a statistical summary of the target distribution.
        """
        pass
|