dragon-ml-toolbox 20.7.1__tar.gz → 20.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. {dragon_ml_toolbox-20.7.1/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-20.8.0}/PKG-INFO +3 -1
  2. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/README.md +2 -0
  3. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO +3 -1
  4. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +4 -0
  5. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_metrics.py +17 -10
  6. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_classification.py +79 -7
  7. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/__init__.py +5 -1
  8. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_analysis.py +149 -0
  9. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_features.py +76 -0
  10. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/keys/_keys.py +1 -0
  11. dragon_ml_toolbox-20.8.0/ml_tools/resampling/__init__.py +19 -0
  12. dragon_ml_toolbox-20.8.0/ml_tools/resampling/_base_resampler.py +49 -0
  13. dragon_ml_toolbox-20.8.0/ml_tools/resampling/_multi_resampling.py +184 -0
  14. dragon_ml_toolbox-20.8.0/ml_tools/resampling/_single_resampling.py +113 -0
  15. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/pyproject.toml +1 -1
  16. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/LICENSE +0 -0
  17. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/LICENSE-THIRD-PARTY.md +0 -0
  18. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  19. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  20. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  21. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/__init__.py +0 -0
  22. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/_basic_clean.py +0 -0
  23. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/_clean_tools.py +0 -0
  24. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_cleaning/_dragon_cleaner.py +0 -0
  25. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_engineering/__init__.py +0 -0
  26. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_engineering/_dragon_engineering.py +0 -0
  27. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ETL_engineering/_transforms.py +0 -0
  28. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/GUI_tools/_GUI_tools.py +0 -0
  29. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/GUI_tools/__init__.py +0 -0
  30. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/_IO_loggers.py +0 -0
  31. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/_IO_save_load.py +0 -0
  32. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/_IO_utils.py +0 -0
  33. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/IO_tools/__init__.py +0 -0
  34. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/MICE/_MICE_imputation.py +0 -0
  35. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/MICE/__init__.py +0 -0
  36. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/MICE/_dragon_mice.py +0 -0
  37. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/__init__.py +0 -0
  38. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_base.py +0 -0
  39. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_checkpoint.py +0 -0
  40. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_early_stop.py +0 -0
  41. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_callbacks/_scheduler.py +0 -0
  42. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/__init__.py +0 -0
  43. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/_chaining_tools.py +0 -0
  44. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/_dragon_chain.py +0 -0
  45. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_chain/_update_schema.py +0 -0
  46. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/__init__.py +0 -0
  47. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_base_model_config.py +0 -0
  48. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_finalize.py +0 -0
  49. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_models.py +0 -0
  50. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_configuration/_training.py +0 -0
  51. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/__init__.py +0 -0
  52. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_base_datasetmaster.py +0 -0
  53. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_datasetmaster.py +0 -0
  54. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_sequence_datasetmaster.py +0 -0
  55. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_datasetmaster/_vision_datasetmaster.py +0 -0
  56. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/__init__.py +0 -0
  57. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_feature_importance.py +0 -0
  58. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_loss.py +0 -0
  59. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_regression.py +0 -0
  60. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_sequence.py +0 -0
  61. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation/_vision.py +0 -0
  62. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation_captum/_ML_evaluation_captum.py +0 -0
  63. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_evaluation_captum/__init__.py +0 -0
  64. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_finalize_handler/_ML_finalize_handler.py +0 -0
  65. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_finalize_handler/__init__.py +0 -0
  66. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/__init__.py +0 -0
  67. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_base_inference.py +0 -0
  68. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_chain_inference.py +0 -0
  69. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_dragon_inference.py +0 -0
  70. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference/_multi_inference.py +0 -0
  71. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_sequence/__init__.py +0 -0
  72. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_sequence/_sequence_inference.py +0 -0
  73. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_vision/__init__.py +0 -0
  74. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_inference_vision/_vision_inference.py +0 -0
  75. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/__init__.py +0 -0
  76. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_base_mlp_attention.py +0 -0
  77. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_base_save_load.py +0 -0
  78. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_autoint.py +0 -0
  79. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_gate.py +0 -0
  80. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_node.py +0 -0
  81. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_tabnet.py +0 -0
  82. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_dragon_tabular.py +0 -0
  83. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_mlp_attention.py +0 -0
  84. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models/_models_advanced_helpers.py +0 -0
  85. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_sequence/__init__.py +0 -0
  86. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_sequence/_sequence_models.py +0 -0
  87. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/__init__.py +0 -0
  88. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_base_wrapper.py +0 -0
  89. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_image_classification.py +0 -0
  90. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_image_segmentation.py +0 -0
  91. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_models_vision/_object_detection.py +0 -0
  92. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/__init__.py +0 -0
  93. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/_multi_dragon.py +0 -0
  94. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/_single_dragon.py +0 -0
  95. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_optimization/_single_manual.py +0 -0
  96. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_scaler/_ML_scaler.py +0 -0
  97. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_scaler/__init__.py +0 -0
  98. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/__init__.py +0 -0
  99. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_base_trainer.py +0 -0
  100. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_dragon_detection_trainer.py +0 -0
  101. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_dragon_sequence_trainer.py +0 -0
  102. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_trainer/_dragon_trainer.py +0 -0
  103. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/__init__.py +0 -0
  104. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/_artifact_finder.py +0 -0
  105. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/_inspection.py +0 -0
  106. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_utilities/_train_tools.py +0 -0
  107. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_vision_transformers/__init__.py +0 -0
  108. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_vision_transformers/_core_transforms.py +0 -0
  109. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ML_vision_transformers/_offline_augmentation.py +0 -0
  110. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/PSO_optimization/_PSO.py +0 -0
  111. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/PSO_optimization/__init__.py +0 -0
  112. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/SQL/__init__.py +0 -0
  113. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/SQL/_dragon_SQL.py +0 -0
  114. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/VIF/_VIF_factor.py +0 -0
  115. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/VIF/__init__.py +0 -0
  116. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/__init__.py +0 -0
  117. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/__init__.py +0 -0
  118. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/_logger.py +0 -0
  119. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/_schema_load_ops.py +0 -0
  120. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/_core/_script_info.py +0 -0
  121. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/constants.py +0 -0
  122. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_cleaning.py +0 -0
  123. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_plotting.py +0 -0
  124. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/data_exploration/_schema_ops.py +0 -0
  125. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_evaluation/__init__.py +0 -0
  126. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_evaluation/_ensemble_evaluation.py +0 -0
  127. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_inference/__init__.py +0 -0
  128. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_inference/_ensemble_inference.py +0 -0
  129. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_learning/__init__.py +0 -0
  130. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/ensemble_learning/_ensemble_learning.py +0 -0
  131. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/excel_handler/__init__.py +0 -0
  132. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/excel_handler/_excel_handler.py +0 -0
  133. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/keys/__init__.py +0 -0
  134. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/math_utilities/__init__.py +0 -0
  135. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/math_utilities/_math_utilities.py +0 -0
  136. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/optimization_tools/__init__.py +0 -0
  137. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/optimization_tools/_optimization_bounds.py +0 -0
  138. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/optimization_tools/_optimization_plots.py +0 -0
  139. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/path_manager/__init__.py +0 -0
  140. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/path_manager/_dragonmanager.py +0 -0
  141. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/path_manager/_path_tools.py +0 -0
  142. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/plot_fonts/__init__.py +0 -0
  143. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/plot_fonts/_plot_fonts.py +0 -0
  144. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/schema/__init__.py +0 -0
  145. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/schema/_feature_schema.py +0 -0
  146. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/schema/_gui_schema.py +0 -0
  147. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/serde/__init__.py +0 -0
  148. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/serde/_serde.py +0 -0
  149. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/__init__.py +0 -0
  150. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/_translate.py +0 -0
  151. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/_utility_save_load.py +0 -0
  152. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/ml_tools/utilities/_utility_tools.py +0 -0
  153. {dragon_ml_toolbox-20.7.1 → dragon_ml_toolbox-20.8.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 20.7.1
3
+ Version: 20.8.0
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -174,6 +174,7 @@ ML_vision_transformers
174
174
  optimization_tools
175
175
  path_manager
176
176
  plot_fonts
177
+ resampling
177
178
  schema
178
179
  serde
179
180
  SQL
@@ -206,6 +207,7 @@ optimization_tools
206
207
  path_manager
207
208
  plot_fonts
208
209
  PSO_optimization
210
+ resampling
209
211
  schema
210
212
  serde
211
213
  SQL
@@ -81,6 +81,7 @@ ML_vision_transformers
81
81
  optimization_tools
82
82
  path_manager
83
83
  plot_fonts
84
+ resampling
84
85
  schema
85
86
  serde
86
87
  SQL
@@ -113,6 +114,7 @@ optimization_tools
113
114
  path_manager
114
115
  plot_fonts
115
116
  PSO_optimization
117
+ resampling
116
118
  schema
117
119
  serde
118
120
  SQL
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 20.7.1
3
+ Version: 20.8.0
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -174,6 +174,7 @@ ML_vision_transformers
174
174
  optimization_tools
175
175
  path_manager
176
176
  plot_fonts
177
+ resampling
177
178
  schema
178
179
  serde
179
180
  SQL
@@ -206,6 +207,7 @@ optimization_tools
206
207
  path_manager
207
208
  plot_fonts
208
209
  PSO_optimization
210
+ resampling
209
211
  schema
210
212
  serde
211
213
  SQL
@@ -136,6 +136,10 @@ ml_tools/path_manager/_dragonmanager.py
136
136
  ml_tools/path_manager/_path_tools.py
137
137
  ml_tools/plot_fonts/__init__.py
138
138
  ml_tools/plot_fonts/_plot_fonts.py
139
+ ml_tools/resampling/__init__.py
140
+ ml_tools/resampling/_base_resampler.py
141
+ ml_tools/resampling/_multi_resampling.py
142
+ ml_tools/resampling/_single_resampling.py
139
143
  ml_tools/schema/__init__.py
140
144
  ml_tools/schema/_feature_schema.py
141
145
  ml_tools/schema/_gui_schema.py
@@ -98,10 +98,11 @@ class _BaseMultiLabelFormat:
98
98
  cmap: str = "BuGn",
99
99
  ROC_PR_line: str='darkorange',
100
100
  calibration_bins: Union[int, Literal['auto']]='auto',
101
- font_size: int = 25,
102
- xtick_size: int=20,
103
- ytick_size: int=20,
104
- legend_size: int=23) -> None:
101
+ font_size: int = 26,
102
+ xtick_size: int=22,
103
+ ytick_size: int=22,
104
+ legend_size: int=26,
105
+ cm_font_size: int=26) -> None:
105
106
  """
106
107
  Initializes the formatting configuration for multi-label classification metrics.
107
108
 
@@ -127,6 +128,8 @@ class _BaseMultiLabelFormat:
127
128
 
128
129
  legend_size (int): Font size for plot legends.
129
130
 
131
+ cm_font_size (int): Font size for the confusion matrix.
132
+
130
133
  <br>
131
134
 
132
135
  ### [Matplotlib Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html)
@@ -142,6 +145,7 @@ class _BaseMultiLabelFormat:
142
145
  self.xtick_size = xtick_size
143
146
  self.ytick_size = ytick_size
144
147
  self.legend_size = legend_size
148
+ self.cm_font_size = cm_font_size
145
149
 
146
150
  def __repr__(self) -> str:
147
151
  parts = [
@@ -151,7 +155,8 @@ class _BaseMultiLabelFormat:
151
155
  f"font_size={self.font_size}",
152
156
  f"xtick_size={self.xtick_size}",
153
157
  f"ytick_size={self.ytick_size}",
154
- f"legend_size={self.legend_size}"
158
+ f"legend_size={self.legend_size}",
159
+ f"cm_font_size={self.cm_font_size}"
155
160
  ]
156
161
  return f"{self.__class__.__name__}({', '.join(parts)})"
157
162
 
@@ -520,10 +525,11 @@ class FormatMultiLabelBinaryClassificationMetrics(_BaseMultiLabelFormat):
520
525
  cmap: str = "BuGn",
521
526
  ROC_PR_line: str='darkorange',
522
527
  calibration_bins: Union[int, Literal['auto']]='auto',
523
- font_size: int = 25,
524
- xtick_size: int=20,
525
- ytick_size: int=20,
526
- legend_size: int=23
528
+ font_size: int = 26,
529
+ xtick_size: int=22,
530
+ ytick_size: int=22,
531
+ legend_size: int=26,
532
+ cm_font_size: int=26
527
533
  ) -> None:
528
534
  super().__init__(cmap=cmap,
529
535
  ROC_PR_line=ROC_PR_line,
@@ -531,7 +537,8 @@ class FormatMultiLabelBinaryClassificationMetrics(_BaseMultiLabelFormat):
531
537
  font_size=font_size,
532
538
  xtick_size=xtick_size,
533
539
  ytick_size=ytick_size,
534
- legend_size=legend_size)
540
+ legend_size=legend_size,
541
+ cm_font_size=cm_font_size)
535
542
 
536
543
 
537
544
  # Segmentation
@@ -481,6 +481,10 @@ def multi_label_classification_metrics(
481
481
  ytick_size = format_config.ytick_size
482
482
  legend_size = format_config.legend_size
483
483
  base_font_size = format_config.font_size
484
+
485
+ # config font size for heatmap
486
+ cm_font_size = format_config.cm_font_size
487
+ cm_tick_size = cm_font_size - 4
484
488
 
485
489
  # --- Calculate and Save Overall Metrics (using y_pred) ---
486
490
  h_loss = hamming_loss(y_true, y_pred)
@@ -488,7 +492,7 @@ def multi_label_classification_metrics(
488
492
  j_score_macro = jaccard_score(y_true, y_pred, average='macro')
489
493
 
490
494
  overall_report = (
491
- f"Overall Multi-Label Metrics:\n" # No threshold to report here
495
+ f"Overall Multi-Label Metrics:\n"
492
496
  f"--------------------------------------------------\n"
493
497
  f"Hamming Loss: {h_loss:.4f}\n"
494
498
  f"Jaccard Score (micro): {j_score_micro:.4f}\n"
@@ -498,14 +502,82 @@ def multi_label_classification_metrics(
498
502
  # print(overall_report)
499
503
  overall_report_path = save_dir_path / "classification_report.txt"
500
504
  overall_report_path.write_text(overall_report)
505
+
506
+ # --- Save Classification Report Heatmap (Multi-label) ---
507
+ try:
508
+ # Generate full report as dict
509
+ full_report_dict = classification_report(y_true, y_pred, target_names=target_names, output_dict=True)
510
+ report_df = pd.DataFrame(full_report_dict)
511
+
512
+ # Cleanup
513
+ # Remove 'accuracy' column if it exists
514
+ report_df = report_df.drop(columns=['accuracy'], errors='ignore')
515
+
516
+ # Remove 'support' row explicitly
517
+ if 'support' in report_df.index:
518
+ report_df = report_df.drop(index='support')
519
+
520
+ # Transpose: Rows = Classes/Averages, Cols = Metrics
521
+ plot_df = report_df.T
522
+
523
+ # Dynamic Height
524
+ fig_height = max(5.0, len(plot_df.index) * 0.5 + 4.0)
525
+ fig_width = 8.0
526
+
527
+ fig_heat, ax_heat = plt.subplots(figsize=(fig_width, fig_height), dpi=_EvaluationConfig.DPI)
528
+
529
+ # Plot
530
+ sns.heatmap(plot_df,
531
+ annot=True,
532
+ cmap=format_config.cmap,
533
+ fmt='.2f',
534
+ vmin=0.0,
535
+ vmax=1.0,
536
+ cbar_kws={'shrink': 0.9})
537
+
538
+ ax_heat.set_title("Classification Report Heatmap", pad=_EvaluationConfig.LABEL_PADDING, fontsize=cm_font_size)
539
+
540
+ # manually increase the font size of the elements
541
+ for text in ax_heat.texts:
542
+ text.set_fontsize(cm_tick_size)
543
+
544
+ cbar = ax_heat.collections[0].colorbar
545
+ cbar.ax.tick_params(labelsize=cm_tick_size - 4) # type: ignore
546
+
547
+ ax_heat.tick_params(axis='x', labelsize=cm_tick_size, pad=_EvaluationConfig.LABEL_PADDING)
548
+ ax_heat.tick_params(axis='y', labelsize=cm_tick_size, pad=_EvaluationConfig.LABEL_PADDING, rotation=0)
549
+
550
+ plt.tight_layout()
551
+ heatmap_path = save_dir_path / "classification_report_heatmap.svg"
552
+ plt.savefig(heatmap_path)
553
+ _LOGGER.info(f"📊 Report heatmap saved as '{heatmap_path.name}'")
554
+ plt.close(fig_heat)
555
+
556
+ except Exception as e:
557
+ _LOGGER.error(f"Could not generate multi-label classification report heatmap: {e}")
501
558
 
502
559
  # --- Per-Label Metrics and Plots ---
503
560
  for i, name in enumerate(target_names):
504
- print(f" -> Evaluating label: '{name}'")
561
+ # strip whitespace from name
562
+ name = name.strip()
563
+
564
+ # print(f" -> Evaluating label: '{name}'")
505
565
  true_i = y_true[:, i]
506
566
  pred_i = y_pred[:, i] # Use passed-in y_pred
507
567
  prob_i = y_prob[:, i] # Use passed-in y_prob
508
568
  sanitized_name = sanitize_filename(name)
569
+
570
+ # if name is too long, just take the first letter of each word. Each word might be separated by space or underscore
571
+ if len(name) >= _EvaluationConfig.NAME_LIMIT:
572
+ parts = [w for w in name.replace("_", " ").split() if w]
573
+ abbr = "".join(p[0].upper() for p in parts)
574
+ # keep only alpha numeric chars
575
+ abbr = "".join(ch for ch in abbr if ch.isalnum())
576
+ if not abbr:
577
+ # fallback to a sanitized, truncated version of the original name
578
+ abbr = sanitize_filename(name)[: _EvaluationConfig.NAME_LIMIT]
579
+ _LOGGER.warning(f"Using abbreviated name '{abbr}' for '{name}' plots.")
580
+ name = abbr
509
581
 
510
582
  # --- Save Classification Report for the label (uses y_pred) ---
511
583
  report_text = classification_report(true_i, pred_i)
@@ -537,7 +609,7 @@ def multi_label_classification_metrics(
537
609
  ax_cm.tick_params(axis='y', labelsize=ytick_size)
538
610
 
539
611
  # Set titles and labels with padding
540
- ax_cm.set_title(f"Confusion Matrix for '{name}'", pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
612
+ ax_cm.set_title(f"Confusion Matrix - {name}", pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
541
613
  ax_cm.set_xlabel(ax_cm.get_xlabel(), labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
542
614
  ax_cm.set_ylabel(ax_cm.get_ylabel(), labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
543
615
 
@@ -594,7 +666,7 @@ def multi_label_classification_metrics(
594
666
  ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}', color=format_config.ROC_PR_line) # Use config color
595
667
  ax_roc.plot([0, 1], [0, 1], 'k--')
596
668
 
597
- ax_roc.set_title(f'ROC Curve for "{name}"', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
669
+ ax_roc.set_title(f'ROC Curve - {name}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
598
670
  ax_roc.set_xlabel('False Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
599
671
  ax_roc.set_ylabel('True Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
600
672
 
@@ -616,7 +688,7 @@ def multi_label_classification_metrics(
616
688
  ap_score = average_precision_score(true_i, prob_i)
617
689
  fig_pr, ax_pr = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
618
690
  ax_pr.plot(recall, precision, label=f'AP = {ap_score:.2f}', color=format_config.ROC_PR_line) # Use config color
619
- ax_pr.set_title(f'Precision-Recall Curve for "{name}"', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
691
+ ax_pr.set_title(f'PR Curve - {name}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
620
692
  ax_pr.set_xlabel('Recall', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
621
693
  ax_pr.set_ylabel('Precision', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
622
694
 
@@ -659,10 +731,10 @@ def multi_label_classification_metrics(
659
731
  prob_true,
660
732
  marker='o',
661
733
  linewidth=2,
662
- label=f"Calibration for '{name}'",
734
+ label=f"Model Calibration",
663
735
  color=format_config.ROC_PR_line)
664
736
 
665
- ax_cal.set_title(f'Reliability Curve for "{name}"', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
737
+ ax_cal.set_title(f'Calibration - {name}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
666
738
  ax_cal.set_xlabel('Mean Predicted Probability', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
667
739
  ax_cal.set_ylabel('Fraction of Positives', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
668
740
 
@@ -2,6 +2,7 @@ from ._analysis import (
2
2
  summarize_dataframe,
3
3
  show_null_columns,
4
4
  match_and_filter_columns_by_regex,
5
+ check_class_balance,
5
6
  )
6
7
 
7
8
  from ._cleaning import (
@@ -28,6 +29,7 @@ from ._features import (
28
29
  split_continuous_binary,
29
30
  split_continuous_categorical_targets,
30
31
  encode_categorical_features,
32
+ encode_classification_target,
31
33
  reconstruct_one_hot,
32
34
  reconstruct_binary,
33
35
  reconstruct_multibinary,
@@ -44,7 +46,6 @@ from .._core import _imprimir_disponibles
44
46
 
45
47
  __all__ = [
46
48
  "summarize_dataframe",
47
- "show_null_columns",
48
49
  "drop_constant_columns",
49
50
  "drop_rows_with_missing_data",
50
51
  "drop_columns_with_missing_data",
@@ -61,10 +62,13 @@ __all__ = [
61
62
  "plot_categorical_vs_target",
62
63
  "plot_correlation_heatmap",
63
64
  "encode_categorical_features",
65
+ "encode_classification_target",
64
66
  "finalize_feature_schema",
65
67
  "apply_feature_schema",
66
68
  "reconstruct_from_schema",
67
69
  "match_and_filter_columns_by_regex",
70
+ "show_null_columns",
71
+ "check_class_balance",
68
72
  "standardize_percentages",
69
73
  "reconstruct_one_hot",
70
74
  "reconstruct_binary",
@@ -16,6 +16,7 @@ __all__ = [
16
16
  "summarize_dataframe",
17
17
  "show_null_columns",
18
18
  "match_and_filter_columns_by_regex",
19
+ "check_class_balance",
19
20
  ]
20
21
 
21
22
 
@@ -212,3 +213,151 @@ def match_and_filter_columns_by_regex(
212
213
 
213
214
  return filtered_df, matched_columns
214
215
 
216
+
217
def check_class_balance(
    df: pd.DataFrame,
    target: Union[str, list[str]],
    plot_to_dir: Optional[Union[str, Path]] = None,
    plot_filename: str = "Class_Balance"
) -> pd.DataFrame:
    """
    Analyzes the class balance for classification targets.

    Handles two cases:
    1. Single Column (Binary/Multi-class): Calculates frequency of each unique value
       (missing values are counted as their own class).
    2. List of Columns (Multi-label Binary): Calculates the frequency of positive values (1) per column.
       The positive count is the sum of the column after numeric coercion, so columns are
       expected to hold 0/1 (or False/True); non-numeric and missing entries count as 0.

    A list holding a single column name is treated as case 1.

    Args:
        df (pd.DataFrame): The input DataFrame.
        target (str | list[str]): The target column name (for single/multi-class classification)
            or list of column names (for multi-label-binary classification).
        plot_to_dir (str | Path | None): Directory to save the balance plot. No plot is made if falsy.
        plot_filename (str): Filename for the plot (without extension).

    Returns:
        pd.DataFrame: Summary table of counts and percentages.
            - Single target: indexed by "Class" (sorted by class value), columns 'Count' and 'Percentage'.
            - Multi-label: indexed by "Label" (sorted by descending 'Positive_Percentage'),
              columns 'Positive_Count' and 'Positive_Percentage'.

    Raises:
        ValueError: If the DataFrame is empty, the target list is empty, or a target column is missing.
        TypeError: If `target` is neither a string nor a list.
    """
    # Early fail for empty DataFrame
    if df.empty:
        message = "Input DataFrame is empty."
        _LOGGER.error(message)
        raise ValueError(message)

    if isinstance(target, list):
        if not target:
            message = "Target list is empty."
            _LOGGER.error(message)
            raise ValueError(message)
        if len(target) == 1:
            target = target[0]  # Simplify to single column case

    # Case 1: Single Target (Binary or Multi-class)
    if isinstance(target, str):
        if target not in df.columns:
            message = f"Target column '{target}' not found in DataFrame."
            _LOGGER.error(message)
            raise ValueError(message)

        # Calculate stats
        counts = df[target].value_counts(dropna=False).sort_index()
        percents = df[target].value_counts(normalize=True, dropna=False).sort_index() * 100

        summary = pd.DataFrame({
            'Count': counts,
            'Percentage': percents.round(2)
        })
        summary.index.name = "Class"

        if plot_to_dir:
            _save_single_target_balance_plot(summary, target, plot_to_dir, plot_filename)

        return summary

    # Case 2: Multi-label (List of binary columns)
    if isinstance(target, list):
        missing_cols = [t for t in target if t not in df.columns]
        if missing_cols:
            message = f"Target columns not found: {missing_cols}"
            _LOGGER.error(message)
            raise ValueError(message)

        # Non-zero: an empty DataFrame was rejected above
        total_count = len(df)

        stats = []
        for col in target:
            # Assume 0/1 or False/True. Sum gives the count of positives.
            # We enforce numeric to be safe
            try:
                numeric_series = pd.to_numeric(df[col], errors='coerce').fillna(0)
                pos_count = numeric_series.sum()
                pct = (pos_count / total_count) * 100
            except Exception:
                # Deliberate best-effort: one bad column must not abort the whole summary
                _LOGGER.warning(f"Column '{col}' could not be processed as numeric. Assuming 0 positives.")
                pos_count = 0
                pct = 0.0

            stats.append({
                'Label': col,
                'Positive_Count': int(pos_count),
                'Positive_Percentage': round(pct, 2)
            })

        # Ascending order so that the largest bar ends up at the top of the horizontal plot
        summary = pd.DataFrame(stats).set_index("Label").sort_values("Positive_Percentage", ascending=True)

        if plot_to_dir:
            _save_multilabel_balance_plot(summary, plot_to_dir, plot_filename)

        return summary.sort_values("Positive_Percentage", ascending=False)

    message = "Target must be a string or a list of strings."
    _LOGGER.error(message)
    raise TypeError(message)


def _save_single_target_balance_plot(
    summary: pd.DataFrame,
    target: str,
    plot_to_dir: Union[str, Path],
    plot_filename: str
) -> None:
    """Saves a bar chart of per-class counts as SVG; failures are logged, never raised."""
    fig = None
    try:
        save_path = make_fullpath(plot_to_dir, make=True, enforce="directory")

        fig = plt.figure(figsize=(10, 6))
        # Convert index to str to handle numeric classes cleanly on x-axis
        x_labels = summary.index.astype(str)
        bars = plt.bar(x_labels, summary['Count'], color='lightgreen', edgecolor='black', alpha=0.7)

        plt.title(f"Class Balance: {target}")
        plt.xlabel(target)
        plt.ylabel("Count")
        plt.grid(axis='y', linestyle='--', alpha=0.5)

        # Add percentage labels on top of bars
        for bar, pct in zip(bars, summary['Percentage']):
            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
                     f'{pct:.1f}%', ha='center', va='bottom', fontsize=10)

        plt.tight_layout()
        full_filename = sanitize_filename(plot_filename) + ".svg"
        fig.savefig(save_path / full_filename, format='svg', bbox_inches="tight")
        _LOGGER.info(f"Saved class balance plot: '{full_filename}'")
    except Exception as e:
        _LOGGER.error(f"Failed to plot class balance. Error: {e}")
    finally:
        # Close only the figure created here; a bare plt.close() could close a figure
        # owned by the caller when the failure happens before plt.figure().
        if fig is not None:
            plt.close(fig)


def _save_multilabel_balance_plot(
    summary: pd.DataFrame,
    plot_to_dir: Union[str, Path],
    plot_filename: str
) -> None:
    """Saves a horizontal bar chart of per-label positive percentages as SVG; failures are logged, never raised."""
    fig = None
    try:
        save_path = make_fullpath(plot_to_dir, make=True, enforce="directory")

        # Dynamic height for many labels
        fig_height = max(6, len(summary) * 0.4)
        fig = plt.figure(figsize=(10, fig_height))

        bars = plt.barh(summary.index, summary['Positive_Percentage'], color='lightgreen', edgecolor='black', alpha=0.7)

        plt.title("Multi-label Binary Class Balance")
        plt.xlabel("Positive Class Percentage (%)")
        plt.xlim(0, 100)
        plt.grid(axis='x', linestyle='--', alpha=0.5)

        # Add percentage labels at the end of bars
        for bar in bars:
            width = bar.get_width()
            plt.text(width + 1, bar.get_y() + bar.get_height()/2, f'{width:.1f}%', ha='left', va='center', fontsize=9)

        plt.tight_layout()
        full_filename = sanitize_filename(plot_filename) + ".svg"
        fig.savefig(save_path / full_filename, format='svg', bbox_inches="tight")
        _LOGGER.info(f"Saved multi-label balance plot: '{full_filename}'")
    except Exception as e:
        _LOGGER.error(f"Failed to plot class balance. Error: {e}")
    finally:
        # Close only the figure created here (see the single-target helper)
        if fig is not None:
            plt.close(fig)
@@ -3,7 +3,10 @@ from pandas.api.types import is_numeric_dtype, is_object_dtype
3
3
  import numpy as np
4
4
  from typing import Any, Optional, Union
5
5
  import re
6
+ import json
7
+ from pathlib import Path
6
8
 
9
+ from ..path_manager import make_fullpath
7
10
  from .._core import get_logger
8
11
 
9
12
 
@@ -15,6 +18,7 @@ __all__ = [
15
18
  "split_continuous_binary",
16
19
  "split_continuous_categorical_targets",
17
20
  "encode_categorical_features",
21
+ "encode_classification_target",
18
22
  "reconstruct_one_hot",
19
23
  "reconstruct_binary",
20
24
  "reconstruct_multibinary",
@@ -263,6 +267,78 @@ def encode_categorical_features(
263
267
  return df_encoded, mappings
264
268
 
265
269
 
270
def encode_classification_target(
    df: pd.DataFrame,
    target_col: str,
    save_dir: Union[str, Path],
    verbose: int = 2
) -> tuple[pd.DataFrame, dict[str, int]]:
    """
    Encodes a target classification column into integers (0, 1, 2...) and saves the mapping to a JSON file.

    This ensures that the target variable is in the correct numeric format for training
    and provides a persistent artifact (the JSON file) to map predictions back to labels later.

    Labels are converted to strings before sorting, so the integer codes follow the
    lexicographic order of the string form (e.g. "10" is ordered before "2").

    Args:
        df (pd.DataFrame): Input DataFrame. It is not modified.
        target_col (str): Name of the target column to encode.
        save_dir (str | Path): Directory where the class map JSON ("class_map.json") will be saved.
        verbose (int): Verbosity level for logging.
            - >= 2 logs the path of the saved mapping.
            - >= 3 additionally logs the class count and prints a preview of the mapping.

    Returns:
        Tuple (Dataframe, Dict):
        - A new DataFrame with the target column encoded as integers.
        - The dictionary mapping original labels (str) to integers (int).

    Raises:
        ValueError: If the target column is missing or contains missing values.
        IOError: If the class map could not be written to disk.
    """
    if target_col not in df.columns:
        message = f"Target column '{target_col}' not found in DataFrame."
        _LOGGER.error(message)
        raise ValueError(message)

    # Validation: Check for missing values in target
    n_missing = df[target_col].isnull().sum()
    if n_missing:
        message = f"Target column '{target_col}' contains {n_missing} missing values. Please handle them before encoding."
        _LOGGER.error(message)
        raise ValueError(message)

    # Ensure directory exists
    save_path = make_fullpath(save_dir, make=True, enforce="directory")
    file_path = save_path / "class_map.json"

    # Cast once to string: JSON keys must be strings, and the same string form
    # is needed both to build the mapping and to apply it.
    labels_as_str = df[target_col].astype(str)

    # Sort the unique labels to ensure deterministic encoding (0, 1, 2...)
    unique_labels = sorted(labels_as_str.unique())

    # Create mapping: { Label -> Integer }
    class_map = {label: idx for idx, label in enumerate(unique_labels)}

    # Apply mapping on a copy so the caller's DataFrame is left untouched
    df_encoded = df.copy()
    df_encoded[target_col] = labels_as_str.map(class_map)

    # Save to JSON (only the write is guarded, so unrelated failures are not mislabeled)
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(class_map, f, indent=4)
    except Exception as e:
        message = f"Failed to save class map JSON. Error: {e}"
        _LOGGER.error(message)
        # Chain the original exception so the root cause stays in the traceback
        raise IOError(message) from e

    if verbose >= 2:
        _LOGGER.info(f"Class mapping saved to: '{file_path}'")

    if verbose >= 3:
        _LOGGER.info(f"Target '{target_col}' encoded with {len(class_map)} classes.")
        # Print a preview
        if len(class_map) <= 10:
            print(f"   Mapping: {class_map}")
        else:
            print(f"   Mapping (first 5): {dict(list(class_map.items())[:5])} ...")

    return df_encoded, class_map
340
+
341
+
266
342
  def reconstruct_one_hot(
267
343
  df: pd.DataFrame,
268
344
  features_to_reconstruct: list[Union[str, tuple[str, Optional[str]]]],
@@ -306,6 +306,7 @@ class _EvaluationConfig:
306
306
  LOSS_PLOT_LEGEND_SIZE = 24
307
307
  # CM settings
308
308
  CM_SIZE = (9, 8) # used for multi label binary classification confusion matrix
309
+ NAME_LIMIT = 20 # max number of characters for feature/label names in plots
309
310
 
310
311
  class _OneHotOtherPlaceholder:
311
312
  """Used internally by GUI_tools."""
@@ -0,0 +1,19 @@
1
+ from ._single_resampling import (
2
+ DragonResampler,
3
+ )
4
+
5
+ from ._multi_resampling import (
6
+ DragonMultiResampler,
7
+ )
8
+
9
+ from .._core import _imprimir_disponibles
10
+
11
+
12
+ __all__ = [
13
+ "DragonResampler",
14
+ "DragonMultiResampler",
15
+ ]
16
+
17
+
18
def info():
    """Passes this package's `__all__` list to `_imprimir_disponibles`."""
    # NOTE(review): presumably prints the available public names; confirm against `_core`.
    _imprimir_disponibles(__all__)
@@ -0,0 +1,49 @@
1
+ import polars as pl
2
+ import pandas as pd
3
+ from typing import Union
4
+ from abc import ABC, abstractmethod
5
+
6
+
7
+ __all__ = ["_DragonBaseResampler"]
8
+
9
+
10
class _DragonBaseResampler(ABC):
    """
    Base class for Dragon resamplers handling common I/O and state.

    Subclasses must implement `describe_balance`.
    """
    def __init__(self,
                 return_pandas: bool = False,
                 seed: int = 42):
        """
        Args:
            return_pandas (bool): If True, `_process_return` converts its result to a Pandas DataFrame.
            seed (int): Random seed used for the final shuffle in `_process_return`.
        """
        self.return_pandas = return_pandas
        self.seed = seed

    def _convert_to_polars(self, df: Union[pd.DataFrame, pl.DataFrame]) -> pl.DataFrame:
        """Standardizes input to Polars DataFrame. Non-Pandas input is returned unchanged."""
        if isinstance(df, pd.DataFrame):
            return pl.from_pandas(df)
        return df

    def _convert_to_pandas(self, df: pl.DataFrame) -> pd.DataFrame:
        """Converts Polars DataFrame back to Pandas."""
        return df.to_pandas(use_pyarrow_extension_array=False)

    def _process_return(self, df: pl.DataFrame, shuffle: bool = True) -> Union[pd.DataFrame, pl.DataFrame]:
        """
        Finalizes the DataFrame:
        1. Global Shuffle (optional but recommended for ML).
        2. Conversion to Pandas (if requested).

        Args:
            df (pl.DataFrame): The resampled DataFrame.
            shuffle (bool): Whether to shuffle all rows, using `self.seed`.

        Returns:
            pd.DataFrame | pl.DataFrame: Pandas if `self.return_pandas` is True, otherwise Polars.
        """
        if shuffle:
            # Random shuffle of the final dataset.
            # `shuffle=True` is required: without it Polars does not guarantee a random row
            # order for the sampled rows, so sampling the full frame is not a real shuffle.
            df = df.sample(fraction=1.0, seed=self.seed, with_replacement=False, shuffle=True)

        if self.return_pandas:
            return self._convert_to_pandas(df)
        return df

    @abstractmethod
    def describe_balance(self, df: Union[pd.DataFrame, pl.DataFrame], top_n: int = 10) -> None:
        """
        Prints a statistical summary of the target distribution.
        """
        pass