dragon-ml-toolbox 20.5.0__tar.gz → 20.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. {dragon_ml_toolbox-20.5.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-20.7.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +1 -0
  4. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ETL_cleaning/__init__.py +3 -1
  5. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ETL_cleaning/_clean_tools.py +109 -0
  6. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ETL_cleaning/_dragon_cleaner.py +72 -19
  7. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_configuration/_metrics.py +16 -8
  8. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation/_classification.py +76 -30
  9. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/keys/_keys.py +1 -0
  10. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/utilities/__init__.py +10 -0
  11. dragon_ml_toolbox-20.7.0/ml_tools/utilities/_translate.py +292 -0
  12. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/pyproject.toml +1 -1
  13. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/LICENSE +0 -0
  14. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/LICENSE-THIRD-PARTY.md +0 -0
  15. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/README.md +0 -0
  16. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  17. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  18. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  19. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ETL_cleaning/_basic_clean.py +0 -0
  20. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ETL_engineering/__init__.py +0 -0
  21. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ETL_engineering/_dragon_engineering.py +0 -0
  22. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ETL_engineering/_transforms.py +0 -0
  23. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/GUI_tools/_GUI_tools.py +0 -0
  24. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/GUI_tools/__init__.py +0 -0
  25. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/IO_tools/_IO_loggers.py +0 -0
  26. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/IO_tools/_IO_save_load.py +0 -0
  27. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/IO_tools/_IO_utils.py +0 -0
  28. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/IO_tools/__init__.py +0 -0
  29. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/MICE/_MICE_imputation.py +0 -0
  30. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/MICE/__init__.py +0 -0
  31. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/MICE/_dragon_mice.py +0 -0
  32. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_callbacks/__init__.py +0 -0
  33. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_callbacks/_base.py +0 -0
  34. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_callbacks/_checkpoint.py +0 -0
  35. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_callbacks/_early_stop.py +0 -0
  36. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_callbacks/_scheduler.py +0 -0
  37. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_chain/__init__.py +0 -0
  38. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_chain/_chaining_tools.py +0 -0
  39. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_chain/_dragon_chain.py +0 -0
  40. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_chain/_update_schema.py +0 -0
  41. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_configuration/__init__.py +0 -0
  42. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_configuration/_base_model_config.py +0 -0
  43. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_configuration/_finalize.py +0 -0
  44. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_configuration/_models.py +0 -0
  45. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_configuration/_training.py +0 -0
  46. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_datasetmaster/__init__.py +0 -0
  47. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_datasetmaster/_base_datasetmaster.py +0 -0
  48. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_datasetmaster/_datasetmaster.py +0 -0
  49. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_datasetmaster/_sequence_datasetmaster.py +0 -0
  50. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_datasetmaster/_vision_datasetmaster.py +0 -0
  51. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation/__init__.py +0 -0
  52. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation/_feature_importance.py +0 -0
  53. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation/_loss.py +0 -0
  54. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation/_regression.py +0 -0
  55. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation/_sequence.py +0 -0
  56. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation/_vision.py +0 -0
  57. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation_captum/_ML_evaluation_captum.py +0 -0
  58. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_evaluation_captum/__init__.py +0 -0
  59. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_finalize_handler/_ML_finalize_handler.py +0 -0
  60. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_finalize_handler/__init__.py +0 -0
  61. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference/__init__.py +0 -0
  62. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference/_base_inference.py +0 -0
  63. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference/_chain_inference.py +0 -0
  64. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference/_dragon_inference.py +0 -0
  65. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference/_multi_inference.py +0 -0
  66. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference_sequence/__init__.py +0 -0
  67. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference_sequence/_sequence_inference.py +0 -0
  68. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference_vision/__init__.py +0 -0
  69. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_inference_vision/_vision_inference.py +0 -0
  70. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/__init__.py +0 -0
  71. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_base_mlp_attention.py +0 -0
  72. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_base_save_load.py +0 -0
  73. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_dragon_autoint.py +0 -0
  74. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_dragon_gate.py +0 -0
  75. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_dragon_node.py +0 -0
  76. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_dragon_tabnet.py +0 -0
  77. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_dragon_tabular.py +0 -0
  78. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_mlp_attention.py +0 -0
  79. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models/_models_advanced_helpers.py +0 -0
  80. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models_sequence/__init__.py +0 -0
  81. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models_sequence/_sequence_models.py +0 -0
  82. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models_vision/__init__.py +0 -0
  83. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models_vision/_base_wrapper.py +0 -0
  84. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models_vision/_image_classification.py +0 -0
  85. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models_vision/_image_segmentation.py +0 -0
  86. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_models_vision/_object_detection.py +0 -0
  87. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_optimization/__init__.py +0 -0
  88. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_optimization/_multi_dragon.py +0 -0
  89. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_optimization/_single_dragon.py +0 -0
  90. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_optimization/_single_manual.py +0 -0
  91. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_scaler/_ML_scaler.py +0 -0
  92. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_scaler/__init__.py +0 -0
  93. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_trainer/__init__.py +0 -0
  94. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_trainer/_base_trainer.py +0 -0
  95. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_trainer/_dragon_detection_trainer.py +0 -0
  96. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_trainer/_dragon_sequence_trainer.py +0 -0
  97. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_trainer/_dragon_trainer.py +0 -0
  98. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_utilities/__init__.py +0 -0
  99. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_utilities/_artifact_finder.py +0 -0
  100. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_utilities/_inspection.py +0 -0
  101. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_utilities/_train_tools.py +0 -0
  102. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_vision_transformers/__init__.py +0 -0
  103. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_vision_transformers/_core_transforms.py +0 -0
  104. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ML_vision_transformers/_offline_augmentation.py +0 -0
  105. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/PSO_optimization/_PSO.py +0 -0
  106. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/PSO_optimization/__init__.py +0 -0
  107. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/SQL/__init__.py +0 -0
  108. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/SQL/_dragon_SQL.py +0 -0
  109. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/VIF/_VIF_factor.py +0 -0
  110. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/VIF/__init__.py +0 -0
  111. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/__init__.py +0 -0
  112. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/_core/__init__.py +0 -0
  113. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/_core/_logger.py +0 -0
  114. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/_core/_schema_load_ops.py +0 -0
  115. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/_core/_script_info.py +0 -0
  116. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/constants.py +0 -0
  117. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/data_exploration/__init__.py +0 -0
  118. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/data_exploration/_analysis.py +0 -0
  119. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/data_exploration/_cleaning.py +0 -0
  120. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/data_exploration/_features.py +0 -0
  121. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/data_exploration/_plotting.py +0 -0
  122. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/data_exploration/_schema_ops.py +0 -0
  123. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ensemble_evaluation/__init__.py +0 -0
  124. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ensemble_evaluation/_ensemble_evaluation.py +0 -0
  125. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ensemble_inference/__init__.py +0 -0
  126. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ensemble_inference/_ensemble_inference.py +0 -0
  127. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ensemble_learning/__init__.py +0 -0
  128. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/ensemble_learning/_ensemble_learning.py +0 -0
  129. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/excel_handler/__init__.py +0 -0
  130. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/excel_handler/_excel_handler.py +0 -0
  131. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/keys/__init__.py +0 -0
  132. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/math_utilities/__init__.py +0 -0
  133. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/math_utilities/_math_utilities.py +0 -0
  134. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/optimization_tools/__init__.py +0 -0
  135. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/optimization_tools/_optimization_bounds.py +0 -0
  136. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/optimization_tools/_optimization_plots.py +0 -0
  137. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/path_manager/__init__.py +0 -0
  138. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/path_manager/_dragonmanager.py +0 -0
  139. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/path_manager/_path_tools.py +0 -0
  140. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/plot_fonts/__init__.py +0 -0
  141. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/plot_fonts/_plot_fonts.py +0 -0
  142. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/schema/__init__.py +0 -0
  143. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/schema/_feature_schema.py +0 -0
  144. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/schema/_gui_schema.py +0 -0
  145. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/serde/__init__.py +0 -0
  146. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/serde/_serde.py +0 -0
  147. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/utilities/_utility_save_load.py +0 -0
  148. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/ml_tools/utilities/_utility_tools.py +0 -0
  149. {dragon_ml_toolbox-20.5.0 → dragon_ml_toolbox-20.7.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 20.5.0
3
+ Version: 20.7.0
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 20.5.0
3
+ Version: 20.7.0
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -142,5 +142,6 @@ ml_tools/schema/_gui_schema.py
142
142
  ml_tools/serde/__init__.py
143
143
  ml_tools/serde/_serde.py
144
144
  ml_tools/utilities/__init__.py
145
+ ml_tools/utilities/_translate.py
145
146
  ml_tools/utilities/_utility_save_load.py
146
147
  ml_tools/utilities/_utility_tools.py
@@ -10,7 +10,8 @@ from ._dragon_cleaner import (
10
10
  )
11
11
 
12
12
  from ._clean_tools import (
13
- save_unique_values
13
+ save_unique_values,
14
+ save_category_counts,
14
15
  )
15
16
 
16
17
  from .._core import _imprimir_disponibles
@@ -20,6 +21,7 @@ __all__ = [
20
21
  "DragonColumnCleaner",
21
22
  "DragonDataFrameCleaner",
22
23
  "save_unique_values",
24
+ "save_category_counts",
23
25
  "basic_clean",
24
26
  "basic_clean_drop",
25
27
  "drop_macro_polars",
@@ -13,6 +13,7 @@ _LOGGER = get_logger("ETL Clean Tools")
13
13
 
14
14
  __all__ = [
15
15
  "save_unique_values",
16
+ "save_category_counts",
16
17
  ]
17
18
 
18
19
 
@@ -126,3 +127,111 @@ def save_unique_values(csv_path_or_df: Union[str, Path, pl.DataFrame],
126
127
  counter += 1
127
128
 
128
129
  _LOGGER.info(f"{counter} files of unique values created.")
130
+
131
+
132
+ ################ Category Counts per column #################
133
+ def save_category_counts(csv_path_or_df: Union[str, Path, pl.DataFrame],
134
+ output_dir: Union[str, Path],
135
+ use_columns: Optional[list[str]] = None,
136
+ verbose: bool = False,
137
+ keep_column_order: bool = True) -> None:
138
+ """
139
+ Calculates the frequency and percentage of each unique value in the specified columns
140
+ and saves the distribution report to a text file.
141
+
142
+ Useful for checking class balance or identifying rare categories.
143
+
144
+ Args:
145
+ csv_path_or_df (str | Path | pl.DataFrame):
146
+ The file path to the input CSV file or a Polars DataFrame.
147
+ output_dir (str | Path):
148
+ The directory where the report files will be saved.
149
+ use_columns (List[str] | None):
150
+ Columns to analyze. If None, all columns are processed.
151
+ verbose (bool):
152
+ If True, prints progress info.
153
+ keep_column_order (bool):
154
+ If True, prepends a numeric prefix to filenames to maintain order.
155
+ """
156
+ # 1. Handle Input
157
+ if isinstance(csv_path_or_df, pl.DataFrame):
158
+ df = csv_path_or_df
159
+ if use_columns:
160
+ valid_cols = [c for c in use_columns if c in df.columns]
161
+ if not valid_cols:
162
+ _LOGGER.error("None of the specified columns in 'use_columns' exist in the provided DataFrame.")
163
+ raise ValueError()
164
+ df = df.select(valid_cols)
165
+ else:
166
+ csv_path = make_fullpath(input_path=csv_path_or_df, enforce="file")
167
+ df = load_dataframe(df_path=csv_path, use_columns=use_columns, kind="polars", all_strings=True)[0]
168
+
169
+ output_path = make_fullpath(input_path=output_dir, make=True, enforce='directory')
170
+ total_rows = df.height
171
+
172
+ if total_rows == 0:
173
+ _LOGGER.warning("Input DataFrame is empty. No counts to save.")
174
+ return
175
+
176
+ counter = 0
177
+
178
+ # 2. Process Each Column
179
+ for i, col_name in enumerate(df.columns):
180
+ try:
181
+ # Group by, count, and calculate percentage
182
+ # We treat nulls as a category here to see missing data frequency
183
+ stats = (
184
+ df.select(pl.col(col_name))
185
+ .group_by(col_name, maintain_order=False)
186
+ .len(name="count")
187
+ .with_columns(
188
+ (pl.col("count") / total_rows * 100).alias("pct")
189
+ )
190
+ .sort("count", descending=True)
191
+ )
192
+
193
+ # Collect to python list of dicts for writing
194
+ rows = stats.iter_rows(named=True)
195
+ unique_count = stats.height
196
+
197
+ # Check thresholds for warning
198
+ is_high_cardinality = (unique_count > 300) or ((unique_count / total_rows) > 0.5)
199
+
200
+ except Exception:
201
+ _LOGGER.error(f"Could not calculate counts for column '{col_name}'.")
202
+ continue
203
+
204
+ # 3. Write to File
205
+ sanitized_name = sanitize_filename(col_name)
206
+ if not sanitized_name.strip('_'):
207
+ sanitized_name = f'column_{i}'
208
+
209
+ prefix = f"{i + 1}_" if keep_column_order else ''
210
+ file_path = output_path / f"{prefix}{sanitized_name}_counts.txt"
211
+
212
+ try:
213
+ with open(file_path, 'w', encoding='utf-8') as f:
214
+ f.write(f"# Distribution for column: '{col_name}'\n")
215
+ f.write(f"# Total Rows: {total_rows} | Unique Values: {unique_count}\n")
216
+
217
+ if is_high_cardinality:
218
+ f.write(f"# WARNING: High cardinality detected (Unique/Total ratio: {unique_count/total_rows:.2%}).\n")
219
+
220
+ f.write("-" * 65 + "\n")
221
+ f.write(f"{'Count':<10} | {'Percentage':<12} | {'Value'}\n")
222
+ f.write("-" * 65 + "\n")
223
+
224
+ for row in rows:
225
+ val = str(row[col_name])
226
+ count = row["count"]
227
+ pct = row["pct"]
228
+ f.write(f"{count:<10} | {pct:>10.2f}% | {val}\n")
229
+
230
+ except IOError:
231
+ _LOGGER.exception(f"Error writing to file {file_path}.")
232
+ else:
233
+ if verbose:
234
+ print(f" Saved distribution for '{col_name}'.")
235
+ counter += 1
236
+
237
+ _LOGGER.info(f"{counter} distribution files created.")
@@ -1,13 +1,13 @@
1
1
  import polars as pl
2
2
  from pathlib import Path
3
- from typing import Union
3
+ from typing import Union, Optional
4
4
 
5
5
  from ..utilities import save_dataframe_filename, load_dataframe
6
6
 
7
7
  from .._core import get_logger
8
8
  from ..path_manager import make_fullpath
9
9
 
10
- from ._clean_tools import save_unique_values
10
+ from ._clean_tools import save_unique_values, save_category_counts
11
11
 
12
12
 
13
13
  _LOGGER = get_logger("DragonCleaner")
@@ -33,12 +33,18 @@ class DragonColumnCleaner:
33
33
  """
34
34
  def __init__(self,
35
35
  column_name: str,
36
- rules: Union[dict[str, Union[str, None]], dict[str, str]],
36
+ exact_matches: Optional[Union[dict[str, Union[str, None]], dict[str, str]]] = None,
37
+ rules: Optional[Union[dict[str, Union[str, None]], dict[str, str]]] = None,
37
38
  case_insensitive: bool = False):
38
39
  """
39
40
  Args:
40
41
  column_name (str):
41
42
  The name of the column to be cleaned.
43
+ exact_matches (Dict[str, str | None]):
44
+ A dictionary of EXACT string matches to replacement strings.
45
+ - Uses a hash map, which is significantly faster than regex.
46
+ - Used for simple 1-to-1 mappings (e.g., {'Aluminum': 'Al'}).
47
+ - Runs BEFORE the regex rules.
42
48
  rules (Dict[str, str | None]):
43
49
  A dictionary of regex patterns to replacement strings.
44
50
  - Replacement can be None to indicate that matching values should be converted to null.
@@ -61,25 +67,47 @@ class DragonColumnCleaner:
61
67
  if not isinstance(column_name, str) or not column_name:
62
68
  _LOGGER.error("The 'column_name' must be a non-empty string.")
63
69
  raise TypeError()
64
- if not isinstance(rules, dict):
65
- _LOGGER.error("The 'rules' argument must be a dictionary.")
66
- raise TypeError()
67
- # validate rules
68
- for pattern, replacement in rules.items():
69
- if not isinstance(pattern, str):
70
- _LOGGER.error("All keys in 'rules' must be strings representing regex patterns.")
70
+
71
+ # Validate Regex Rules
72
+ if rules is not None:
73
+ if not isinstance(rules, dict):
74
+ _LOGGER.error("The 'rules' argument must be a dictionary.")
71
75
  raise TypeError()
72
- if replacement is not None and not isinstance(replacement, str):
73
- _LOGGER.error("All values in 'rules' must be strings or None (for nullification).")
76
+ for pattern, replacement in rules.items():
77
+ if not isinstance(pattern, str):
78
+ _LOGGER.error("All keys in 'rules' must be strings representing regex patterns.")
79
+ raise TypeError()
80
+ if replacement is not None and not isinstance(replacement, str):
81
+ _LOGGER.error("All values in 'rules' must be strings or None (for nullification).")
82
+ raise TypeError()
83
+
84
+ # Validate Exact Matches
85
+ if exact_matches is not None:
86
+ if not isinstance(exact_matches, dict):
87
+ _LOGGER.error("The 'exact_matches' argument must be a dictionary.")
74
88
  raise TypeError()
89
+ for key, val in exact_matches.items():
90
+ if not isinstance(key, str):
91
+ _LOGGER.error("All keys in 'exact_matches' must be strings.")
92
+ raise TypeError()
93
+ if val is not None and not isinstance(val, str):
94
+ _LOGGER.error("All values in 'exact_matches' must be strings or None.")
95
+ raise TypeError()
96
+
97
+ # Raise if both are None or empty
98
+ if not rules and not exact_matches:
99
+ _LOGGER.error("At least one of 'rules' or 'exact_matches' must be provided.")
100
+ raise ValueError()
75
101
 
76
102
  self.column_name = column_name
77
- self.rules = rules
103
+ self.rules = rules if rules else {}
104
+ self.exact_matches = exact_matches if exact_matches else {}
78
105
  self.case_insensitive = case_insensitive
79
106
 
80
107
  def preview(self,
81
108
  csv_path: Union[str, Path],
82
109
  report_dir: Union[str, Path],
110
+ show_distribution: bool = True,
83
111
  add_value_separator: bool=False,
84
112
  rule_batch_size: int = 150):
85
113
  """
@@ -90,6 +118,8 @@ class DragonColumnCleaner:
90
118
  The path to the CSV file containing the data to clean.
91
119
  report_dir (str | Path):
92
120
  The directory where the preview report will be saved.
121
+ show_distribution (bool):
122
+ If True, generates a category count report for the column after cleaning.
93
123
  add_value_separator (bool):
94
124
  If True, adds a separator line between each unique value in the report.
95
125
  rule_batch_size (int):
@@ -101,13 +131,21 @@ class DragonColumnCleaner:
101
131
  preview_cleaner = DragonDataFrameCleaner(cleaners=[self])
102
132
  df_preview = preview_cleaner.clean(df, rule_batch_size=rule_batch_size)
103
133
 
104
- # Apply cleaning rules to a copy of the column for preview
134
+ # Apply cleaning rules and save reports
105
135
  save_unique_values(csv_path_or_df=df_preview,
106
136
  output_dir=report_dir,
107
137
  use_columns=[self.column_name],
108
138
  verbose=False,
109
139
  keep_column_order=False,
110
140
  add_value_separator=add_value_separator)
141
+
142
+ # Optionally save category counts
143
+ if show_distribution:
144
+ save_category_counts(csv_path_or_df=df_preview,
145
+ output_dir=report_dir,
146
+ use_columns=[self.column_name],
147
+ verbose=False,
148
+ keep_column_order=False)
111
149
 
112
150
 
113
151
  class DragonDataFrameCleaner:
@@ -181,16 +219,23 @@ class DragonDataFrameCleaner:
181
219
  for cleaner in self.cleaners:
182
220
  col_name = cleaner.column_name
183
221
 
184
- # Get all rules as a list of items
222
+ # Start expression for this batch
223
+ col_expr = pl.col(col_name).cast(pl.String)
224
+
225
+ # --- PHASE 1: EXACT MATCHES ---
226
+ # Apply dictionary-based replacement first (faster than regex)
227
+ if cleaner.exact_matches:
228
+ # 'replace' handles dictionary mapping safely. If value is mapped to None, it becomes null.
229
+ col_expr = col_expr.replace(cleaner.exact_matches)
230
+
231
+ # --- PHASE 2: REGEX PATTERNS ---
185
232
  all_rules = list(cleaner.rules.items())
186
233
 
187
234
  # Process in batches of 'rule_batch_size'
188
235
  for i in range(0, len(all_rules), rule_batch_size):
189
236
  rule_batch = all_rules[i : i + rule_batch_size]
190
237
 
191
- # Start expression for this batch
192
- col_expr = pl.col(col_name).cast(pl.String)
193
-
238
+ # continue chaining operations on the same col_expr
194
239
  for pattern, replacement in rule_batch:
195
240
  final_pattern = f"(?i){pattern}" if cleaner.case_insensitive else pattern
196
241
 
@@ -202,6 +247,15 @@ class DragonDataFrameCleaner:
202
247
  col_expr = col_expr.str.replace_all(final_pattern, replacement)
203
248
 
204
249
  # Apply this batch of rules to the LazyFrame
250
+ # apply partially here to keep the logical plan size under control
251
+ final_lf = final_lf.with_columns(col_expr.alias(col_name))
252
+
253
+ # Reset col_expr for the next batch, but pointing to the 'new' column
254
+ # This ensures the next batch works on the result of the previous batch
255
+ col_expr = pl.col(col_name)
256
+
257
+ # If we had exact matches but NO regex rules, we still need to apply the expression once
258
+ if cleaner.exact_matches and not all_rules:
205
259
  final_lf = final_lf.with_columns(col_expr.alias(col_name))
206
260
 
207
261
  # 3. Collect Results
@@ -242,4 +296,3 @@ class DragonDataFrameCleaner:
242
296
  save_dataframe_filename(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)
243
297
 
244
298
  return None
245
-
@@ -1,4 +1,4 @@
1
- from typing import Union
1
+ from typing import Union, Literal
2
2
 
3
3
 
4
4
  __all__ = [
@@ -26,7 +26,7 @@ class _BaseClassificationFormat:
26
26
  def __init__(self,
27
27
  cmap: str="BuGn",
28
28
  ROC_PR_line: str='darkorange',
29
- calibration_bins: int=15,
29
+ calibration_bins: Union[int, Literal['auto']]='auto',
30
30
  xtick_size: int=22,
31
31
  ytick_size: int=22,
32
32
  legend_size: int=26,
@@ -46,8 +46,8 @@ class _BaseClassificationFormat:
46
46
  - Common color names: 'darkorange', 'cornflowerblue', 'crimson', 'forestgreen'
47
47
  - Hex codes: '#FF6347', '#4682B4'
48
48
 
49
- calibration_bins (int): The number of bins to use when
50
- creating the calibration (reliability) plot.
49
+ calibration_bins (int | 'auto'): The number of bins to use when creating the calibration (reliability) plot. If 'auto', the number will be dynamically determined based on the number of samples.
50
+ - Typical int values: 10, 15, 20
51
51
 
52
52
  font_size (int): The base font size to apply to the plots.
53
53
 
@@ -97,6 +97,7 @@ class _BaseMultiLabelFormat:
97
97
  def __init__(self,
98
98
  cmap: str = "BuGn",
99
99
  ROC_PR_line: str='darkorange',
100
+ calibration_bins: Union[int, Literal['auto']]='auto',
100
101
  font_size: int = 25,
101
102
  xtick_size: int=20,
102
103
  ytick_size: int=20,
@@ -115,6 +116,9 @@ class _BaseMultiLabelFormat:
115
116
  - Common color names: 'darkorange', 'cornflowerblue', 'crimson', 'forestgreen'
116
117
  - Hex codes: '#FF6347', '#4682B4'
117
118
 
119
+ calibration_bins (int | 'auto'): The number of bins to use when creating the calibration (reliability) plots for each label. If 'auto', the number will be dynamically determined based on the number of samples.
120
+ - Typical int values: 10, 15, 20
121
+
118
122
  font_size (int): The base font size to apply to the plots.
119
123
 
120
124
  xtick_size (int): Font size for x-axis tick labels.
@@ -133,6 +137,7 @@ class _BaseMultiLabelFormat:
133
137
  """
134
138
  self.cmap = cmap
135
139
  self.ROC_PR_line = ROC_PR_line
140
+ self.calibration_bins = calibration_bins
136
141
  self.font_size = font_size
137
142
  self.xtick_size = xtick_size
138
143
  self.ytick_size = ytick_size
@@ -142,6 +147,7 @@ class _BaseMultiLabelFormat:
142
147
  parts = [
143
148
  f"cmap='{self.cmap}'",
144
149
  f"ROC_PR_line='{self.ROC_PR_line}'",
150
+ f"calibration_bins={self.calibration_bins}",
145
151
  f"font_size={self.font_size}",
146
152
  f"xtick_size={self.xtick_size}",
147
153
  f"ytick_size={self.ytick_size}",
@@ -416,7 +422,7 @@ class FormatBinaryClassificationMetrics(_BaseClassificationFormat):
416
422
  def __init__(self,
417
423
  cmap: str="BuGn",
418
424
  ROC_PR_line: str='darkorange',
419
- calibration_bins: int=15,
425
+ calibration_bins: Union[int, Literal['auto']]='auto',
420
426
  font_size: int=26,
421
427
  xtick_size: int=22,
422
428
  ytick_size: int=22,
@@ -440,7 +446,7 @@ class FormatMultiClassClassificationMetrics(_BaseClassificationFormat):
440
446
  def __init__(self,
441
447
  cmap: str="BuGn",
442
448
  ROC_PR_line: str='darkorange',
443
- calibration_bins: int=15,
449
+ calibration_bins: Union[int, Literal['auto']]='auto',
444
450
  font_size: int=26,
445
451
  xtick_size: int=22,
446
452
  ytick_size: int=22,
@@ -464,7 +470,7 @@ class FormatBinaryImageClassificationMetrics(_BaseClassificationFormat):
464
470
  def __init__(self,
465
471
  cmap: str="BuGn",
466
472
  ROC_PR_line: str='darkorange',
467
- calibration_bins: int=15,
473
+ calibration_bins: Union[int, Literal['auto']]='auto',
468
474
  font_size: int=26,
469
475
  xtick_size: int=22,
470
476
  ytick_size: int=22,
@@ -488,7 +494,7 @@ class FormatMultiClassImageClassificationMetrics(_BaseClassificationFormat):
488
494
  def __init__(self,
489
495
  cmap: str="BuGn",
490
496
  ROC_PR_line: str='darkorange',
491
- calibration_bins: int=15,
497
+ calibration_bins: Union[int, Literal['auto']]='auto',
492
498
  font_size: int=26,
493
499
  xtick_size: int=22,
494
500
  ytick_size: int=22,
@@ -513,6 +519,7 @@ class FormatMultiLabelBinaryClassificationMetrics(_BaseMultiLabelFormat):
513
519
  def __init__(self,
514
520
  cmap: str = "BuGn",
515
521
  ROC_PR_line: str='darkorange',
522
+ calibration_bins: Union[int, Literal['auto']]='auto',
516
523
  font_size: int = 25,
517
524
  xtick_size: int=20,
518
525
  ytick_size: int=20,
@@ -520,6 +527,7 @@ class FormatMultiLabelBinaryClassificationMetrics(_BaseMultiLabelFormat):
520
527
  ) -> None:
521
528
  super().__init__(cmap=cmap,
522
529
  ROC_PR_line=ROC_PR_line,
530
+ calibration_bins=calibration_bins,
523
531
  font_size=font_size,
524
532
  xtick_size=xtick_size,
525
533
  ytick_size=ytick_size,
@@ -2,7 +2,7 @@ import numpy as np
2
2
  import pandas as pd
3
3
  import matplotlib.pyplot as plt
4
4
  import seaborn as sns
5
- from sklearn.calibration import CalibrationDisplay
5
+ from sklearn.calibration import calibration_curve
6
6
  from sklearn.metrics import (
7
7
  classification_report,
8
8
  ConfusionMatrixDisplay,
@@ -378,42 +378,42 @@ def classification_metrics(save_dir: Union[str, Path],
378
378
 
379
379
  # --- Save Calibration Plot ---
380
380
  fig_cal, ax_cal = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
381
+
382
+ user_chosen_bins = format_config.calibration_bins
383
+
384
+ # --- Automate Bin Selection ---
385
+ if not isinstance(user_chosen_bins, int) or user_chosen_bins <= 0:
386
+ # Determine bins based on number of samples
387
+ n_samples = y_true.shape[0]
388
+ if n_samples < 200:
389
+ dynamic_bins = 5
390
+ elif n_samples < 1000:
391
+ dynamic_bins = 10
392
+ else:
393
+ dynamic_bins = 15
394
+ else:
395
+ dynamic_bins = user_chosen_bins
396
+
397
+ # --- Step 1: Get binned data directly ---
398
+ # calibration_curve returns the reliability-diagram data directly, so no temporary plot is needed
399
+ prob_true, prob_pred = calibration_curve(y_true_binary, y_score, n_bins=dynamic_bins)
381
400
 
382
- # --- Step 1: Get binned data *without* plotting ---
383
- with plt.ioff(): # Suppress showing the temporary plot
384
- fig_temp, ax_temp = plt.subplots()
385
- cal_display_temp = CalibrationDisplay.from_predictions(
386
- y_true_binary, # Use binarized labels
387
- y_score,
388
- n_bins=format_config.calibration_bins,
389
- ax=ax_temp,
390
- name="temp" # Add a name to suppress potential warnings
391
- )
392
- # Get the x, y coordinates of the binned data
393
- line_x, line_y = cal_display_temp.line_.get_data() # type: ignore
394
- plt.close(fig_temp) # Close the temporary plot
395
-
396
- # --- Step 2: Build the plot from scratch ---
401
+ # --- Step 2: Plot ---
397
402
  ax_cal.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
398
403
 
399
- sns.regplot(
400
- x=line_x,
401
- y=line_y,
402
- ax=ax_cal,
403
- scatter=False,
404
- label=f"Model calibration",
405
- line_kws={
406
- 'color': format_config.ROC_PR_line,
407
- 'linestyle': '--',
408
- 'linewidth': 2,
409
- }
410
- )
404
+ # Plot the actual calibration curve (connect points with a line)
405
+ ax_cal.plot(prob_pred,
406
+ prob_true,
407
+ marker='o', # Add markers to see bin locations
408
+ linewidth=2,
409
+ label="Model calibration",
410
+ color=format_config.ROC_PR_line)
411
411
 
412
412
  ax_cal.set_title(f'Reliability Curve{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
413
413
  ax_cal.set_xlabel('Mean Predicted Probability', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
414
414
  ax_cal.set_ylabel('Fraction of Positives', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
415
415
 
416
- # --- Step 3: Set final limits *after* plotting ---
416
+ # --- Step 3: Set final limits ---
417
417
  ax_cal.set_ylim(0.0, 1.0)
418
418
  ax_cal.set_xlim(0.0, 1.0)
419
419
 
@@ -428,7 +428,7 @@ def classification_metrics(save_dir: Union[str, Path],
428
428
  cal_path = save_dir_path / f"calibration_plot{save_suffix}.svg"
429
429
  plt.savefig(cal_path)
430
430
  plt.close(fig_cal)
431
-
431
+
432
432
  _LOGGER.info(f"📈 Saved {len(class_indices_to_plot)} sets of ROC, Precision-Recall, and Calibration plots.")
433
433
 
434
434
 
@@ -632,6 +632,52 @@ def multi_label_classification_metrics(
632
632
  pr_path = save_dir_path / f"pr_curve_{sanitized_name}.svg"
633
633
  plt.savefig(pr_path)
634
634
  plt.close(fig_pr)
635
+
636
+ # --- Save Calibration Plot (one per label) ---
637
+ fig_cal, ax_cal = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
638
+
639
+ user_chosen_bins = format_config.calibration_bins
640
+
641
+ # --- Automate Bin Selection ---
642
+ if not isinstance(user_chosen_bins, int) or user_chosen_bins <= 0:
643
+ # Determine bins based on number of samples
644
+ n_samples = y_true.shape[0]
645
+ if n_samples < 200:
646
+ dynamic_bins = 5
647
+ elif n_samples < 1000:
648
+ dynamic_bins = 10
649
+ else:
650
+ dynamic_bins = 15
651
+ else:
652
+ dynamic_bins = user_chosen_bins
653
+
654
+ # Calculate calibration curve for this specific label
655
+ prob_true, prob_pred = calibration_curve(true_i, prob_i, n_bins=dynamic_bins)
656
+
657
+ ax_cal.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
658
+ ax_cal.plot(prob_pred,
659
+ prob_true,
660
+ marker='o',
661
+ linewidth=2,
662
+ label=f"Calibration for '{name}'",
663
+ color=format_config.ROC_PR_line)
664
+
665
+ ax_cal.set_title(f'Reliability Curve for "{name}"', pad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size + 2)
666
+ ax_cal.set_xlabel('Mean Predicted Probability', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
667
+ ax_cal.set_ylabel('Fraction of Positives', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=base_font_size)
668
+
669
+ ax_cal.set_ylim(0.0, 1.0)
670
+ ax_cal.set_xlim(0.0, 1.0)
671
+
672
+ ax_cal.tick_params(axis='x', labelsize=xtick_size)
673
+ ax_cal.tick_params(axis='y', labelsize=ytick_size)
674
+ ax_cal.legend(loc='lower right', fontsize=legend_size)
675
+ ax_cal.grid(True)
676
+
677
+ plt.tight_layout()
678
+ cal_path = save_dir_path / f"calibration_plot_{sanitized_name}.svg"
679
+ plt.savefig(cal_path)
680
+ plt.close(fig_cal)
635
681
 
636
682
  _LOGGER.info(f"All individual label reports and plots saved to '{save_dir_path.name}'")
637
683
 
@@ -4,6 +4,7 @@ class MagicWords:
4
4
  CURRENT = "current"
5
5
  RENAME = "rename"
6
6
  UNKNOWN = "unknown"
7
+ AUTO = "auto"
7
8
 
8
9
 
9
10
  class PyTorchLogKeys:
@@ -15,6 +15,13 @@ from ._utility_tools import (
15
15
  train_dataset_yielder
16
16
  )
17
17
 
18
+ from ._translate import (
19
+ translate_dataframe_columns,
20
+ create_translation_template,
21
+ audit_column_translation
22
+ )
23
+
24
+
18
25
  from .._core import _imprimir_disponibles
19
26
 
20
27
 
@@ -27,6 +34,9 @@ __all__ = [
27
34
  "save_dataframe",
28
35
  "save_dataframe_with_schema",
29
36
  "merge_dataframes",
37
+ "translate_dataframe_columns",
38
+ "create_translation_template",
39
+ "audit_column_translation",
30
40
  "distribute_dataset_by_target",
31
41
  "train_dataset_orchestrator",
32
42
  "train_dataset_yielder"