dragon-ml-toolbox 19.13.0__py3-none-any.whl → 20.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
  2. dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
  3. ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
  4. ml_tools/ETL_cleaning/_basic_clean.py +351 -0
  5. ml_tools/ETL_cleaning/_clean_tools.py +128 -0
  6. ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
  7. ml_tools/ETL_cleaning/_imprimir.py +13 -0
  8. ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
  9. ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
  10. ml_tools/ETL_engineering/_imprimir.py +24 -0
  11. ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
  12. ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
  13. ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
  14. ml_tools/GUI_tools/_imprimir.py +12 -0
  15. ml_tools/IO_tools/_IO_loggers.py +235 -0
  16. ml_tools/IO_tools/_IO_save_load.py +151 -0
  17. ml_tools/IO_tools/_IO_utils.py +140 -0
  18. ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
  19. ml_tools/IO_tools/_imprimir.py +14 -0
  20. ml_tools/MICE/_MICE_imputation.py +132 -0
  21. ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
  22. ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
  23. ml_tools/MICE/_imprimir.py +11 -0
  24. ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
  25. ml_tools/ML_callbacks/_base.py +101 -0
  26. ml_tools/ML_callbacks/_checkpoint.py +232 -0
  27. ml_tools/ML_callbacks/_early_stop.py +208 -0
  28. ml_tools/ML_callbacks/_imprimir.py +12 -0
  29. ml_tools/ML_callbacks/_scheduler.py +197 -0
  30. ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
  31. ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
  32. ml_tools/ML_chain/_dragon_chain.py +140 -0
  33. ml_tools/ML_chain/_imprimir.py +11 -0
  34. ml_tools/ML_configuration/__init__.py +90 -0
  35. ml_tools/ML_configuration/_base_model_config.py +69 -0
  36. ml_tools/ML_configuration/_finalize.py +366 -0
  37. ml_tools/ML_configuration/_imprimir.py +47 -0
  38. ml_tools/ML_configuration/_metrics.py +593 -0
  39. ml_tools/ML_configuration/_models.py +206 -0
  40. ml_tools/ML_configuration/_training.py +124 -0
  41. ml_tools/ML_datasetmaster/__init__.py +28 -0
  42. ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
  43. ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
  44. ml_tools/ML_datasetmaster/_imprimir.py +15 -0
  45. ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
  46. ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
  47. ml_tools/ML_evaluation/__init__.py +53 -0
  48. ml_tools/ML_evaluation/_classification.py +629 -0
  49. ml_tools/ML_evaluation/_feature_importance.py +409 -0
  50. ml_tools/ML_evaluation/_imprimir.py +25 -0
  51. ml_tools/ML_evaluation/_loss.py +92 -0
  52. ml_tools/ML_evaluation/_regression.py +273 -0
  53. ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
  54. ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
  55. ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
  56. ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
  57. ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
  58. ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
  59. ml_tools/ML_finalize_handler/__init__.py +10 -0
  60. ml_tools/ML_finalize_handler/_imprimir.py +8 -0
  61. ml_tools/ML_inference/__init__.py +22 -0
  62. ml_tools/ML_inference/_base_inference.py +166 -0
  63. ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
  64. ml_tools/ML_inference/_dragon_inference.py +332 -0
  65. ml_tools/ML_inference/_imprimir.py +11 -0
  66. ml_tools/ML_inference/_multi_inference.py +180 -0
  67. ml_tools/ML_inference_sequence/__init__.py +10 -0
  68. ml_tools/ML_inference_sequence/_imprimir.py +8 -0
  69. ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
  70. ml_tools/ML_inference_vision/__init__.py +10 -0
  71. ml_tools/ML_inference_vision/_imprimir.py +8 -0
  72. ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
  73. ml_tools/ML_models/__init__.py +32 -0
  74. ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
  75. ml_tools/ML_models/_base_mlp_attention.py +198 -0
  76. ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
  77. ml_tools/ML_models/_dragon_tabular.py +248 -0
  78. ml_tools/ML_models/_imprimir.py +18 -0
  79. ml_tools/ML_models/_mlp_attention.py +134 -0
  80. ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
  81. ml_tools/ML_models_sequence/__init__.py +10 -0
  82. ml_tools/ML_models_sequence/_imprimir.py +8 -0
  83. ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
  84. ml_tools/ML_models_vision/__init__.py +29 -0
  85. ml_tools/ML_models_vision/_base_wrapper.py +254 -0
  86. ml_tools/ML_models_vision/_image_classification.py +182 -0
  87. ml_tools/ML_models_vision/_image_segmentation.py +108 -0
  88. ml_tools/ML_models_vision/_imprimir.py +16 -0
  89. ml_tools/ML_models_vision/_object_detection.py +135 -0
  90. ml_tools/ML_optimization/__init__.py +21 -0
  91. ml_tools/ML_optimization/_imprimir.py +13 -0
  92. ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
  93. ml_tools/ML_optimization/_single_dragon.py +203 -0
  94. ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
  95. ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
  96. ml_tools/ML_scaler/__init__.py +10 -0
  97. ml_tools/ML_scaler/_imprimir.py +8 -0
  98. ml_tools/ML_trainer/__init__.py +20 -0
  99. ml_tools/ML_trainer/_base_trainer.py +297 -0
  100. ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
  101. ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
  102. ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
  103. ml_tools/ML_trainer/_imprimir.py +10 -0
  104. ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
  105. ml_tools/ML_utilities/_artifact_finder.py +382 -0
  106. ml_tools/ML_utilities/_imprimir.py +16 -0
  107. ml_tools/ML_utilities/_inspection.py +325 -0
  108. ml_tools/ML_utilities/_train_tools.py +205 -0
  109. ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
  110. ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
  111. ml_tools/ML_vision_transformers/_imprimir.py +14 -0
  112. ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
  113. ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
  114. ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
  115. ml_tools/PSO_optimization/_imprimir.py +10 -0
  116. ml_tools/SQL/__init__.py +7 -0
  117. ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
  118. ml_tools/SQL/_imprimir.py +8 -0
  119. ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
  120. ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
  121. ml_tools/VIF/_imprimir.py +10 -0
  122. ml_tools/_core/__init__.py +7 -1
  123. ml_tools/_core/_logger.py +8 -18
  124. ml_tools/_core/_schema_load_ops.py +43 -0
  125. ml_tools/_core/_script_info.py +2 -2
  126. ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
  127. ml_tools/data_exploration/_analysis.py +214 -0
  128. ml_tools/data_exploration/_cleaning.py +566 -0
  129. ml_tools/data_exploration/_features.py +583 -0
  130. ml_tools/data_exploration/_imprimir.py +32 -0
  131. ml_tools/data_exploration/_plotting.py +487 -0
  132. ml_tools/data_exploration/_schema_ops.py +176 -0
  133. ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
  134. ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
  135. ml_tools/ensemble_evaluation/_imprimir.py +14 -0
  136. ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
  137. ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
  138. ml_tools/ensemble_inference/_imprimir.py +9 -0
  139. ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
  140. ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
  141. ml_tools/ensemble_learning/_imprimir.py +10 -0
  142. ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
  143. ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
  144. ml_tools/excel_handler/_imprimir.py +13 -0
  145. ml_tools/{keys.py → keys/__init__.py} +4 -1
  146. ml_tools/keys/_imprimir.py +11 -0
  147. ml_tools/{_core → keys}/_keys.py +2 -0
  148. ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
  149. ml_tools/math_utilities/_imprimir.py +11 -0
  150. ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
  151. ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
  152. ml_tools/optimization_tools/_imprimir.py +13 -0
  153. ml_tools/optimization_tools/_optimization_bounds.py +236 -0
  154. ml_tools/optimization_tools/_optimization_plots.py +218 -0
  155. ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
  156. ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
  157. ml_tools/path_manager/_imprimir.py +15 -0
  158. ml_tools/path_manager/_path_tools.py +346 -0
  159. ml_tools/plot_fonts/__init__.py +8 -0
  160. ml_tools/plot_fonts/_imprimir.py +8 -0
  161. ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
  162. ml_tools/schema/__init__.py +15 -0
  163. ml_tools/schema/_feature_schema.py +223 -0
  164. ml_tools/schema/_gui_schema.py +191 -0
  165. ml_tools/schema/_imprimir.py +10 -0
  166. ml_tools/{serde.py → serde/__init__.py} +4 -2
  167. ml_tools/serde/_imprimir.py +10 -0
  168. ml_tools/{_core → serde}/_serde.py +3 -8
  169. ml_tools/{utilities.py → utilities/__init__.py} +11 -6
  170. ml_tools/utilities/_imprimir.py +18 -0
  171. ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
  172. ml_tools/utilities/_utility_tools.py +192 -0
  173. dragon_ml_toolbox-19.13.0.dist-info/RECORD +0 -111
  174. ml_tools/ML_chaining_inference.py +0 -8
  175. ml_tools/ML_configuration.py +0 -86
  176. ml_tools/ML_configuration_pytab.py +0 -14
  177. ml_tools/ML_datasetmaster.py +0 -10
  178. ml_tools/ML_evaluation.py +0 -16
  179. ml_tools/ML_evaluation_multi.py +0 -12
  180. ml_tools/ML_finalize_handler.py +0 -8
  181. ml_tools/ML_inference.py +0 -12
  182. ml_tools/ML_models.py +0 -14
  183. ml_tools/ML_models_advanced.py +0 -14
  184. ml_tools/ML_models_pytab.py +0 -14
  185. ml_tools/ML_optimization.py +0 -14
  186. ml_tools/ML_optimization_pareto.py +0 -8
  187. ml_tools/ML_scaler.py +0 -8
  188. ml_tools/ML_sequence_datasetmaster.py +0 -8
  189. ml_tools/ML_sequence_evaluation.py +0 -10
  190. ml_tools/ML_sequence_inference.py +0 -8
  191. ml_tools/ML_sequence_models.py +0 -8
  192. ml_tools/ML_trainer.py +0 -12
  193. ml_tools/ML_vision_datasetmaster.py +0 -12
  194. ml_tools/ML_vision_evaluation.py +0 -10
  195. ml_tools/ML_vision_inference.py +0 -8
  196. ml_tools/ML_vision_models.py +0 -18
  197. ml_tools/SQL.py +0 -8
  198. ml_tools/_core/_ETL_cleaning.py +0 -694
  199. ml_tools/_core/_IO_tools.py +0 -498
  200. ml_tools/_core/_ML_callbacks.py +0 -702
  201. ml_tools/_core/_ML_configuration.py +0 -1332
  202. ml_tools/_core/_ML_configuration_pytab.py +0 -102
  203. ml_tools/_core/_ML_evaluation.py +0 -867
  204. ml_tools/_core/_ML_evaluation_multi.py +0 -544
  205. ml_tools/_core/_ML_inference.py +0 -646
  206. ml_tools/_core/_ML_models.py +0 -668
  207. ml_tools/_core/_ML_models_pytab.py +0 -693
  208. ml_tools/_core/_ML_trainer.py +0 -2323
  209. ml_tools/_core/_ML_utilities.py +0 -886
  210. ml_tools/_core/_ML_vision_models.py +0 -644
  211. ml_tools/_core/_data_exploration.py +0 -1901
  212. ml_tools/_core/_optimization_tools.py +0 -493
  213. ml_tools/_core/_schema.py +0 -359
  214. ml_tools/plot_fonts.py +0 -8
  215. ml_tools/schema.py +0 -12
  216. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
  217. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
  218. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  219. {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,218 @@
1
+ from typing import Union, Optional
2
+ from pathlib import Path
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+
7
+ from ..utilities import yield_dataframes_from_dir
8
+
9
+ from ..path_manager import sanitize_filename, make_fullpath, list_csv_paths
10
+ from .._core import get_logger
11
+
12
+
13
+ _LOGGER = get_logger("Optimization Plots")
14
+
15
+
16
+ __all__ = [
17
+ "plot_optimal_feature_distributions",
18
+ "plot_optimal_feature_distributions_from_dataframe",
19
+ ]
20
+
21
+
22
def plot_optimal_feature_distributions(results_dir: Union[str, Path],
                                       verbose: bool=False,
                                       target_columns: Optional[list[str]] = None):
    """
    Analyzes optimization results and plots the distribution of optimal values.

    This function is compatible with mixed-type CSVs (strings for
    categorical features, numbers for continuous). It automatically
    detects the data type for each feature and generates:

    - A Bar Plot for categorical (string) features.
    - A KDE Plot for continuous (numeric) features.

    Plots are saved in a subdirectory inside the source directory.

    Parameters
    ----------
    results_dir : str | Path
        The path to the directory containing the optimization result CSV files.
    verbose : bool
        If True, logs details about which plot type is chosen for each feature.
    target_columns : list[str] | None
        A list of target column names to explicitly exclude from plotting. If None, it defaults to excluding only the last column (assumed as the target).
    """
    # Check results_dir and create output path
    results_path = make_fullpath(results_dir, enforce="directory")
    output_path = make_fullpath(results_path / "DistributionPlots", make=True)

    # Fail fast if the directory contains no CSV files (raises IOError)
    list_csv_paths(results_path, verbose=False, raise_on_empty=True)

    # --- Data Loading and Preparation ---
    _LOGGER.debug(f"📁 Starting analysis from results in: '{results_dir}'")

    data_to_plot = []
    for df, df_name in yield_dataframes_from_dir(results_path, verbose=True):
        if df.shape[1] < 2:
            _LOGGER.warning(f"Skipping '{df_name}': must have at least 2 columns (feature + target).")
            continue

        # --- Column selection logic ---
        if target_columns:
            # 1. Explicitly drop known targets to isolate features
            existing_targets = [c for c in target_columns if c in df.columns]
            features_df = df.drop(columns=existing_targets)

            if features_df.empty:
                _LOGGER.warning(f"Skipping '{df_name}': All columns were dropped based on target_columns list.")
                continue
        else:
            # 2. Fallback: Assume the last column is the only target
            features_df = df.iloc[:, :-1]

        # 3. Melt the filtered dataframe into long (feature, value) format
        melted_df = features_df.melt(var_name='feature', value_name='value')

        # Set target as the filename (or joined target names) to differentiate sources
        melted_df['target'] = '\n'.join(target_columns) if target_columns else df_name
        data_to_plot.append(melted_df)

    if not data_to_plot:
        _LOGGER.error("No valid data to plot after processing all CSVs.")
        return

    long_df = pd.concat(data_to_plot, ignore_index=True)

    # --- Delegate to Helper ---
    _generate_and_save_feature_plots(long_df, output_path, verbose)
88
+
89
+
90
def plot_optimal_feature_distributions_from_dataframe(dataframe: pd.DataFrame,
                                                      save_dir: Union[str, Path],
                                                      verbose: bool=False,
                                                      target_columns: Optional[list[str]] = None):
    """
    Plots the distribution of optimal values from a single dataframe of optimization results.

    Handles mixed-type data (strings for categorical features, numbers for
    continuous); the plot type is chosen automatically per feature:

    - Categorical (string) features -> Bar Plot.
    - Continuous (numeric) features -> KDE Plot.

    Plots are saved in a 'DistributionPlots' subdirectory inside `save_dir`.

    Parameters
    ----------
    dataframe : pd.DataFrame
        The dataframe containing the optimization results (features + target/s).
    save_dir : str | Path
        The directory where the 'DistributionPlots' folder will be created.
    verbose : bool, optional
        If True, logs details about which plot type is chosen for each feature.
    target_columns : list[str] | None
        Target column names to exclude from plotting. When None, only the
        last column is excluded (assumed to be the target).
    """
    # Resolve the output locations, creating them if they do not exist
    root_path = make_fullpath(save_dir, make=True, enforce="directory")
    output_path = make_fullpath(root_path / "DistributionPlots", make=True, enforce="directory")

    _LOGGER.debug(f"📁 Starting analysis from provided DataFrame. Output: '{output_path}'")

    if dataframe.empty:
        _LOGGER.error("Provided dataframe is empty.")
        return

    if dataframe.shape[1] < 2:
        _LOGGER.warning("DataFrame has fewer than 2 columns. Expecting at least one feature and one target.")

    # --- Data Preparation ---
    if not target_columns:
        # Fallback: treat the final column as the sole target
        features_df = dataframe.iloc[:, :-1]
        target_label = "Optimization Result"
    else:
        # Drop every known target column that is actually present
        present_targets = [col for col in target_columns if col in dataframe.columns]
        features_df = dataframe.drop(columns=present_targets)
        target_label = '\n'.join(target_columns)

    if features_df.empty:
        _LOGGER.warning("Skipping plotting: All columns were dropped based on target_columns list.")
        return

    # Reshape to long format and tag every row with the same target label
    long_df = features_df.melt(var_name='feature', value_name='value')
    long_df['target'] = target_label

    # --- Delegate to Helper ---
    _generate_and_save_feature_plots(long_df, output_path, verbose)
152
+
153
+
154
def _generate_and_save_feature_plots(long_df: pd.DataFrame, output_path: Path, verbose: bool) -> None:
    """
    Private helper: iterates over a melted DataFrame (columns: feature, value, target)
    and generates/saves the appropriate plot (Bar or KDE) for each feature.

    Parameters
    ----------
    long_df : pd.DataFrame
        Long-format data with 'feature', 'value' and 'target' columns.
    output_path : Path
        Existing directory where the SVG plots are written.
    verbose : bool
        If True, prints the plot type chosen for each feature.
    """
    features = long_df['feature'].unique()

    _LOGGER.info(f"📊 Found data for {len(features)} features. Generating plots...")

    for feature_name in features:
        plt.figure(figsize=(12, 7))

        # .copy() to ensure we are working with a distinct object
        feature_df = long_df[long_df['feature'] == feature_name].copy()

        # --- Type-checking logic ---
        feature_df['numeric_value'] = pd.to_numeric(feature_df['value'], errors='coerce')

        # If *any* value failed conversion (is NaN), treat it as categorical.
        if feature_df['numeric_value'].isna().any():

            # --- PLOT 1: CATEGORICAL (String-based) ---
            if verbose:
                print(f" Plotting '{feature_name}' as categorical (bar plot).")

            # Calculate percentages for a clean bar plot
            norm_df = (feature_df.groupby('target')['value']
                       .value_counts(normalize=True)
                       .mul(100)
                       .rename('percent')
                       .reset_index())

            ax = sns.barplot(data=norm_df, x='value', y='percent', hue='target')
            plt.ylabel("Frequency (%)", fontsize=12)
            ax.set_ylim(0, 100)

            # always rotate x-ticks for categorical clarity
            plt.xticks(rotation=45, ha='right')

        else:
            # --- PLOT 2: CONTINUOUS (Numeric-based) ---
            if verbose:
                print(f" Plotting '{feature_name}' as continuous (KDE plot).")

            ax = sns.kdeplot(data=feature_df, x='numeric_value', hue='target',
                             fill=True, alpha=0.1, warn_singular=False)

            plt.xlabel("Feature Value", fontsize=12)
            plt.ylabel("Density", fontsize=12)

        # --- Common settings for both plot types ---
        plt.title(f"Optimal Value Distribution for '{feature_name}'", fontsize=16)
        plt.grid(axis='y', alpha=0.5, linestyle='--')

        legend = ax.get_legend()
        if legend:
            legend.set_title('Target')

        sanitized_feature_name = sanitize_filename(feature_name)
        plot_filename = output_path / f"Distribution_{sanitized_feature_name}.svg"
        plt.savefig(plot_filename, bbox_inches='tight')
        plt.close()

    _LOGGER.info(f"All plots saved successfully to: '{output_path}'")
@@ -1,5 +1,6 @@
1
- from ._core._path_manager import (
2
- DragonPathManager,
1
+ from ._dragonmanager import DragonPathManager
2
+
3
+ from ._path_tools import (
3
4
  make_fullpath,
4
5
  sanitize_filename,
5
6
  list_csv_paths,
@@ -7,9 +8,11 @@ from ._core._path_manager import (
7
8
  list_subdirectories,
8
9
  clean_directory,
9
10
  safe_move,
10
- info
11
11
  )
12
12
 
13
+ from ._imprimir import info
14
+
15
+
13
16
  __all__ = [
14
17
  "DragonPathManager",
15
18
  "make_fullpath",
@@ -1,25 +1,17 @@
1
- from typing import Optional, List, Dict, Union, Literal
1
+ from typing import Optional, Union
2
2
  from pathlib import Path
3
- import re
4
3
  import sys
5
- import shutil
6
4
 
7
- from ._script_info import _script_info
8
- from ._logger import get_logger
5
+ from .._core import get_logger
9
6
 
7
+ from ._path_tools import sanitize_filename
10
8
 
11
- _LOGGER = get_logger("Path Manager")
9
+
10
+ _LOGGER = get_logger("DragonPathManager")
12
11
 
13
12
 
14
13
  __all__ = [
15
- "DragonPathManager",
16
- "make_fullpath",
17
- "sanitize_filename",
18
- "list_csv_paths",
19
- "list_files_by_extension",
20
- "list_subdirectories",
21
- "clean_directory",
22
- "safe_move",
14
+ "DragonPathManager"
23
15
  ]
24
16
 
25
17
 
@@ -37,7 +29,7 @@ class DragonPathManager:
37
29
  def __init__(
38
30
  self,
39
31
  anchor_file: str,
40
- base_directories: Optional[List[str]] = None,
32
+ base_directories: Optional[list[str]] = None,
41
33
  strict_to_root: bool = True
42
34
  ):
43
35
  """
@@ -61,7 +53,7 @@ class DragonPathManager:
61
53
  resolved_anchor_path = Path(anchor_file).resolve()
62
54
  self._package_name = resolved_anchor_path.parent.name
63
55
  self._is_bundled, bundle_root = self._get_bundle_root()
64
- self._paths: Dict[str, Path] = {}
56
+ self._paths: dict[str, Path] = {}
65
57
  self._strict_to_root = strict_to_root
66
58
 
67
59
  if self._is_bundled:
@@ -111,12 +103,12 @@ class DragonPathManager:
111
103
  _LOGGER.error(f"Path key '{key}' cannot start with underscores.")
112
104
  raise ValueError()
113
105
 
114
- def update(self, new_paths: Dict[str, Union[str, Path]]) -> None:
106
+ def update(self, new_paths: dict[str, Union[str, Path]]) -> None:
115
107
  """
116
108
  Adds new paths in the manager.
117
109
 
118
110
  Args:
119
- new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
111
+ new_paths (dict[str, Union[str, Path]]): A dictionary where keys are
120
112
  the identifiers and values are the
121
113
  Path objects to store.
122
114
  """
@@ -135,7 +127,7 @@ class DragonPathManager:
135
127
    def _sanitize_key(self, key: str):
        # Delegates path-key normalization to the shared filename sanitizer,
        # so registered keys obey the same character rules as filenames.
        return sanitize_filename(key)
137
129
 
138
- def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
130
+ def make_dirs(self, keys: Optional[list[str]] = None, verbose: bool = False) -> None:
139
131
  """
140
132
  Creates directory structures for registered paths in writable locations.
141
133
 
@@ -325,331 +317,3 @@ class DragonPathManager:
325
317
  # Store absolute Path.
326
318
  self._paths[sanitized_name] = new_path
327
319
 
328
-
329
def make_fullpath(
    input_path: Union[str, Path],
    make: bool = False,
    verbose: bool = False,
    enforce: Optional[Literal["directory", "file"]] = None
) -> Path:
    """
    Resolve a string or Path into an absolute Path, optionally creating it.

    - An existing path is resolved and returned.
    - A missing path with `make=True` is created: as a file when it has a
      suffix, as a directory otherwise.
    - A missing path with `make=False` raises an error.
    - With `enforce`, an error is raised when the resolved path is not of
      the requested kind.
    - With `verbose`, the resolved path's classification is printed.

    Parameters:
        input_path (str | Path):
            Path to resolve.
        make (bool):
            If True, attempt to create file or directory.
        verbose (bool):
            Print classification after resolution.
        enforce ("directory" | "file" | None):
            Raises an error if the resolved path is not what was enforced.

    Returns:
        Path: Resolved absolute path.

    Raises:
        ValueError: If the path doesn't exist and can't be created.
        TypeError: If the final path does not match the `enforce` parameter.

    ## 🗒️ Note:

    Directories with dots will be treated as files.

    Files without extension will be treated as directories.
    """
    candidate = Path(input_path).expanduser()

    # A suffix marks the target as a file (see the note in the docstring).
    treat_as_file = candidate.suffix != ""

    try:
        final_path = candidate.resolve(strict=True)
    except FileNotFoundError:
        if not make:
            _LOGGER.error(f"Path does not exist: '{candidate}'.")
            raise FileNotFoundError()

        try:
            if treat_as_file:
                # Ensure the parent chain exists before touching the file
                candidate.parent.mkdir(parents=True, exist_ok=True)
                candidate.touch(exist_ok=False)
            else:
                candidate.mkdir(parents=True, exist_ok=True)
            final_path = candidate.resolve(strict=True)
        except Exception:
            _LOGGER.exception(f"Failed to create {'file' if treat_as_file else 'directory'} '{candidate}'.")
            raise IOError()

    if enforce == "file" and not final_path.is_file():
        _LOGGER.error(f"Path was enforced as a file, but it is not: '{final_path}'")
        raise TypeError()

    if enforce == "directory" and not final_path.is_dir():
        _LOGGER.error(f"Path was enforced as a directory, but it is not: '{final_path}'")
        raise TypeError()

    if verbose:
        if final_path.is_file():
            print("📄 Path is a File")
        elif final_path.is_dir():
            print("📁 Path is a Directory")
        else:
            print("❓ Path exists but is neither file nor directory")

    return final_path
409
-
410
-
411
def sanitize_filename(filename: str) -> str:
    """
    Make a string safe to use as a filename.

    Steps applied, in order:
    - Strip leading/trailing whitespace.
    - Collapse every internal whitespace run into a single underscore.
    - Drop characters that are invalid in filenames across platforms.

    Args:
        filename (str): Base filename.

    Returns:
        str: A sanitized string suitable to use as a filename.
    """
    # Trim the edges, then turn any remaining whitespace run into '_'
    cleaned = re.sub(r'\s+', '_', filename.strip())

    # Conservative whitelist: word chars, hyphen, and dot survive
    cleaned = re.sub(r'[^\w\-.]', '', cleaned)

    # Everything may have been stripped away — that is an error, not ''
    if not cleaned:
        _LOGGER.error("The sanitized filename is empty. The original input may have contained only invalid characters.")
        raise ValueError()

    return cleaned
439
-
440
-
441
def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
    """
    Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.

    Parameters:
        directory (str | Path): Path to the directory containing `.csv` files.
        verbose (bool): If True, prints found files.
        raise_on_empty (bool): If True, raises IOError if no files are found.

    Returns:
        (dict[str, Path]): Dictionary mapping {filename: filepath}.
    """
    # Thin convenience wrapper over the generic extension-based listing.
    return list_files_by_extension(
        directory=directory,
        extension="csv",
        verbose=verbose,
        raise_on_empty=raise_on_empty,
    )
455
-
456
-
457
def list_files_by_extension(
    directory: Union[str, Path],
    extension: str,
    verbose: bool = True,
    raise_on_empty: bool = True
) -> dict[str, Path]:
    """
    Lists all files with the specified extension in the given directory and
    returns a mapping of filenames (without extensions) to their absolute paths.

    Parameters:
        directory (str | Path): Path to the directory to search in.
        extension (str): File extension to search for (e.g., 'json', 'txt').
        verbose (bool): If True, logs the files found.
        raise_on_empty (bool): If True, raises IOError if no matching files are found.

    Returns:
        (dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
    """
    search_root = make_fullpath(directory, enforce="directory")

    # Accept both 'csv' and '.csv' style inputs
    ext = extension.lstrip(".").lower()
    hits = list(search_root.glob(f"*.{ext}"))

    if not hits:
        msg = f"No '.{ext}' files found in directory: {search_root}."
        if raise_on_empty:
            _LOGGER.error(msg)
            raise IOError()
        if verbose:
            _LOGGER.warning(msg)
        return {}

    # Key by stem so 'data.csv' is addressed simply as 'data'
    mapping = {p.stem: p for p in hits}

    if verbose:
        _LOGGER.info(f"📂 '{ext.upper()}' files found:")
        for stem in mapping:
            print(f"\t{stem}")

    return mapping
502
-
503
-
504
def list_subdirectories(
    root_dir: Union[str, Path],
    verbose: bool = True,
    raise_on_empty: bool = True
) -> dict[str, Path]:
    """
    Scans a directory and returns a dictionary of its immediate subdirectories.

    Args:
        root_dir (str | Path): The path to the directory to scan.
        verbose (bool): If True, prints the number of directories found.
        raise_on_empty (bool): If True, raises IOError if no subdirectories are found.

    Returns:
        dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
    """
    scan_root = make_fullpath(root_dir, enforce="directory")

    # Only immediate children that are directories, fully resolved
    subdirs = [entry.resolve() for entry in scan_root.iterdir() if entry.is_dir()]

    if not subdirs:
        msg = f"No subdirectories found inside '{scan_root}'"
        if raise_on_empty:
            _LOGGER.error(msg)
            raise IOError()
        if verbose:
            _LOGGER.warning(msg)
        return {}

    if verbose:
        total = len(subdirs)
        # Pluralize 'directory' for readable output
        suffix = 'y' if total == 1 else 'ies'
        print(f"Found {total} subdirector{suffix} in '{scan_root.name}'.")

    # Map each directory's name (string) to its full Path object
    return {entry.name: entry for entry in subdirs}
545
-
546
-
547
- def clean_directory(directory: Union[str, Path], verbose: bool = False) -> None:
548
- """
549
- ⚠️ DANGER: DESTRUCTIVE OPERATION ⚠️
550
-
551
- Deletes all files and subdirectories inside the specified directory. It is designed to empty a folder, not delete the folder itself.
552
-
553
- Safety: It skips hidden files and directories (those starting with a period '.'). This works for macOS/Linux hidden files and dot-config folders on Windows.
554
-
555
- Args:
556
- directory (str | Path): The directory path to clean.
557
- verbose (bool): If True, prints the name of each top-level item deleted.
558
- """
559
- target_dir = make_fullpath(directory, enforce="directory")
560
-
561
- if verbose:
562
- _LOGGER.warning(f"Starting cleanup of directory: {target_dir}")
563
-
564
- for item in target_dir.iterdir():
565
- # Safety Check: Skip hidden files/dirs
566
- if item.name.startswith("."):
567
- continue
568
-
569
- try:
570
- if item.is_file() or item.is_symlink():
571
- item.unlink()
572
- if verbose:
573
- print(f" 🗑️ Deleted file: {item.name}")
574
- elif item.is_dir():
575
- shutil.rmtree(item)
576
- if verbose:
577
- print(f" 🗑️ Deleted directory: {item.name}")
578
- except Exception as e:
579
- _LOGGER.warning(f"Failed to delete item '{item.name}': {e}")
580
- continue
581
-
582
-
583
- def safe_move(
584
- source: Union[str, Path],
585
- final_destination: Union[str, Path],
586
- rename: Optional[str] = None,
587
- overwrite: bool = False
588
- ) -> Path:
589
- """
590
- Moves a file or directory to a destination directory with safety checks.
591
-
592
- Features:
593
- - Supports optional renaming (sanitized automatically).
594
- - PRESERVES file extensions during renaming (cannot be modified).
595
- - Prevents accidental overwrites unless explicit.
596
-
597
- Args:
598
- source (str | Path): The file or directory to move.
599
- final_destination (str | Path): The destination DIRECTORY where the item will be moved. It will be created if it does not exist.
600
- rename (Optional[str]): If provided, the moved item will be renamed to this. Note: For files, the extension is strictly preserved.
601
- overwrite (bool): If True, overwrites the destination path if it exists.
602
-
603
- Returns:
604
- Path: The new absolute path of the moved item.
605
- """
606
- # 1. Validation and Setup
607
- src_path = make_fullpath(source, make=False)
608
-
609
- # Ensure destination directory exists
610
- dest_dir_path = make_fullpath(final_destination, make=True, enforce="directory")
611
-
612
- # 2. Determine Target Name
613
- if rename:
614
- sanitized_name = sanitize_filename(rename)
615
- if src_path.is_file():
616
- # Strict Extension Preservation
617
- final_name = f"{sanitized_name}{src_path.suffix}"
618
- else:
619
- final_name = sanitized_name
620
- else:
621
- final_name = src_path.name
622
-
623
- final_path = dest_dir_path / final_name
624
-
625
- # 3. Safety Checks (Collision Detection)
626
- if final_path.exists():
627
- if not overwrite:
628
- _LOGGER.error(f"Destination already exists: '{final_path}'. Use overwrite=True to force.")
629
- raise FileExistsError()
630
-
631
- # Smart Overwrite Handling
632
- if final_path.is_dir():
633
- if src_path.is_file():
634
- _LOGGER.error(f"Cannot overwrite directory '{final_path}' with file '{src_path}'")
635
- raise IsADirectoryError()
636
- # If overwriting a directory, we must remove the old one first to avoid nesting/errors
637
- shutil.rmtree(final_path)
638
- else:
639
- # Destination is a file
640
- if src_path.is_dir():
641
- _LOGGER.error(f"Cannot overwrite file '{final_path}' with directory '{src_path}'")
642
- raise FileExistsError()
643
- final_path.unlink()
644
-
645
- # 4. Perform Move
646
- try:
647
- shutil.move(str(src_path), str(final_path))
648
- return final_path
649
- except Exception as e:
650
- _LOGGER.exception(f"Failed to move '{src_path}' to '{final_path}'")
651
- raise e
652
-
653
-
654
- def info():
655
- _script_info(__all__)
@@ -0,0 +1,15 @@
1
+ from .._core import _imprimir_disponibles
2
+
3
+ _GRUPOS = [
4
+ "DragonPathManager",
5
+ "make_fullpath",
6
+ "sanitize_filename",
7
+ "list_csv_paths",
8
+ "list_files_by_extension",
9
+ "list_subdirectories",
10
+ "clean_directory",
11
+ "safe_move",
12
+ ]
13
+
14
+ def info():
15
+ _imprimir_disponibles(_GRUPOS)