dragon-ml-toolbox 19.14.0__py3-none-any.whl → 20.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
  2. dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
  3. ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
  4. ml_tools/ETL_cleaning/_basic_clean.py +351 -0
  5. ml_tools/ETL_cleaning/_clean_tools.py +128 -0
  6. ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
  7. ml_tools/ETL_cleaning/_imprimir.py +13 -0
  8. ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
  9. ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
  10. ml_tools/ETL_engineering/_imprimir.py +24 -0
  11. ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
  12. ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
  13. ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
  14. ml_tools/GUI_tools/_imprimir.py +12 -0
  15. ml_tools/IO_tools/_IO_loggers.py +235 -0
  16. ml_tools/IO_tools/_IO_save_load.py +151 -0
  17. ml_tools/IO_tools/_IO_utils.py +140 -0
  18. ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
  19. ml_tools/IO_tools/_imprimir.py +14 -0
  20. ml_tools/MICE/_MICE_imputation.py +132 -0
  21. ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
  22. ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
  23. ml_tools/MICE/_imprimir.py +11 -0
  24. ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
  25. ml_tools/ML_callbacks/_base.py +101 -0
  26. ml_tools/ML_callbacks/_checkpoint.py +232 -0
  27. ml_tools/ML_callbacks/_early_stop.py +208 -0
  28. ml_tools/ML_callbacks/_imprimir.py +12 -0
  29. ml_tools/ML_callbacks/_scheduler.py +197 -0
  30. ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
  31. ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
  32. ml_tools/ML_chain/_dragon_chain.py +140 -0
  33. ml_tools/ML_chain/_imprimir.py +11 -0
  34. ml_tools/ML_configuration/__init__.py +90 -0
  35. ml_tools/ML_configuration/_base_model_config.py +69 -0
  36. ml_tools/ML_configuration/_finalize.py +366 -0
  37. ml_tools/ML_configuration/_imprimir.py +47 -0
  38. ml_tools/ML_configuration/_metrics.py +593 -0
  39. ml_tools/ML_configuration/_models.py +206 -0
  40. ml_tools/ML_configuration/_training.py +124 -0
  41. ml_tools/ML_datasetmaster/__init__.py +28 -0
  42. ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
  43. ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
  44. ml_tools/ML_datasetmaster/_imprimir.py +15 -0
  45. ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
  46. ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
  47. ml_tools/ML_evaluation/__init__.py +53 -0
  48. ml_tools/ML_evaluation/_classification.py +629 -0
  49. ml_tools/ML_evaluation/_feature_importance.py +409 -0
  50. ml_tools/ML_evaluation/_imprimir.py +25 -0
  51. ml_tools/ML_evaluation/_loss.py +92 -0
  52. ml_tools/ML_evaluation/_regression.py +273 -0
  53. ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
  54. ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
  55. ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
  56. ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
  57. ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
  58. ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
  59. ml_tools/ML_finalize_handler/__init__.py +10 -0
  60. ml_tools/ML_finalize_handler/_imprimir.py +8 -0
  61. ml_tools/ML_inference/__init__.py +22 -0
  62. ml_tools/ML_inference/_base_inference.py +166 -0
  63. ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
  64. ml_tools/ML_inference/_dragon_inference.py +332 -0
  65. ml_tools/ML_inference/_imprimir.py +11 -0
  66. ml_tools/ML_inference/_multi_inference.py +180 -0
  67. ml_tools/ML_inference_sequence/__init__.py +10 -0
  68. ml_tools/ML_inference_sequence/_imprimir.py +8 -0
  69. ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
  70. ml_tools/ML_inference_vision/__init__.py +10 -0
  71. ml_tools/ML_inference_vision/_imprimir.py +8 -0
  72. ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
  73. ml_tools/ML_models/__init__.py +32 -0
  74. ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
  75. ml_tools/ML_models/_base_mlp_attention.py +198 -0
  76. ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
  77. ml_tools/ML_models/_dragon_tabular.py +248 -0
  78. ml_tools/ML_models/_imprimir.py +18 -0
  79. ml_tools/ML_models/_mlp_attention.py +134 -0
  80. ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
  81. ml_tools/ML_models_sequence/__init__.py +10 -0
  82. ml_tools/ML_models_sequence/_imprimir.py +8 -0
  83. ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
  84. ml_tools/ML_models_vision/__init__.py +29 -0
  85. ml_tools/ML_models_vision/_base_wrapper.py +254 -0
  86. ml_tools/ML_models_vision/_image_classification.py +182 -0
  87. ml_tools/ML_models_vision/_image_segmentation.py +108 -0
  88. ml_tools/ML_models_vision/_imprimir.py +16 -0
  89. ml_tools/ML_models_vision/_object_detection.py +135 -0
  90. ml_tools/ML_optimization/__init__.py +21 -0
  91. ml_tools/ML_optimization/_imprimir.py +13 -0
  92. ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
  93. ml_tools/ML_optimization/_single_dragon.py +203 -0
  94. ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
  95. ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
  96. ml_tools/ML_scaler/__init__.py +10 -0
  97. ml_tools/ML_scaler/_imprimir.py +8 -0
  98. ml_tools/ML_trainer/__init__.py +20 -0
  99. ml_tools/ML_trainer/_base_trainer.py +297 -0
  100. ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
  101. ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
  102. ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
  103. ml_tools/ML_trainer/_imprimir.py +10 -0
  104. ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
  105. ml_tools/ML_utilities/_artifact_finder.py +382 -0
  106. ml_tools/ML_utilities/_imprimir.py +16 -0
  107. ml_tools/ML_utilities/_inspection.py +325 -0
  108. ml_tools/ML_utilities/_train_tools.py +205 -0
  109. ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
  110. ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
  111. ml_tools/ML_vision_transformers/_imprimir.py +14 -0
  112. ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
  113. ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
  114. ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
  115. ml_tools/PSO_optimization/_imprimir.py +10 -0
  116. ml_tools/SQL/__init__.py +7 -0
  117. ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
  118. ml_tools/SQL/_imprimir.py +8 -0
  119. ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
  120. ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
  121. ml_tools/VIF/_imprimir.py +10 -0
  122. ml_tools/_core/__init__.py +7 -1
  123. ml_tools/_core/_logger.py +8 -18
  124. ml_tools/_core/_schema_load_ops.py +43 -0
  125. ml_tools/_core/_script_info.py +2 -2
  126. ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
  127. ml_tools/data_exploration/_analysis.py +214 -0
  128. ml_tools/data_exploration/_cleaning.py +566 -0
  129. ml_tools/data_exploration/_features.py +583 -0
  130. ml_tools/data_exploration/_imprimir.py +32 -0
  131. ml_tools/data_exploration/_plotting.py +487 -0
  132. ml_tools/data_exploration/_schema_ops.py +176 -0
  133. ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
  134. ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
  135. ml_tools/ensemble_evaluation/_imprimir.py +14 -0
  136. ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
  137. ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
  138. ml_tools/ensemble_inference/_imprimir.py +9 -0
  139. ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
  140. ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
  141. ml_tools/ensemble_learning/_imprimir.py +10 -0
  142. ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
  143. ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
  144. ml_tools/excel_handler/_imprimir.py +13 -0
  145. ml_tools/{keys.py → keys/__init__.py} +4 -1
  146. ml_tools/keys/_imprimir.py +11 -0
  147. ml_tools/{_core → keys}/_keys.py +2 -0
  148. ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
  149. ml_tools/math_utilities/_imprimir.py +11 -0
  150. ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
  151. ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
  152. ml_tools/optimization_tools/_imprimir.py +13 -0
  153. ml_tools/optimization_tools/_optimization_bounds.py +236 -0
  154. ml_tools/optimization_tools/_optimization_plots.py +218 -0
  155. ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
  156. ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
  157. ml_tools/path_manager/_imprimir.py +15 -0
  158. ml_tools/path_manager/_path_tools.py +346 -0
  159. ml_tools/plot_fonts/__init__.py +8 -0
  160. ml_tools/plot_fonts/_imprimir.py +8 -0
  161. ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
  162. ml_tools/schema/__init__.py +15 -0
  163. ml_tools/schema/_feature_schema.py +223 -0
  164. ml_tools/schema/_gui_schema.py +191 -0
  165. ml_tools/schema/_imprimir.py +10 -0
  166. ml_tools/{serde.py → serde/__init__.py} +4 -2
  167. ml_tools/serde/_imprimir.py +10 -0
  168. ml_tools/{_core → serde}/_serde.py +3 -8
  169. ml_tools/{utilities.py → utilities/__init__.py} +11 -6
  170. ml_tools/utilities/_imprimir.py +18 -0
  171. ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
  172. ml_tools/utilities/_utility_tools.py +192 -0
  173. dragon_ml_toolbox-19.14.0.dist-info/RECORD +0 -111
  174. ml_tools/ML_chaining_inference.py +0 -8
  175. ml_tools/ML_configuration.py +0 -86
  176. ml_tools/ML_configuration_pytab.py +0 -14
  177. ml_tools/ML_datasetmaster.py +0 -10
  178. ml_tools/ML_evaluation.py +0 -16
  179. ml_tools/ML_evaluation_multi.py +0 -12
  180. ml_tools/ML_finalize_handler.py +0 -8
  181. ml_tools/ML_inference.py +0 -12
  182. ml_tools/ML_models.py +0 -14
  183. ml_tools/ML_models_advanced.py +0 -14
  184. ml_tools/ML_models_pytab.py +0 -14
  185. ml_tools/ML_optimization.py +0 -14
  186. ml_tools/ML_optimization_pareto.py +0 -8
  187. ml_tools/ML_scaler.py +0 -8
  188. ml_tools/ML_sequence_datasetmaster.py +0 -8
  189. ml_tools/ML_sequence_evaluation.py +0 -10
  190. ml_tools/ML_sequence_inference.py +0 -8
  191. ml_tools/ML_sequence_models.py +0 -8
  192. ml_tools/ML_trainer.py +0 -12
  193. ml_tools/ML_vision_datasetmaster.py +0 -12
  194. ml_tools/ML_vision_evaluation.py +0 -10
  195. ml_tools/ML_vision_inference.py +0 -8
  196. ml_tools/ML_vision_models.py +0 -18
  197. ml_tools/SQL.py +0 -8
  198. ml_tools/_core/_ETL_cleaning.py +0 -694
  199. ml_tools/_core/_IO_tools.py +0 -498
  200. ml_tools/_core/_ML_callbacks.py +0 -702
  201. ml_tools/_core/_ML_configuration.py +0 -1332
  202. ml_tools/_core/_ML_configuration_pytab.py +0 -102
  203. ml_tools/_core/_ML_evaluation.py +0 -867
  204. ml_tools/_core/_ML_evaluation_multi.py +0 -544
  205. ml_tools/_core/_ML_inference.py +0 -646
  206. ml_tools/_core/_ML_models.py +0 -668
  207. ml_tools/_core/_ML_models_pytab.py +0 -693
  208. ml_tools/_core/_ML_trainer.py +0 -2323
  209. ml_tools/_core/_ML_utilities.py +0 -886
  210. ml_tools/_core/_ML_vision_models.py +0 -644
  211. ml_tools/_core/_data_exploration.py +0 -1909
  212. ml_tools/_core/_optimization_tools.py +0 -493
  213. ml_tools/_core/_schema.py +0 -359
  214. ml_tools/plot_fonts.py +0 -8
  215. ml_tools/schema.py +0 -12
  216. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
  217. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
  218. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  219. {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,16 @@
1
1
  import polars as pl
2
2
  import re
3
- from pathlib import Path
4
- from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
3
+ from typing import Literal, Union, Optional
5
4
 
6
- from ._utilities import load_dataframe, save_dataframe_filename
7
- from ._path_manager import make_fullpath
8
- from ._script_info import _script_info
9
- from ._logger import get_logger
10
5
  from ..constants import CHEMICAL_ELEMENT_SYMBOLS
11
- from ._keys import MagicWords
12
6
 
7
+ from .._core import get_logger
13
8
 
14
- _LOGGER = get_logger("ETL Engineering")
9
+
10
+ _LOGGER = get_logger("ETL Transforms")
15
11
 
16
12
 
17
13
  __all__ = [
18
- "DragonTransformRecipe",
19
- "DragonProcessor",
20
14
  "BinaryTransformer",
21
15
  "MultiBinaryDummifier",
22
16
  "AutoDummifier",
@@ -34,250 +28,6 @@ __all__ = [
34
28
  "MolecularFormulaTransformer"
35
29
  ]
36
30
 
37
- ############ TRANSFORM MAIN ####################
38
-
39
- class DragonTransformRecipe:
40
- """
41
- A builder class for creating a data transformation recipe.
42
-
43
- This class provides a structured way to define a series of transformation
44
- steps, with validation performed at the time of addition. It is designed
45
- to be passed to a `DragonProcessor`.
46
-
47
- Use the method `add()` to add recipes.
48
- """
49
- def __init__(self):
50
- self._steps: List[Dict[str, Any]] = []
51
-
52
- def add(
53
- self,
54
- input_col_name: str,
55
- transform: Union[str, Callable],
56
- output_col_names: Optional[Union[str, List[str]]] = None
57
- ) -> "DragonTransformRecipe":
58
- """
59
- Adds a new transformation step to the recipe.
60
-
61
- Args:
62
- input_col_name: The name of the column from the source DataFrame.
63
- output_col_names: The desired name(s) for the output column(s).
64
- - A string for a 1-to-1 mapping.
65
- - A list of strings for a 1-to-many mapping.
66
- - A string prefix for 1-to-many mapping.
67
- - If None, the input name is used for 1-to-1 transforms,
68
- or the transformer's default names are used for 1-to-many.
69
- transform: The transformation to apply:
70
- - Use "rename" for simple column renaming
71
- - If callable, must accept a `pl.Series` as the only parameter and return either a `pl.Series` or `pl.DataFrame`.
72
-
73
- Returns:
74
- The instance of the recipe itself to allow for method chaining.
75
- """
76
- # --- Validation ---
77
- if not isinstance(input_col_name, str) or not input_col_name:
78
- _LOGGER.error("'input_col' must be a non-empty string.")
79
- raise TypeError()
80
-
81
- if transform == MagicWords.RENAME:
82
- if not isinstance(output_col_names, str):
83
- _LOGGER.error("For a RENAME operation, 'output_col' must be a string.")
84
- raise TypeError()
85
- elif not isinstance(transform, Callable):
86
- _LOGGER.error(f"'transform' must be a callable function or the string '{MagicWords.RENAME}'.")
87
- raise TypeError()
88
-
89
- # --- Add Step ---
90
- step = {
91
- "input_col": input_col_name,
92
- "output_col": output_col_names,
93
- "transform": transform,
94
- }
95
- self._steps.append(step)
96
- return self # Allow chaining: recipe.add(...).add(...)
97
-
98
- def __iter__(self):
99
- """Allows the class to be iterated over, like a list."""
100
- return iter(self._steps)
101
-
102
- def __len__(self):
103
- """Allows the len() function to be used on an instance."""
104
- return len(self._steps)
105
-
106
-
107
- class DragonProcessor:
108
- """
109
- Transforms a Polars DataFrame based on a provided `DragonTransformRecipe` object.
110
-
111
- Use the methods `transform()` or `load_transform_save()`.
112
- """
113
- def __init__(self, recipe: DragonTransformRecipe):
114
- """
115
- Initializes the DragonProcessor with a transformation recipe.
116
-
117
- Args:
118
- recipe: An instance of the `DragonTransformRecipe` class that has
119
- been populated with transformation steps.
120
- """
121
- if not isinstance(recipe, DragonTransformRecipe):
122
- _LOGGER.error("The recipe must be an instance of DragonTransformRecipe.")
123
- raise TypeError()
124
- if len(recipe) == 0:
125
- _LOGGER.error("The recipe cannot be empty.")
126
- raise ValueError()
127
- self._recipe = recipe
128
-
129
- def transform(self, df: pl.DataFrame) -> pl.DataFrame:
130
- """
131
- Applies the transformation recipe to the input DataFrame.
132
- """
133
- processed_columns = []
134
- # Recipe object is iterable
135
- for step in self._recipe:
136
- input_col_name = step["input_col"]
137
- output_col_spec = step["output_col"]
138
- transform_action = step["transform"]
139
-
140
- if input_col_name not in df.columns:
141
- _LOGGER.error(f"Input column '{input_col_name}' not found in DataFrame.")
142
- raise ValueError()
143
-
144
- input_series = df.get_column(input_col_name)
145
-
146
- if transform_action == MagicWords.RENAME:
147
- processed_columns.append(input_series.alias(output_col_spec))
148
- continue
149
-
150
- if isinstance(transform_action, Callable):
151
- result = transform_action(input_series)
152
-
153
- if isinstance(result, pl.Series):
154
- # Default to input name if spec is None
155
- output_name = output_col_spec if output_col_spec is not None else input_col_name
156
-
157
- if not isinstance(output_name, str):
158
- _LOGGER.error(f"Function for '{input_col_name}' returned a Series but 'output_col' must be a string or None.")
159
- raise TypeError()
160
- processed_columns.append(result.alias(output_name))
161
-
162
- elif isinstance(result, pl.DataFrame):
163
- # 1. Handle None in output names
164
- if output_col_spec is None:
165
- # Use the column names generated by the transformer directly
166
- processed_columns.extend(result.get_columns())
167
-
168
- # 2. Handle list-based renaming
169
- elif isinstance(output_col_spec, list):
170
- if len(result.columns) != len(output_col_spec):
171
- _LOGGER.error(f"Mismatch in '{input_col_name}': function produced {len(result.columns)} columns, but recipe specifies {len(output_col_spec)} output names.")
172
- raise ValueError()
173
-
174
- renamed_df = result.rename(dict(zip(result.columns, output_col_spec)))
175
- processed_columns.extend(renamed_df.get_columns())
176
-
177
- # 3. Global logic for adding a single prefix to all columns.
178
- elif isinstance(output_col_spec, str):
179
- prefix = output_col_spec
180
- new_names = {}
181
-
182
- for col in result.columns:
183
- # Case 1: Transformer's output column name contains the input name.
184
- # Action: Replace the input name with the desired prefix.
185
- # Example: input='color', output='color_red', prefix='spec' -> 'spec_red'
186
- # if input_col_name in col:
187
- if col.startswith(input_col_name):
188
- new_names[col] = col.replace(input_col_name, prefix, 1)
189
-
190
- # Case 2: Transformer's output is an independent name.
191
- # Action: Prepend the prefix to the output name.
192
- # Example: input='ratio', output='A_B', prefix='spec' -> 'spec_A_B'
193
- else:
194
- new_names[col] = f"{prefix}_{col}"
195
-
196
- renamed_df = result.rename(new_names)
197
- processed_columns.extend(renamed_df.get_columns())
198
-
199
-
200
- else:
201
- _LOGGER.error(f"Function for '{input_col_name}' returned a DataFrame, so 'output_col' must be a list of names, a string prefix, or None.")
202
- raise TypeError()
203
-
204
- else:
205
- _LOGGER.error(f"Function for '{input_col_name}' returned an unexpected type: {type(result)}.")
206
- raise TypeError()
207
-
208
- else: # This case is unlikely due to builder validation.
209
- _LOGGER.error(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")
210
- raise TypeError()
211
-
212
- if not processed_columns:
213
- _LOGGER.error("The transformation resulted in an empty DataFrame.")
214
- return pl.DataFrame()
215
-
216
- _LOGGER.info(f"Processed dataframe with {len(processed_columns)} columns.")
217
-
218
- return pl.DataFrame(processed_columns)
219
-
220
- def load_transform_save(self, input_path: Union[str,Path], output_path: Union[str,Path]):
221
- """
222
- Convenience wrapper for the transform method that includes automatic dataframe loading and saving.
223
- """
224
- # Validate paths
225
- in_path = make_fullpath(input_path, enforce="file")
226
- out_path = make_fullpath(output_path, make=True, enforce="file")
227
-
228
- # load df
229
- df, _ = load_dataframe(df_path=in_path, kind="polars", all_strings=True)
230
-
231
- # Process
232
- df_processed = self.transform(df)
233
-
234
- # save processed df
235
- save_dataframe_filename(df=df_processed, save_dir=out_path.parent, filename=out_path.name)
236
-
237
- def __str__(self) -> str:
238
- """
239
- Provides a detailed, human-readable string representation of the
240
- entire processing pipeline.
241
- """
242
- header = "DragonProcessor Pipeline"
243
- divider = "-" * len(header)
244
- num_steps = len(self._recipe)
245
-
246
- lines = [
247
- header,
248
- divider,
249
- f"Number of steps: {num_steps}\n"
250
- ]
251
-
252
- if num_steps == 0:
253
- lines.append("No transformation steps defined.")
254
- return "\n".join(lines)
255
-
256
- for i, step in enumerate(self._recipe, 1):
257
- transform_action = step["transform"]
258
-
259
- # Get a clean name for the transformation action
260
- if transform_action == MagicWords.RENAME: # "rename"
261
- transform_name = "Rename"
262
- else:
263
- # This works for both functions and class instances
264
- transform_name = type(transform_action).__name__
265
-
266
- lines.append(f"[{i}] Input: '{step['input_col']}'")
267
- lines.append(f" - Transform: {transform_name}")
268
- lines.append(f" - Output(s): {step['output_col']}")
269
- if i < num_steps:
270
- lines.append("") # Add a blank line between steps
271
-
272
- return "\n".join(lines)
273
-
274
- def inspect(self) -> None:
275
- """
276
- Prints the detailed string representation of the pipeline to the console.
277
- """
278
- print(self)
279
-
280
- ############ TRANSFORMERS ####################
281
31
 
282
32
  class BinaryTransformer:
283
33
  """
@@ -285,8 +35,8 @@ class BinaryTransformer:
285
35
  """
286
36
  def __init__(
287
37
  self,
288
- true_keywords: Optional[List[str]] = None,
289
- false_keywords: Optional[List[str]] = None,
38
+ true_keywords: Optional[list[str]] = None,
39
+ false_keywords: Optional[list[str]] = None,
290
40
  case_insensitive: bool = True,
291
41
  use_regex: bool = False
292
42
  ):
@@ -407,7 +157,7 @@ class MultiBinaryDummifier:
407
157
  text column based on a list of keywords or regex patterns.
408
158
  """
409
159
  def __init__(self,
410
- keywords: List[str],
160
+ keywords: list[str],
411
161
  case_insensitive: bool = True,
412
162
  use_regex: bool = False):
413
163
  """
@@ -499,8 +249,8 @@ class KeywordDummifier:
499
249
  """
500
250
  def __init__(
501
251
  self,
502
- group_names: List[str],
503
- group_keywords: List[List[str]],
252
+ group_names: list[str],
253
+ group_keywords: list[list[str]],
504
254
  case_insensitive: bool = True,
505
255
  use_regex: bool = False
506
256
  ):
@@ -1099,7 +849,7 @@ class CategoryMapper:
1099
849
  """
1100
850
  def __init__(
1101
851
  self,
1102
- mapping: Dict[str, Union[int, float]],
852
+ mapping: dict[str, Union[int, float]],
1103
853
  unseen_value: Optional[Union[int, float]] = None,
1104
854
  ):
1105
855
  if not isinstance(mapping, dict):
@@ -1160,7 +910,7 @@ class RegexMapper:
1160
910
  """
1161
911
  def __init__(
1162
912
  self,
1163
- mapping: Dict[str, Union[int, float]],
913
+ mapping: dict[str, Union[int, float]],
1164
914
  unseen_value: Optional[Union[int, float]] = None,
1165
915
  case_insensitive: bool = True,
1166
916
  ):
@@ -1173,7 +923,7 @@ class RegexMapper:
1173
923
 
1174
924
  # --- Process and validate patterns ---
1175
925
  # Process patterns here to be more efficient, avoiding reprocessing on every __call__.
1176
- self.processed_mapping: List[Tuple[str, Union[int, float]]] = []
926
+ self.processed_mapping: list[tuple[str, Union[int, float]]] = []
1177
927
  for pattern, value in mapping.items():
1178
928
  final_pattern = f"(?i){pattern}" if case_insensitive else pattern
1179
929
 
@@ -1224,7 +974,7 @@ class ValueBinner:
1224
974
  """
1225
975
  def __init__(
1226
976
  self,
1227
- breaks: List[Union[int, float]],
977
+ breaks: list[Union[int, float]],
1228
978
  left_closed: bool = False,
1229
979
  ):
1230
980
  """
@@ -1294,7 +1044,7 @@ class DateFeatureExtractor:
1294
1044
 
1295
1045
  def __init__(
1296
1046
  self,
1297
- features: List[str],
1047
+ features: list[str],
1298
1048
  format: Optional[str] = None,
1299
1049
  ):
1300
1050
  """
@@ -1422,6 +1172,3 @@ class MolecularFormulaTransformer:
1422
1172
 
1423
1173
  return base_df.select(select_expressions)
1424
1174
 
1425
-
1426
- def info():
1427
- _script_info(__all__)
@@ -3,14 +3,13 @@ from pathlib import Path
3
3
  import traceback
4
4
  import FreeSimpleGUI as sg
5
5
  from functools import wraps
6
- from typing import Any, Dict, Tuple, List, Literal, Union, Optional, Callable
6
+ from typing import Any, Literal, Union, Optional, Callable
7
7
  import numpy as np
8
8
  import json
9
9
 
10
- from ._script_info import _script_info
11
- from ._path_manager import make_fullpath
12
- from ._logger import get_logger
13
- from ._keys import _OneHotOtherPlaceholder, SchemaKeys
10
+ from ..path_manager import make_fullpath
11
+ from .._core import get_logger
12
+ from ..keys._keys import _OneHotOtherPlaceholder, SchemaKeys
14
13
 
15
14
 
16
15
  _LOGGER = get_logger("GUI Tools")
@@ -19,9 +18,9 @@ _LOGGER = get_logger("GUI Tools")
19
18
  __all__ = [
20
19
  "DragonGUIConfig",
21
20
  "DragonGUIFactory",
22
- "catch_exceptions",
23
21
  "DragonFeatureMaster",
24
- "DragonGUIHandler"
22
+ "DragonGUIHandler",
23
+ "catch_exceptions",
25
24
  ]
26
25
 
27
26
  # --- Configuration Management ---
@@ -197,7 +196,7 @@ class DragonGUIFactory:
197
196
  }
198
197
  return sg.Button(text.title(), key=key, **style_args)
199
198
 
200
- def make_frame(self, title: str, layout: List[List[Union[sg.Element, sg.Column]]], center_layout: bool = False, **kwargs) -> sg.Frame:
199
+ def make_frame(self, title: str, layout: list[list[Union[sg.Element, sg.Column]]], center_layout: bool = False, **kwargs) -> sg.Frame:
201
200
  """
202
201
  Creates a styled frame around a given layout.
203
202
 
@@ -226,12 +225,12 @@ class DragonGUIFactory:
226
225
  # --- General-Purpose Layout Generators ---
227
226
  def generate_continuous_layout(
228
227
  self,
229
- data_dict: Dict[str, Union[Tuple[Union[int,float,None], Union[int,float,None]],List[Union[int,float,None]]]],
228
+ data_dict: dict[str, Union[tuple[Union[int,float,None], Union[int,float,None]],list[Union[int,float,None]]]],
230
229
  is_target: bool = False,
231
230
  layout_mode: Literal["grid", "row"] = 'grid',
232
231
  number_columns: int = 5,
233
232
  center_layout: bool = True
234
- ) -> List[List[sg.Column]]:
233
+ ) -> list[list[sg.Column]]:
235
234
  """
236
235
  Generates a layout for continuous features or targets.
237
236
 
@@ -292,11 +291,11 @@ class DragonGUIFactory:
292
291
 
293
292
  def generate_combo_layout(
294
293
  self,
295
- data_dict: Dict[str, Union[List[Any],Tuple[Any,...]]],
294
+ data_dict: dict[str, Union[list[Any],tuple[Any,...]]],
296
295
  layout_mode: Literal["grid", "row"] = 'grid',
297
296
  number_columns: int = 5,
298
297
  center_layout: bool = True
299
- ) -> List[List[sg.Column]]:
298
+ ) -> list[list[sg.Column]]:
300
299
  """
301
300
  Generates a layout for categorical or binary features using Combo boxes.
302
301
 
@@ -334,11 +333,11 @@ class DragonGUIFactory:
334
333
 
335
334
  def generate_multiselect_layout(
336
335
  self,
337
- data_dict: Dict[str, Union[List[Any], Tuple[Any, ...]]],
336
+ data_dict: dict[str, Union[list[Any], tuple[Any, ...]]],
338
337
  layout_mode: Literal["grid", "row"] = 'grid',
339
338
  number_columns: int = 5,
340
339
  center_layout: bool = True
341
- ) -> List[List[sg.Column]]:
340
+ ) -> list[list[sg.Column]]:
342
341
  """
343
342
  Generates a layout for features using Listbox elements for multiple selections.
344
343
 
@@ -386,7 +385,7 @@ class DragonGUIFactory:
386
385
  return self._build_grid_layout(all_feature_layouts, number_columns, bg_color, center_layout) # type: ignore
387
386
 
388
387
  # --- Window Creation ---
389
- def create_window(self, title: str, layout: List[List[sg.Element]], **kwargs) -> sg.Window:
388
+ def create_window(self, title: str, layout: list[list[sg.Element]], **kwargs) -> sg.Window:
390
389
  """
391
390
  Creates and finalizes the main application window.
392
391
 
@@ -413,7 +412,7 @@ class DragonGUIFactory:
413
412
 
414
413
  return window
415
414
 
416
- def _build_grid_layout(self, all_feature_layouts: List[sg.Column], num_columns: int, bg_color: str, center_layout: bool = True) -> List[List[sg.Column]]:
415
+ def _build_grid_layout(self, all_feature_layouts: list[sg.Column], num_columns: int, bg_color: str, center_layout: bool = True) -> list[list[sg.Column]]:
417
416
  """
418
417
  Private helper to distribute feature layouts vertically into a grid of columns.
419
418
  """
@@ -485,12 +484,12 @@ class DragonFeatureMaster:
485
484
  for each feature type.
486
485
  """
487
486
  def __init__(self,
488
- targets: Dict[str, str],
489
- continuous_features: Optional[Dict[str, Tuple[str, float, float]]] = None,
490
- binary_features: Optional[Dict[str, str]] = None,
491
- multi_binary_features: Optional[Dict[str, Dict[str, str]]] = None,
492
- one_hot_features: Optional[Dict[str, Dict[str, str]]] = None,
493
- categorical_features: Optional[List[Tuple[str, str, Dict[str, int]]]] = None,
487
+ targets: dict[str, str],
488
+ continuous_features: Optional[dict[str, tuple[str, float, float]]] = None,
489
+ binary_features: Optional[dict[str, str]] = None,
490
+ multi_binary_features: Optional[dict[str, dict[str, str]]] = None,
491
+ one_hot_features: Optional[dict[str, dict[str, str]]] = None,
492
+ categorical_features: Optional[list[tuple[str, str, dict[str, int]]]] = None,
494
493
  add_one_hot_other_placeholder: bool = True) -> None:
495
494
  """
496
495
  Initializes the DragonFeatureMaster instance by processing feature and target definitions.
@@ -690,26 +689,26 @@ class DragonFeatureMaster:
690
689
  add_one_hot_other_placeholder=False
691
690
  )
692
691
 
693
- def _handle_targets(self, targets: Dict[str, str]):
692
+ def _handle_targets(self, targets: dict[str, str]):
694
693
  # Make dictionary GUI name: range values
695
694
  gui_values: dict[str, tuple[None,None]] = {gui_key: (None, None) for gui_key in targets.keys()}
696
695
  # Map GUI name to Model name (same as input)
697
696
  return gui_values
698
697
 
699
- def _handle_continuous_features(self, continuous_features: Dict[str, Tuple[str, float, float]]):
698
+ def _handle_continuous_features(self, continuous_features: dict[str, tuple[str, float, float]]):
700
699
  # Make dictionary GUI name: range values
701
700
  gui_values: dict[str, tuple[float,float]] = {gui_key: (tuple_values[1], tuple_values[2]) for gui_key, tuple_values in continuous_features.items()}
702
701
  # Map GUI name to Model name
703
702
  gui_to_model: dict[str,str] = {gui_key: tuple_values[0] for gui_key, tuple_values in continuous_features.items()}
704
703
  return gui_values, gui_to_model
705
704
 
706
- def _handle_binary_features(self, binary_features: Dict[str, str]):
705
+ def _handle_binary_features(self, binary_features: dict[str, str]):
707
706
  # Make dictionary GUI name: range values
708
707
  gui_values: dict[str, tuple[Literal["False"],Literal["True"]]] = {gui_key: ("False", "True") for gui_key in binary_features.keys()}
709
708
  # Map GUI name to Model name (same as input)
710
709
  return gui_values
711
710
 
712
- def _handle_multi_binary_features(self, multi_binary_features: Dict[str, Dict[str, str]]):
711
+ def _handle_multi_binary_features(self, multi_binary_features: dict[str, dict[str, str]]):
713
712
  # Make dictionary GUI name: range values
714
713
  gui_values: dict[str, tuple[str,...]] = {
715
714
  gui_key: tuple(nested_dict.keys())
@@ -717,13 +716,13 @@ class DragonFeatureMaster:
717
716
  # Map GUI name to Model name and preserve internal mapping (same as input)
718
717
  return gui_values
719
718
 
720
- def _handle_one_hot_features(self, one_hot_features: Dict[str, Dict[str,str]]):
719
+ def _handle_one_hot_features(self, one_hot_features: dict[str, dict[str,str]]):
721
720
  # Make dictionary GUI name: range values
722
721
  gui_values: dict[str, tuple[str,...]] = {gui_key: tuple(nested_dict.keys()) for gui_key, nested_dict in one_hot_features.items()}
723
722
  # Map GUI name to Model name and preserve internal mapping (same as input)
724
723
  return gui_values
725
724
 
726
- def _handle_categorical_features(self, categorical_features: List[Tuple[str, str, Dict[str, int]]]):
725
+ def _handle_categorical_features(self, categorical_features: list[tuple[str, str, dict[str, int]]]):
727
726
  # Make dictionary GUI name: range values
728
727
  gui_values: dict[str, tuple[str,...]] = {gui_key: tuple(gui_options.keys()) for gui_key, _, gui_options in categorical_features}
729
728
  # Map GUI name to Model name and preserve internal mapping
@@ -939,7 +938,7 @@ class DragonGUIHandler:
939
938
  # Feature master instance
940
939
  self.master = feature_handler
941
940
 
942
- def _process_continuous(self, gui_feature: str, chosen_value: Any) -> Tuple[str,float]:
941
+ def _process_continuous(self, gui_feature: str, chosen_value: Any) -> tuple[str,float]:
943
942
  """
944
943
  Maps GUI name to model expected name and casts the value to float.
945
944
  """
@@ -955,7 +954,7 @@ class DragonGUIHandler:
955
954
  else:
956
955
  return model_name, float_value
957
956
 
958
- def _process_binary(self, gui_feature: str, chosen_value: str) -> Tuple[str,int]:
957
+ def _process_binary(self, gui_feature: str, chosen_value: str) -> tuple[str,int]:
959
958
  """
960
959
  Maps GUI name to model expected name and casts the value to binary (0,1).
961
960
  """
@@ -996,7 +995,7 @@ class DragonGUIHandler:
996
995
 
997
996
  return results
998
997
 
999
- def _process_one_hot(self, gui_feature: str, chosen_value: str) -> Dict[str,int]:
998
+ def _process_one_hot(self, gui_feature: str, chosen_value: str) -> dict[str,int]:
1000
999
  """
1001
1000
  Maps GUI names to model expected names and casts values to one-hot encoding.
1002
1001
  """
@@ -1019,7 +1018,7 @@ class DragonGUIHandler:
1019
1018
 
1020
1019
  return results
1021
1020
 
1022
- def _process_categorical(self, gui_feature: str, chosen_value: str) -> Tuple[str,int]:
1021
+ def _process_categorical(self, gui_feature: str, chosen_value: str) -> tuple[str,int]:
1023
1022
  """
1024
1023
  Maps GUI name to model expected name and casts the value to a categorical number.
1025
1024
  """
@@ -1034,7 +1033,7 @@ class DragonGUIHandler:
1034
1033
  result = categorical_mapping[chosen_value]
1035
1034
  return model_name, result
1036
1035
 
1037
- def update_target_fields(self, window: sg.Window, inference_results: Dict[str, Any]):
1036
+ def update_target_fields(self, window: sg.Window, inference_results: dict[str, Any]):
1038
1037
  """
1039
1038
  Updates the GUI's target fields with inference results.
1040
1039
 
@@ -1051,8 +1050,8 @@ class DragonGUIHandler:
1051
1050
  display_value = f"{result:.2f}" if isinstance(result, (int, float)) else result
1052
1051
  window[gui_key].update(display_value) # type: ignore
1053
1052
 
1054
- def _call_subprocess(self, window_values: Dict[str,Any], master_feature: Dict[str,str], processor: Callable) -> Dict[str, Union[float,int]]:
1055
- processed_features_subset: Dict[str, Union[float,int]] = dict()
1053
+ def _call_subprocess(self, window_values: dict[str,Any], master_feature: dict[str,str], processor: Callable) -> dict[str, Union[float,int]]:
1054
+ processed_features_subset: dict[str, Union[float,int]] = dict()
1056
1055
 
1057
1056
  for gui_name in master_feature.keys():
1058
1057
  chosen_value = window_values.get(gui_name)
@@ -1071,12 +1070,12 @@ class DragonGUIHandler:
1071
1070
 
1072
1071
  return processed_features_subset
1073
1072
 
1074
- def process_features(self, window_values: Dict[str, Any]) -> np.ndarray:
1073
+ def process_features(self, window_values: dict[str, Any]) -> np.ndarray:
1075
1074
  """
1076
1075
  Translates GUI values to a model-expected input array, returning a 1D numpy array.
1077
1076
  """
1078
1077
  # Stage 1: Process GUI inputs into a dictionary
1079
- processed_features: Dict[str, Union[float,int]] = {}
1078
+ processed_features: dict[str, Union[float,int]] = {}
1080
1079
 
1081
1080
  if self.master.has_continuous:
1082
1081
  processed_subset = self._call_subprocess(window_values=window_values,
@@ -1109,7 +1108,7 @@ class DragonGUIHandler:
1109
1108
  processed_features.update(processed_subset)
1110
1109
 
1111
1110
  # Stage 2: Assemble the final vector using the model's required order
1112
- final_vector: List[float] = list()
1111
+ final_vector: list[float] = list()
1113
1112
 
1114
1113
  try:
1115
1114
  for feature_name in self.model_expected_features:
@@ -1119,5 +1118,3 @@ class DragonGUIHandler:
1119
1118
 
1120
1119
  return np.array(final_vector, dtype=np.float32)
1121
1120
 
1122
- def info():
1123
- _script_info(__all__)
@@ -1,16 +1,18 @@
1
- from ._core._GUI_tools import (
1
+ from ._GUI_tools import (
2
2
  DragonGUIConfig,
3
3
  DragonGUIFactory,
4
- catch_exceptions,
5
4
  DragonFeatureMaster,
6
5
  DragonGUIHandler,
7
- info
6
+ catch_exceptions,
8
7
  )
9
8
 
9
+ from ._imprimir import info
10
+
11
+
10
12
  __all__ = [
11
13
  "DragonGUIConfig",
12
14
  "DragonGUIFactory",
13
- "catch_exceptions",
14
15
  "DragonFeatureMaster",
15
- "DragonGUIHandler"
16
+ "DragonGUIHandler",
17
+ "catch_exceptions",
16
18
  ]
@@ -0,0 +1,12 @@
1
+ from .._core import _imprimir_disponibles
2
+
3
+ _GRUPOS = [
4
+ "DragonGUIConfig",
5
+ "DragonGUIFactory",
6
+ "DragonFeatureMaster",
7
+ "DragonGUIHandler",
8
+ "catch_exceptions"
9
+ ]
10
+
11
+ def info():
12
+ _imprimir_disponibles(_GRUPOS)