dragon-ml-toolbox 14.3.1__py3-none-any.whl → 16.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (44)
  1. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/METADATA +10 -5
  2. dragon_ml_toolbox-16.0.0.dist-info/RECORD +51 -0
  3. ml_tools/ETL_cleaning.py +20 -20
  4. ml_tools/ETL_engineering.py +23 -25
  5. ml_tools/GUI_tools.py +20 -20
  6. ml_tools/MICE_imputation.py +3 -3
  7. ml_tools/ML_callbacks.py +43 -26
  8. ml_tools/ML_configuration.py +309 -0
  9. ml_tools/ML_datasetmaster.py +220 -260
  10. ml_tools/ML_evaluation.py +317 -81
  11. ml_tools/ML_evaluation_multi.py +127 -36
  12. ml_tools/ML_inference.py +249 -207
  13. ml_tools/ML_models.py +13 -102
  14. ml_tools/ML_models_advanced.py +1 -1
  15. ml_tools/ML_optimization.py +12 -12
  16. ml_tools/ML_scaler.py +11 -11
  17. ml_tools/ML_sequence_datasetmaster.py +341 -0
  18. ml_tools/ML_sequence_evaluation.py +215 -0
  19. ml_tools/ML_sequence_inference.py +391 -0
  20. ml_tools/ML_sequence_models.py +139 -0
  21. ml_tools/ML_trainer.py +1247 -338
  22. ml_tools/ML_utilities.py +51 -2
  23. ml_tools/ML_vision_datasetmaster.py +262 -118
  24. ml_tools/ML_vision_evaluation.py +26 -6
  25. ml_tools/ML_vision_inference.py +117 -140
  26. ml_tools/ML_vision_models.py +15 -1
  27. ml_tools/ML_vision_transformers.py +233 -7
  28. ml_tools/PSO_optimization.py +6 -6
  29. ml_tools/SQL.py +4 -4
  30. ml_tools/{keys.py → _keys.py} +45 -1
  31. ml_tools/_schema.py +1 -1
  32. ml_tools/ensemble_evaluation.py +54 -11
  33. ml_tools/ensemble_inference.py +7 -33
  34. ml_tools/ensemble_learning.py +1 -1
  35. ml_tools/optimization_tools.py +2 -2
  36. ml_tools/path_manager.py +5 -5
  37. ml_tools/utilities.py +1 -2
  38. dragon_ml_toolbox-14.3.1.dist-info/RECORD +0 -48
  39. ml_tools/RNN_forecast.py +0 -56
  40. ml_tools/_ML_vision_recipe.py +0 -88
  41. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/WHEEL +0 -0
  42. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE +0 -0
  43. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  44. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 14.3.1
4
- Summary: A collection of tools for data science and machine learning projects.
5
- Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
3
+ Version: 16.0.0
4
+ Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
+ Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
8
8
  Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
@@ -79,7 +79,7 @@ Dynamic: license-file
79
79
 
80
80
  # dragon-ml-toolbox
81
81
 
82
- A collection of Python utilities for data science and machine learning, structured as a modular package for easy reuse and installation. This package has no base dependencies, allowing for lightweight and customized virtual environments.
82
+ A collection of machine learning pipelines and utilities, structured as modular packages for easy reuse and installation. This package has no base dependencies, allowing for lightweight and customized virtual environments.
83
83
 
84
84
  ### Features:
85
85
 
@@ -141,6 +141,7 @@ ETL_cleaning
141
141
  ETL_engineering
142
142
  math_utilities
143
143
  ML_callbacks
144
+ ML_configuration
144
145
  ML_datasetmaster
145
146
  ML_evaluation_multi
146
147
  ML_evaluation
@@ -149,6 +150,10 @@ ML_models
149
150
  ML_models_advanced # Requires the extra flag [py-tab]
150
151
  ML_optimization
151
152
  ML_scaler
153
+ ML_sequence_datasetmaster
154
+ ML_sequence_evaluation
155
+ ML_sequence_inference
156
+ ML_sequence_models
152
157
  ML_trainer
153
158
  ML_utilities
154
159
  ML_vision_datasetmaster
@@ -159,7 +164,6 @@ ML_vision_transformers
159
164
  optimization_tools
160
165
  path_manager
161
166
  PSO_optimization
162
- RNN_forecast
163
167
  serde
164
168
  SQL
165
169
  utilities
@@ -245,6 +249,7 @@ custom_logger
245
249
  GUI_tools
246
250
  ML_models
247
251
  ML_inference
252
+ ML_sequence_inference
248
253
  ML_scaler
249
254
  path_manager
250
255
  ```
@@ -0,0 +1,51 @@
1
+ dragon_ml_toolbox-16.0.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-16.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=gkOdNDbKYpIJezwSo2CEnISkLeYfYHv9t8b5K2-P69A,2687
3
+ ml_tools/ETL_cleaning.py,sha256=Bg0nTmpNzQKDdezK3m0NjYT7N8_ANGlmD9mDXjggqkA,20522
4
+ ml_tools/ETL_engineering.py,sha256=PGXvlvMWa05J1rsMNXxnHzXIe2K68qhtigSn74W8kFI,54961
5
+ ml_tools/GUI_tools.py,sha256=QMSu-8eSNminD6A6Yg9sXo4ff6GNPThwRBVgQQwAAbY,45508
6
+ ml_tools/MICE_imputation.py,sha256=2MsHeKTd8MSBIYmj0q671Fm4wCBvMGjpxULp__jDNgo,20812
7
+ ml_tools/ML_callbacks.py,sha256=EF7Px_IV3IIJpfaT0Nwbv4-_0C6IUlJ_xjzHOekXwq0,16410
8
+ ml_tools/ML_configuration.py,sha256=8O6qaCbufL9q0I299E9gNeSgZUoqKuSTIiAdKHUldaA,13177
9
+ ml_tools/ML_datasetmaster.py,sha256=j9ZPYULpeJ-XkngNnNHUju49RJD36zFV_HHs58s0c0U,27000
10
+ ml_tools/ML_evaluation.py,sha256=60Fh4IejdXMC-W6hT0MNR5Y3K7L3xYd2aY535Jsf6V4,29207
11
+ ml_tools/ML_evaluation_multi.py,sha256=QDmifXlSzwINUtVTrUyw6g801oYY2INQgKsMUnj3XRc,20155
12
+ ml_tools/ML_inference.py,sha256=CVvX55yhydaSUbwo7Ej63VFK-aeDZ5ttXU4sXY1bGQ0,27482
13
+ ml_tools/ML_models.py,sha256=OEiuUduu2KqsfXZIfzJHR3uop_Zo6dzdKtvaOeRt1G0,27932
14
+ ml_tools/ML_models_advanced.py,sha256=5Y-Kda3P972F9zyfqCS4ndqOL-XXri010nhNp_bhHvY,12411
15
+ ml_tools/ML_optimization.py,sha256=2EwaKHKoZPnvN02d4q0tLO7aBMXSO8cEuhLl0bx28bg,22692
16
+ ml_tools/ML_scaler.py,sha256=Rp6h6U013UK56XhiV_Rmj1CSMI7OSIJLqC0vn6RkiQY,7527
17
+ ml_tools/ML_sequence_datasetmaster.py,sha256=WLkZ_yBcT5bjnbZ1SaecSXBms9IqC596lOCq14D48bc,15569
18
+ ml_tools/ML_sequence_evaluation.py,sha256=AfPkl-92AZ0HYD1xfTSYJSYClvgLYUEW-7CI9ifKB08,7686
19
+ ml_tools/ML_sequence_inference.py,sha256=k9Q8nSvUGdNrmnS4uXh3DkfrxQAqJI68Zs3oaiz2daY,17876
20
+ ml_tools/ML_sequence_models.py,sha256=PVmk7nK-lIl2asR4r6XgT0TIYSJKY4Um4D65gsyE_Qw,5597
21
+ ml_tools/ML_trainer.py,sha256=pZs5rUXgrYt55KX7FoQn-xO14ByDeOf0uS7h329Wn9g,99302
22
+ ml_tools/ML_utilities.py,sha256=kj4CoI7YyAj9fipzFdl9gSYIMzDAjcPqgupTBkB1BQg,22994
23
+ ml_tools/ML_vision_datasetmaster.py,sha256=JiaWfNm34mMQjVgSFOnG_Z4IIkqvvxWfDPQdwPCoN4k,64757
24
+ ml_tools/ML_vision_evaluation.py,sha256=1ZYbB3fuMD5-pynyJuJcF3M19hacepuqVfBbyFTguL8,11331
25
+ ml_tools/ML_vision_inference.py,sha256=8PRqufE2vz_X9NOQs4psI0M7nd3HaSB33vX4VEPCrvk,19618
26
+ ml_tools/ML_vision_models.py,sha256=NojhEZcQiIZ3iKCo5eFkcxetCEVuBKbujVUNruHes-U,26175
27
+ ml_tools/ML_vision_transformers.py,sha256=CEHPzkonub4-s21hjhj30O01dr5sVj9EEkrqmnFl03Y,10749
28
+ ml_tools/PSO_optimization.py,sha256=wAi7BaY-_QoRZ8ibHD6xpyhUABofrabHV7oiryBz5D0,22931
29
+ ml_tools/SQL.py,sha256=hBTKC_OotSuWc0DeD8sI-u2GJS7X_4oANjmLcY1YW_w,11210
30
+ ml_tools/VIF_factor.py,sha256=at5IVqPvicja2-DNSTSIIy3SkzDWCmLzo3qTG_qr5n8,10422
31
+ ml_tools/__init__.py,sha256=kJiankjz9_qXu7gU92mYqYg_anLvt-B6RtW0mMH8uGo,76
32
+ ml_tools/_keys.py,sha256=57_8N-aBxon9ITC7xc00mvJL5czYgx6NVhl0KuL2l3E,4719
33
+ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
34
+ ml_tools/_schema.py,sha256=bE2RhOhXZd2u8MEQLOM--01ILPDxLqQAhZ3hZpFTXAI,3909
35
+ ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
36
+ ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
37
+ ml_tools/custom_logger.py,sha256=TGc0Ww2Xlqj2XE3q4bP43hV7T3qnb5ci9f0pYHXF5TY,11226
38
+ ml_tools/data_exploration.py,sha256=bwHzFJ-IAo5GN3T53F-1J_pXUg8VHS91sG_90utAsfg,69911
39
+ ml_tools/ensemble_evaluation.py,sha256=-pxhmCMPjaqSjJxXxaD_asKtoamztATjXJL7YKlsvZk,28369
40
+ ml_tools/ensemble_inference.py,sha256=uQPJiBK1GcckmeWgZn7BzaaPKIAQIBglmbUOuK9WknY,8560
41
+ ml_tools/ensemble_learning.py,sha256=Bh5WupUF93yLM5IBaQBsOqTVjWKjyfz7jN9IuRaZQ_o,21965
42
+ ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
43
+ ml_tools/math_utilities.py,sha256=xeKq1quR_3DYLgowcp4Uam_4s3JltUyOnqMOGuAiYWU,8802
44
+ ml_tools/optimization_tools.py,sha256=_sCLZy9LRIIqt1zkYyKNsSbDK3JjRIhC-sADq-JtegE,12751
45
+ ml_tools/path_manager.py,sha256=2lTnhfDNdYlrqP_LGDoP51LdUf9hlTsZKuZJoYq5W-U,18462
46
+ ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
47
+ ml_tools/utilities.py,sha256=wFwdv7xFV8Sv6kNy4_tE7RNasRs_318Zm7s65Uwu2Us,22509
48
+ dragon_ml_toolbox-16.0.0.dist-info/METADATA,sha256=TT3uGtzGsQDxZkVk-2B6ldAL0f1Xl7mzCi7O9L5Ht9k,6591
49
+ dragon_ml_toolbox-16.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
+ dragon_ml_toolbox-16.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
51
+ dragon_ml_toolbox-16.0.0.dist-info/RECORD,,
ml_tools/ETL_cleaning.py CHANGED
@@ -14,8 +14,8 @@ __all__ = [
14
14
  "save_unique_values",
15
15
  "basic_clean",
16
16
  "basic_clean_drop",
17
- "ColumnCleaner",
18
- "DataFrameCleaner"
17
+ "DragonColumnCleaner",
18
+ "DragonDataFrameCleaner"
19
19
  ]
20
20
 
21
21
 
@@ -200,11 +200,11 @@ def _cleaner_core(df_in: pl.DataFrame, all_lowercase: bool) -> pl.DataFrame:
200
200
  # Create a cleaner for every column in the dataframe
201
201
  all_columns = df_in.columns
202
202
  column_cleaners = [
203
- ColumnCleaner(col, rules=cleaning_rules, case_insensitive=True) for col in all_columns
203
+ DragonColumnCleaner(col, rules=cleaning_rules, case_insensitive=True) for col in all_columns
204
204
  ]
205
205
 
206
206
  # Instantiate and run the main dataframe cleaner
207
- df_cleaner = DataFrameCleaner(cleaners=column_cleaners)
207
+ df_cleaner = DragonDataFrameCleaner(cleaners=column_cleaners)
208
208
  df_cleaned = df_cleaner.clean(df_in, clone_df=False) # Use clone_df=False for efficiency
209
209
 
210
210
  # apply lowercase to all string columns
@@ -335,12 +335,12 @@ def basic_clean_drop(input_filepath: Union[str,Path], output_filepath: Union[str
335
335
 
336
336
 
337
337
  ########## EXTRACT and CLEAN ##########
338
- class ColumnCleaner:
338
+ class DragonColumnCleaner:
339
339
  """
340
340
  A configuration object that defines cleaning rules for a single Polars DataFrame column.
341
341
 
342
342
  This class holds a dictionary of regex-to-replacement rules, the target column name,
343
- and the case-sensitivity setting. It is intended to be used with the DataFrameCleaner.
343
+ and the case-sensitivity setting. It is intended to be used with the DragonDataFrameCleaner.
344
344
 
345
345
  Notes:
346
346
  - Define rules from most specific to more general to create a fallback system.
@@ -365,8 +365,8 @@ class ColumnCleaner:
365
365
  r'ID[- ](\\d+)': r'ID:$1'
366
366
  }
367
367
 
368
- id_cleaner = ColumnCleaner(column_name='user_id', rules=id_rules)
369
- # This object would then be passed to a DataFrameCleaner.
368
+ id_cleaner = DragonColumnCleaner(column_name='user_id', rules=id_rules)
369
+ # This object would then be passed to a DragonDataFrameCleaner.
370
370
  ```
371
371
  """
372
372
  def __init__(self, column_name: str, rules: Dict[str, str], case_insensitive: bool = True):
@@ -382,34 +382,34 @@ class ColumnCleaner:
382
382
  self.case_insensitive = case_insensitive
383
383
 
384
384
 
385
- class DataFrameCleaner:
385
+ class DragonDataFrameCleaner:
386
386
  """
387
387
  Orchestrates cleaning multiple columns in a Polars DataFrame.
388
388
 
389
- This class takes a list of ColumnCleaner objects and applies their defined
389
+ This class takes a list of DragonColumnCleaner objects and applies their defined
390
390
  rules to the corresponding columns of a DataFrame using high-performance
391
391
  Polars expressions.
392
392
 
393
393
  Args:
394
- cleaners (List[ColumnCleaner]):
395
- A list of ColumnCleaner configuration objects.
394
+ cleaners (List[DragonColumnCleaner]):
395
+ A list of DragonColumnCleaner configuration objects.
396
396
 
397
397
  Raises:
398
- TypeError: If 'cleaners' is not a list or contains non-ColumnCleaner objects.
399
- ValueError: If multiple ColumnCleaner objects target the same column.
398
+ TypeError: If 'cleaners' is not a list or contains non-DragonColumnCleaner objects.
399
+ ValueError: If multiple DragonColumnCleaner objects target the same column.
400
400
  """
401
- def __init__(self, cleaners: List[ColumnCleaner]):
401
+ def __init__(self, cleaners: List[DragonColumnCleaner]):
402
402
  if not isinstance(cleaners, list):
403
- _LOGGER.error("The 'cleaners' argument must be a list of ColumnCleaner objects.")
403
+ _LOGGER.error("The 'cleaners' argument must be a list of DragonColumnCleaner objects.")
404
404
  raise TypeError()
405
405
 
406
406
  seen_columns = set()
407
407
  for cleaner in cleaners:
408
- if not isinstance(cleaner, ColumnCleaner):
409
- _LOGGER.error(f"All items in 'cleaners' list must be ColumnCleaner objects, but found an object of type {type(cleaner).__name__}.")
408
+ if not isinstance(cleaner, DragonColumnCleaner):
409
+ _LOGGER.error(f"All items in 'cleaners' list must be DragonColumnCleaner objects, but found an object of type {type(cleaner).__name__}.")
410
410
  raise TypeError()
411
411
  if cleaner.column_name in seen_columns:
412
- _LOGGER.error(f"Duplicate ColumnCleaner found for column '{cleaner.column_name}'. Each column should only have one cleaner.")
412
+ _LOGGER.error(f"Duplicate DragonColumnCleaner found for column '{cleaner.column_name}'. Each column should only have one cleaner.")
413
413
  raise ValueError()
414
414
  seen_columns.add(cleaner.column_name)
415
415
 
@@ -475,7 +475,7 @@ class DataFrameCleaner:
475
475
  """
476
476
  This convenience method encapsulates the entire cleaning process into a
477
477
  single call. It loads a DataFrame from a specified file, applies all
478
- cleaning rules configured in the `DataFrameCleaner` instance, and saves
478
+ cleaning rules configured in the `DragonDataFrameCleaner` instance, and saves
479
479
  the resulting cleaned DataFrame to a new file.
480
480
 
481
481
  The method ensures that all data is loaded as string types to prevent
ml_tools/ETL_engineering.py CHANGED
@@ -8,11 +8,12 @@ from .path_manager import make_fullpath
8
8
  from ._script_info import _script_info
9
9
  from ._logger import _LOGGER
10
10
  from .constants import CHEMICAL_ELEMENT_SYMBOLS
11
+ from ._keys import MagicWords
11
12
 
12
13
 
13
14
  __all__ = [
14
- "TransformationRecipe",
15
- "DataProcessor",
15
+ "DragonTransformRecipe",
16
+ "DragonProcessor",
16
17
  "BinaryTransformer",
17
18
  "MultiBinaryDummifier",
18
19
  "AutoDummifier",
@@ -32,16 +33,13 @@ __all__ = [
32
33
 
33
34
  ############ TRANSFORM MAIN ####################
34
35
 
35
- # Magic word for rename-only transformation
36
- _RENAME = "rename"
37
-
38
- class TransformationRecipe:
36
+ class DragonTransformRecipe:
39
37
  """
40
38
  A builder class for creating a data transformation recipe.
41
39
 
42
40
  This class provides a structured way to define a series of transformation
43
41
  steps, with validation performed at the time of addition. It is designed
44
- to be passed to a `DataProcessor`.
42
+ to be passed to a `DragonProcessor`.
45
43
 
46
44
  Use the method `add()` to add recipes.
47
45
  """
@@ -53,7 +51,7 @@ class TransformationRecipe:
53
51
  input_col_name: str,
54
52
  transform: Union[str, Callable],
55
53
  output_col_names: Optional[Union[str, List[str]]] = None
56
- ) -> "TransformationRecipe":
54
+ ) -> "DragonTransformRecipe":
57
55
  """
58
56
  Adds a new transformation step to the recipe.
59
57
 
@@ -77,12 +75,12 @@ class TransformationRecipe:
77
75
  _LOGGER.error("'input_col' must be a non-empty string.")
78
76
  raise TypeError()
79
77
 
80
- if transform == _RENAME:
78
+ if transform == MagicWords.RENAME:
81
79
  if not isinstance(output_col_names, str):
82
80
  _LOGGER.error("For a RENAME operation, 'output_col' must be a string.")
83
81
  raise TypeError()
84
82
  elif not isinstance(transform, Callable):
85
- _LOGGER.error(f"'transform' must be a callable function or the string '{_RENAME}'.")
83
+ _LOGGER.error(f"'transform' must be a callable function or the string '{MagicWords.RENAME}'.")
86
84
  raise TypeError()
87
85
 
88
86
  # --- Add Step ---
@@ -103,22 +101,22 @@ class TransformationRecipe:
103
101
  return len(self._steps)
104
102
 
105
103
 
106
- class DataProcessor:
104
+ class DragonProcessor:
107
105
  """
108
- Transforms a Polars DataFrame based on a provided `TransformationRecipe` object.
106
+ Transforms a Polars DataFrame based on a provided `DragonTransformRecipe` object.
109
107
 
110
108
  Use the methods `transform()` or `load_transform_save()`.
111
109
  """
112
- def __init__(self, recipe: TransformationRecipe):
110
+ def __init__(self, recipe: DragonTransformRecipe):
113
111
  """
114
- Initializes the DataProcessor with a transformation recipe.
112
+ Initializes the DragonProcessor with a transformation recipe.
115
113
 
116
114
  Args:
117
- recipe: An instance of the `TransformationRecipe` class that has
115
+ recipe: An instance of the `DragonTransformRecipe` class that has
118
116
  been populated with transformation steps.
119
117
  """
120
- if not isinstance(recipe, TransformationRecipe):
121
- _LOGGER.error("The recipe must be an instance of TransformationRecipe.")
118
+ if not isinstance(recipe, DragonTransformRecipe):
119
+ _LOGGER.error("The recipe must be an instance of DragonTransformRecipe.")
122
120
  raise TypeError()
123
121
  if len(recipe) == 0:
124
122
  _LOGGER.error("The recipe cannot be empty.")
@@ -142,7 +140,7 @@ class DataProcessor:
142
140
 
143
141
  input_series = df.get_column(input_col_name)
144
142
 
145
- if transform_action == _RENAME:
143
+ if transform_action == MagicWords.RENAME:
146
144
  processed_columns.append(input_series.alias(output_col_spec))
147
145
  continue
148
146
 
@@ -237,7 +235,7 @@ class DataProcessor:
237
235
  Provides a detailed, human-readable string representation of the
238
236
  entire processing pipeline.
239
237
  """
240
- header = "DataProcessor Pipeline"
238
+ header = "DragonProcessor Pipeline"
241
239
  divider = "-" * len(header)
242
240
  num_steps = len(self._recipe)
243
241
 
@@ -255,7 +253,7 @@ class DataProcessor:
255
253
  transform_action = step["transform"]
256
254
 
257
255
  # Get a clean name for the transformation action
258
- if transform_action == _RENAME: # "rename"
256
+ if transform_action == MagicWords.RENAME: # "rename"
259
257
  transform_name = "Rename"
260
258
  else:
261
259
  # This works for both functions and class instances
@@ -394,7 +392,7 @@ class MultiBinaryDummifier:
394
392
 
395
393
  For each keyword provided, this transformer generates a corresponding column
396
394
  with a value of 1 if the keyword is present in the input string, and 0 otherwise.
397
- It is designed to be used within the DataProcessor pipeline.
395
+ It is designed to be used within the DragonProcessor pipeline.
398
396
 
399
397
  Args:
400
398
  keywords (List[str]):
@@ -443,7 +441,7 @@ class MultiBinaryDummifier:
443
441
  .when(str_column.str.contains(pattern))
444
442
  .then(pl.lit(1, dtype=pl.UInt8))
445
443
  .otherwise(pl.lit(0, dtype=pl.UInt8))
446
- .alias(f"{column_base_name}_{keyword}") # name for DataProcessor
444
+ .alias(f"{column_base_name}_{keyword}") # name for DragonProcessor
447
445
  )
448
446
  output_expressions.append(expr)
449
447
 
@@ -533,7 +531,7 @@ class NumberExtractor:
533
531
  A configurable transformer that extracts a single number from a Polars string series using a regular expression.
534
532
 
535
533
  An instance can be used as a 'transform' callable within the
536
- `DataProcessor` pipeline.
534
+ `DragonProcessor` pipeline.
537
535
 
538
536
  Args:
539
537
  regex_pattern (str):
@@ -872,7 +870,7 @@ class MultiTemperatureExtractor:
872
870
  pl.when(column.is_not_null())
873
871
  .then(final_expr)
874
872
  .otherwise(None)
875
- .alias(f"{column_base_name}_{i}") # Temporary name for DataProcessor
873
+ .alias(f"{column_base_name}_{i}") # Temporary name for DragonProcessor
876
874
  )
877
875
 
878
876
  output_expressions.append(final_expr)
@@ -1300,7 +1298,7 @@ class MolecularFormulaTransformer:
1300
1298
  each chemical element has its own column. The value in each column is the
1301
1299
  stoichiometric quantity of that element.
1302
1300
 
1303
- It is designed to be used within the DataProcessor pipeline.
1301
+ It is designed to be used within the DragonProcessor pipeline.
1304
1302
  """
1305
1303
 
1306
1304
  def __init__(self):
ml_tools/GUI_tools.py CHANGED
@@ -8,15 +8,15 @@ import numpy as np
8
8
 
9
9
  from ._script_info import _script_info
10
10
  from ._logger import _LOGGER
11
- from .keys import _OneHotOtherPlaceholder
11
+ from ._keys import _OneHotOtherPlaceholder
12
12
 
13
13
 
14
14
  __all__ = [
15
- "ConfigManager",
16
- "GUIFactory",
15
+ "DragonGUIConfig",
16
+ "DragonGUIFactory",
17
17
  "catch_exceptions",
18
- "FeatureMaster",
19
- "GUIHandler"
18
+ "DragonFeatureMaster",
19
+ "DragonGUIHandler"
20
20
  ]
21
21
 
22
22
  # --- Configuration Management ---
@@ -55,14 +55,14 @@ class _SectionProxy:
55
55
  # Fallback to the original string
56
56
  return value_str
57
57
 
58
- class ConfigManager:
58
+ class DragonGUIConfig:
59
59
  """
60
60
  Loads a .ini file and provides access to its values as object attributes.
61
61
  Includes a method to generate a default configuration template.
62
62
  """
63
63
  def __init__(self, config_path: str | Path):
64
64
  """
65
- Initializes the ConfigManager and dynamically creates attributes
65
+ Initializes the DragonGUIConfig and dynamically creates attributes
66
66
  based on the .ini file's sections and options.
67
67
  """
68
68
  config_path = Path(config_path)
@@ -78,7 +78,7 @@ class ConfigManager:
78
78
  @staticmethod
79
79
  def generate_template(file_path: str | Path):
80
80
  """
81
- Generates a complete, commented .ini template file that works with the GUIFactory.
81
+ Generates a complete, commented .ini template file that works with the DragonGUIFactory.
82
82
 
83
83
  Args:
84
84
  file_path (str | Path): The path where the .ini file will be saved.
@@ -155,12 +155,12 @@ class ConfigManager:
155
155
 
156
156
 
157
157
  # --- GUI Factory ---
158
- class GUIFactory:
158
+ class DragonGUIFactory:
159
159
  """
160
160
  Builds styled FreeSimpleGUI elements and layouts using a "building block"
161
- approach, driven by a ConfigManager instance.
161
+ approach, driven by a DragonGUIConfig instance.
162
162
  """
163
- def __init__(self, config: ConfigManager):
163
+ def __init__(self, config: DragonGUIConfig):
164
164
  """
165
165
  Initializes the factory with a configuration object.
166
166
  """
@@ -456,7 +456,7 @@ def catch_exceptions(show_popup: bool = True):
456
456
 
457
457
 
458
458
  # --- Feature Handler ---
459
- class FeatureMaster:
459
+ class DragonFeatureMaster:
460
460
  """
461
461
  Manages and organizes feature definitions for a machine learning model.
462
462
 
@@ -488,7 +488,7 @@ class FeatureMaster:
488
488
  categorical_features: Optional[List[Tuple[str, str, Dict[str, int]]]] = None,
489
489
  add_one_hot_other_placeholder: bool = True) -> None:
490
490
  """
491
- Initializes the FeatureMaster instance by processing feature and target definitions.
491
+ Initializes the DragonFeatureMaster instance by processing feature and target definitions.
492
492
 
493
493
  This constructor creates internal mappings to translate between GUI-friendly names and model-specific feature names. It also
494
494
  prepares data structures needed to populate UI components.
@@ -806,17 +806,17 @@ class FeatureMaster:
806
806
 
807
807
 
808
808
  # --- GUI-Model API ---
809
- class GUIHandler:
809
+ class DragonGUIHandler:
810
810
  """
811
811
  Translates data between a GUI and a machine learning model.
812
812
 
813
813
  This class acts as the primary interface between a user-facing application
814
- (FreeSimpleGUI) and the model's expected data format. It uses a `FeatureMaster` instance to correctly process
814
+ (FreeSimpleGUI) and the model's expected data format. It uses a `DragonFeatureMaster` instance to correctly process
815
815
  and encode user inputs.
816
816
 
817
817
  Its main responsibilities are:
818
818
  1. To take raw values from GUI elements and, using the definitions from
819
- `FeatureMaster`, convert them into a single, ordered `numpy.ndarray`
819
+ `DragonFeatureMaster`, convert them into a single, ordered `numpy.ndarray`
820
820
  that can be fed directly into a model for inference.
821
821
  2. To take the results of a model's inference and update the
822
822
  corresponding target fields in the GUI to display the prediction.
@@ -824,13 +824,13 @@ class GUIHandler:
824
824
  This handler ensures a clean separation of concerns, where the GUI is
825
825
  only responsible for presentation, and the model sees correctly formatted numerical data.
826
826
  """
827
- def __init__(self, feature_handler: FeatureMaster, model_expected_features: list[str]) -> None:
827
+ def __init__(self, feature_handler: DragonFeatureMaster, model_expected_features: list[str]) -> None:
828
828
  """
829
- Initializes the GUIHandler.
829
+ Initializes the DragonGUIHandler.
830
830
 
831
831
  Args:
832
- feature_handler (FeatureMaster):
833
- An initialized instance of the `FeatureMaster` class. This object
832
+ feature_handler (DragonFeatureMaster):
833
+ An initialized instance of the `DragonFeatureMaster` class. This object
834
834
  contains all the necessary mappings and definitions for the model's
835
835
  features and targets.
836
836
  model_expected_features (list[str]):
ml_tools/MICE_imputation.py CHANGED
@@ -15,7 +15,7 @@ from ._schema import FeatureSchema
15
15
 
16
16
 
17
17
  __all__ = [
18
- "MiceImputer",
18
+ "DragonMICE",
19
19
  "apply_mice",
20
20
  "save_imputed_datasets",
21
21
  "get_convergence_diagnostic",
@@ -280,7 +280,7 @@ def _skip_targets(df: pd.DataFrame, target_cols: list[str]):
280
280
 
281
281
 
282
282
  # modern implementation
283
- class MiceImputer:
283
+ class DragonMICE:
284
284
  """
285
285
  A modern MICE imputation pipeline that uses a FeatureSchema
286
286
  to correctly discretize categorical features after imputation.
@@ -311,7 +311,7 @@ class MiceImputer:
311
311
  # 3. Names of categorical features
312
312
  self.categorical_features = list(self.schema.categorical_feature_names)
313
313
 
314
- _LOGGER.info(f"MiceImputer initialized. Found {len(self.cat_info)} categorical features to discretize.")
314
+ _LOGGER.info(f"DragonMICE initialized. Found {len(self.cat_info)} categorical features to discretize.")
315
315
 
316
316
  def _post_process(self, imputed_df: pd.DataFrame) -> pd.DataFrame:
317
317
  """