dragon-ml-toolbox 12.7.0__tar.gz → 12.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (46)
  1. {dragon_ml_toolbox-12.7.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.8.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_utilities.py +13 -2
  4. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/data_exploration.py +1 -0
  5. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/optimization_tools.py +3 -3
  6. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/pyproject.toml +1 -1
  7. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/LICENSE +0 -0
  8. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/LICENSE-THIRD-PARTY.md +0 -0
  9. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/README.md +0 -0
  10. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  11. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  12. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  13. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  14. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_cleaning.py +0 -0
  15. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_engineering.py +0 -0
  16. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/GUI_tools.py +0 -0
  17. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/MICE_imputation.py +0 -0
  18. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_callbacks.py +0 -0
  19. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_datasetmaster.py +0 -0
  20. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation.py +0 -0
  21. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation_multi.py +0 -0
  22. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_inference.py +0 -0
  23. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_models.py +0 -0
  24. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_optimization.py +0 -0
  25. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_scaler.py +0 -0
  26. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_simple_optimization.py +0 -0
  27. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_trainer.py +0 -0
  28. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/PSO_optimization.py +0 -0
  29. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/RNN_forecast.py +0 -0
  30. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/SQL.py +0 -0
  31. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/VIF_factor.py +0 -0
  32. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/__init__.py +0 -0
  33. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_logger.py +0 -0
  34. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_script_info.py +0 -0
  35. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/constants.py +0 -0
  36. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/custom_logger.py +0 -0
  37. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_evaluation.py +0 -0
  38. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_inference.py +0 -0
  39. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_learning.py +0 -0
  40. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/handle_excel.py +0 -0
  41. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/keys.py +0 -0
  42. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/math_utilities.py +0 -0
  43. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/path_manager.py +0 -0
  44. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/serde.py +0 -0
  45. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/ml_tools/utilities.py +0 -0
  46. {dragon_ml_toolbox-12.7.0 → dragon_ml_toolbox-12.8.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.7.0
3
+ Version: 12.8.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.7.0
3
+ Version: 12.8.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,12 +1,13 @@
1
1
  import pandas as pd
2
2
  from pathlib import Path
3
- from typing import Union, Any
3
+ from typing import Union, Any, Optional
4
4
 
5
5
  from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
6
6
  from ._script_info import _script_info
7
7
  from ._logger import _LOGGER
8
8
  from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys
9
9
  from .utilities import load_dataframe
10
+ from .custom_logger import save_list_strings
10
11
 
11
12
 
12
13
  __all__ = [
@@ -139,6 +140,7 @@ def find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, v
139
140
  def select_features_by_shap(
140
141
  root_directory: Union[str, Path],
141
142
  shap_threshold: float,
143
+ log_feature_names_directory: Optional[Union[str, Path]],
142
144
  verbose: bool = True) -> list[str]:
143
145
  """
144
146
  Scans subdirectories to find SHAP summary CSVs, then extracts feature
@@ -148,11 +150,13 @@ def select_features_by_shap(
148
150
  importance scores aggregated from multiple models.
149
151
 
150
152
  Args:
151
- root_directory (Union[str, Path]):
153
+ root_directory (str | Path):
152
154
  The path to the root directory that contains model subdirectories.
153
155
  shap_threshold (float):
154
156
  The minimum mean absolute SHAP value for a feature to be included
155
157
  in the final list.
158
+ log_feature_names_directory (str | Path | None):
159
+ If given, saves the chosen feature names as a .txt file in this directory.
156
160
 
157
161
  Returns:
158
162
  list[str]:
@@ -211,6 +215,13 @@ def select_features_by_shap(
211
215
  final_features = sorted(list(master_feature_set))
212
216
  if verbose:
213
217
  _LOGGER.info(f"Selected {len(final_features)} unique features across all files.")
218
+
219
+ if log_feature_names_directory is not None:
220
+ save_names_path = make_fullpath(log_feature_names_directory, make=True, enforce="directory")
221
+ save_list_strings(list_strings=final_features,
222
+ directory=save_names_path,
223
+ filename=DatasetKeys.FEATURE_NAMES,
224
+ verbose=verbose)
214
225
 
215
226
  return final_features
216
227
 
@@ -362,6 +362,7 @@ def encode_categorical_features(
362
362
  encode_nulls (bool): If True, encodes Null values as a distinct category
363
363
  "Other" with a value of 0. Other categories start from 1.
364
364
  If False, Nulls are ignored and categories start from 0.
365
+ Note: Use False when encoding binary values with missing entries.
365
366
  split_resulting_dataset (bool): If True, returns two separate DataFrames:
366
367
  one with non-categorical columns and one with the encoded columns.
367
368
  If False, returns a single DataFrame with all columns.
@@ -98,7 +98,7 @@ def create_optimization_bounds(
98
98
 
99
99
  # 3. Populate categorical bounds (Index-based)
100
100
  # The indices in categorical_map (e.g., {2: 4}) directly correspond
101
- # to the indices in our new `feature_names` list.
101
+ # to the indices in the `feature_names` list.
102
102
  for index, cardinality in categorical_map.items():
103
103
  if not (0 <= index < total_features):
104
104
  _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
@@ -125,8 +125,8 @@ def create_optimization_bounds(
125
125
  # Map name to its index in the *feature-only* list
126
126
  index = feature_names.index(name)
127
127
  except ValueError:
128
- _LOGGER.error(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
129
- raise ValueError()
128
+ _LOGGER.warning(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
129
+ continue
130
130
 
131
131
  if lower_bounds[index] is not None:
132
132
  # This index was already set by the categorical map
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "12.7.0"
3
+ version = "12.8.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }