dragon-ml-toolbox 12.7.0__py3-none-any.whl → 12.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This is a potentially problematic release.


This version of dragon-ml-toolbox might be problematic. See the package registry page for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.7.0
3
+ Version: 12.8.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-12.7.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
- dragon_ml_toolbox-12.7.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
1
+ dragon_ml_toolbox-12.8.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-12.8.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
3
3
  ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
4
4
  ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
5
5
  ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
@@ -14,7 +14,7 @@ ml_tools/ML_optimization.py,sha256=es3TlQbY7RYgJMZnznkjYGbUxFnAqzZxE_g3_qLK9Q8,2
14
14
  ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
15
15
  ml_tools/ML_simple_optimization.py,sha256=W2mce1XFCuiOHTOjOsCNbETISHn5MwYlYsTIXH5hMMo,18177
16
16
  ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
17
- ml_tools/ML_utilities.py,sha256=35DfZzAwfDwVwfRECD8X_2ynsU2NCpTdNJSmza6oAzQ,8712
17
+ ml_tools/ML_utilities.py,sha256=EnKpPTnJ2qjZmz7kvows4Uu5CfSA7ByRmI1v2-KarKw,9337
18
18
  ml_tools/PSO_optimization.py,sha256=fVHeemqilBS0zrGV25E5yKwDlGdd2ZKa18d8CZ6Q6Fk,22961
19
19
  ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
20
20
  ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234
@@ -24,18 +24,18 @@ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
24
24
  ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
25
25
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
26
26
  ml_tools/custom_logger.py,sha256=xot-VeZFigKjcVxADgzvI54vZO_MqMMejo7JmDED8Xo,5892
27
- ml_tools/data_exploration.py,sha256=xvuWHrxk5HOtY7_WmtXEUNf1d_TUxuNPFPFyN7QJxCY,46836
27
+ ml_tools/data_exploration.py,sha256=joaJPgXeweYMAn-xnMOzUIE8VvKvbEPenVjVHM21U4c,46914
28
28
  ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
29
29
  ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
30
30
  ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
31
31
  ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
32
32
  ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
33
33
  ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
34
- ml_tools/optimization_tools.py,sha256=ewYMAdSGlFxYALAGFXn-MsHpvW_Sbx6I-sKg9Kp6rB8,13533
34
+ ml_tools/optimization_tools.py,sha256=P074YCuZzkqkONnAsM-Zb9DTX_i8cRkkJLpwAWz6CRw,13521
35
35
  ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
36
36
  ml_tools/serde.py,sha256=UIshIesHRFmxr8F6B3LxGG8bYc1HHK-nlE3kENSZL18,5288
37
37
  ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
38
- dragon_ml_toolbox-12.7.0.dist-info/METADATA,sha256=Ek5m6-ExduWUlSvDJ26AYySjU5aILoyiUVlP5dDYWyY,6166
39
- dragon_ml_toolbox-12.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- dragon_ml_toolbox-12.7.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
- dragon_ml_toolbox-12.7.0.dist-info/RECORD,,
38
+ dragon_ml_toolbox-12.8.0.dist-info/METADATA,sha256=zbA_0bdkX_96eSpkx7QGZelCTKrckDXUdvmHE4oCNMI,6166
39
+ dragon_ml_toolbox-12.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ dragon_ml_toolbox-12.8.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
+ dragon_ml_toolbox-12.8.0.dist-info/RECORD,,
ml_tools/ML_utilities.py CHANGED
@@ -1,12 +1,13 @@
1
1
  import pandas as pd
2
2
  from pathlib import Path
3
- from typing import Union, Any
3
+ from typing import Union, Any, Optional
4
4
 
5
5
  from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
6
6
  from ._script_info import _script_info
7
7
  from ._logger import _LOGGER
8
8
  from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys
9
9
  from .utilities import load_dataframe
10
+ from .custom_logger import save_list_strings
10
11
 
11
12
 
12
13
  __all__ = [
@@ -139,6 +140,7 @@ def find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, v
139
140
  def select_features_by_shap(
140
141
  root_directory: Union[str, Path],
141
142
  shap_threshold: float,
143
+ log_feature_names_directory: Optional[Union[str, Path]],
142
144
  verbose: bool = True) -> list[str]:
143
145
  """
144
146
  Scans subdirectories to find SHAP summary CSVs, then extracts feature
@@ -148,11 +150,13 @@ def select_features_by_shap(
148
150
  importance scores aggregated from multiple models.
149
151
 
150
152
  Args:
151
- root_directory (Union[str, Path]):
153
+ root_directory (str | Path):
152
154
  The path to the root directory that contains model subdirectories.
153
155
  shap_threshold (float):
154
156
  The minimum mean absolute SHAP value for a feature to be included
155
157
  in the final list.
158
+ log_feature_names_directory (str | Path | None):
159
+ If given, saves the chosen feature names as a .txt file in this directory.
156
160
 
157
161
  Returns:
158
162
  list[str]:
@@ -211,6 +215,13 @@ def select_features_by_shap(
211
215
  final_features = sorted(list(master_feature_set))
212
216
  if verbose:
213
217
  _LOGGER.info(f"Selected {len(final_features)} unique features across all files.")
218
+
219
+ if log_feature_names_directory is not None:
220
+ save_names_path = make_fullpath(log_feature_names_directory, make=True, enforce="directory")
221
+ save_list_strings(list_strings=final_features,
222
+ directory=save_names_path,
223
+ filename=DatasetKeys.FEATURE_NAMES,
224
+ verbose=verbose)
214
225
 
215
226
  return final_features
216
227
 
@@ -362,6 +362,7 @@ def encode_categorical_features(
362
362
  encode_nulls (bool): If True, encodes Null values as a distinct category
363
363
  "Other" with a value of 0. Other categories start from 1.
364
364
  If False, Nulls are ignored and categories start from 0.
365
+ Note: Use False when encoding binary values with missing entries.
365
366
  split_resulting_dataset (bool): If True, returns two separate DataFrames:
366
367
  one with non-categorical columns and one with the encoded columns.
367
368
  If False, returns a single DataFrame with all columns.
@@ -98,7 +98,7 @@ def create_optimization_bounds(
98
98
 
99
99
  # 3. Populate categorical bounds (Index-based)
100
100
  # The indices in categorical_map (e.g., {2: 4}) directly correspond
101
- # to the indices in our new `feature_names` list.
101
+ # to the indices in the `feature_names` list.
102
102
  for index, cardinality in categorical_map.items():
103
103
  if not (0 <= index < total_features):
104
104
  _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
@@ -125,8 +125,8 @@ def create_optimization_bounds(
125
125
  # Map name to its index in the *feature-only* list
126
126
  index = feature_names.index(name)
127
127
  except ValueError:
128
- _LOGGER.error(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
129
- raise ValueError()
128
+ _LOGGER.warning(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
129
+ continue
130
130
 
131
131
  if lower_bounds[index] is not None:
132
132
  # This index was already set by the categorical map