dragon-ml-toolbox 12.6.0__tar.gz → 12.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dragon-ml-toolbox might be problematic.

Files changed (46)
  1. {dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.8.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_utilities.py +13 -2
  4. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/data_exploration.py +1 -0
  5. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/optimization_tools.py +3 -3
  6. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/utilities.py +49 -0
  7. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/pyproject.toml +1 -1
  8. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/LICENSE +0 -0
  9. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/LICENSE-THIRD-PARTY.md +0 -0
  10. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/README.md +0 -0
  11. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  12. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  13. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  14. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  15. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_cleaning.py +0 -0
  16. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_engineering.py +0 -0
  17. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/GUI_tools.py +0 -0
  18. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/MICE_imputation.py +0 -0
  19. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_callbacks.py +0 -0
  20. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_datasetmaster.py +0 -0
  21. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation.py +0 -0
  22. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation_multi.py +0 -0
  23. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_inference.py +0 -0
  24. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_models.py +0 -0
  25. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_optimization.py +0 -0
  26. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_scaler.py +0 -0
  27. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_simple_optimization.py +0 -0
  28. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_trainer.py +0 -0
  29. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/PSO_optimization.py +0 -0
  30. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/RNN_forecast.py +0 -0
  31. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/SQL.py +0 -0
  32. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/VIF_factor.py +0 -0
  33. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/__init__.py +0 -0
  34. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_logger.py +0 -0
  35. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_script_info.py +0 -0
  36. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/constants.py +0 -0
  37. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/custom_logger.py +0 -0
  38. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_evaluation.py +0 -0
  39. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_inference.py +0 -0
  40. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_learning.py +0 -0
  41. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/handle_excel.py +0 -0
  42. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/keys.py +0 -0
  43. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/math_utilities.py +0 -0
  44. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/path_manager.py +0 -0
  45. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/serde.py +0 -0
  46. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/setup.cfg +0 -0
{dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.8.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.6.0
+Version: 12.8.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.6.0
+Version: 12.8.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_utilities.py
@@ -1,12 +1,13 @@
 import pandas as pd
 from pathlib import Path
-from typing import Union, Any
+from typing import Union, Any, Optional
 
 from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
 from ._script_info import _script_info
 from ._logger import _LOGGER
 from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys
 from .utilities import load_dataframe
+from .custom_logger import save_list_strings
 
 
 __all__ = [
@@ -139,6 +140,7 @@ def find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, v
 def select_features_by_shap(
         root_directory: Union[str, Path],
         shap_threshold: float,
+        log_feature_names_directory: Optional[Union[str, Path]],
         verbose: bool = True) -> list[str]:
     """
     Scans subdirectories to find SHAP summary CSVs, then extracts feature
@@ -148,11 +150,13 @@ def select_features_by_shap(
     importance scores aggregated from multiple models.
 
     Args:
-        root_directory (Union[str, Path]):
+        root_directory (str | Path):
             The path to the root directory that contains model subdirectories.
         shap_threshold (float):
            The minimum mean absolute SHAP value for a feature to be included
            in the final list.
+        log_feature_names_directory (str | Path | None):
+            If given, saves the chosen feature names as a .txt file in this directory.
 
     Returns:
         list[str]:
@@ -211,6 +215,13 @@ def select_features_by_shap(
     final_features = sorted(list(master_feature_set))
     if verbose:
         _LOGGER.info(f"Selected {len(final_features)} unique features across all files.")
+
+    if log_feature_names_directory is not None:
+        save_names_path = make_fullpath(log_feature_names_directory, make=True, enforce="directory")
+        save_list_strings(list_strings=final_features,
+                          directory=save_names_path,
+                          filename=DatasetKeys.FEATURE_NAMES,
+                          verbose=verbose)
 
     return final_features
 
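Net effect of these hunks: select_features_by_shap gains a log_feature_names_directory argument with no default (pass None to opt out) and, when a directory is given, writes the selected feature names to a .txt file via save_list_strings. A minimal usage sketch based on the signature shown above; the paths and threshold values are illustrative:

    from pathlib import Path
    from ml_tools.ML_utilities import select_features_by_shap

    # Hypothetical layout: one subdirectory per trained model, each holding a SHAP summary CSV.
    models_root = Path("results/models")
    names_log_dir = Path("results/feature_selection")

    selected = select_features_by_shap(
        root_directory=models_root,
        shap_threshold=0.05,                        # minimum mean |SHAP| value to keep a feature
        log_feature_names_directory=names_log_dir,  # or None to skip writing the .txt log
        verbose=True,
    )
    print(f"{len(selected)} features selected")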
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/data_exploration.py
@@ -362,6 +362,7 @@ def encode_categorical_features(
         encode_nulls (bool): If True, encodes Null values as a distinct category
             "Other" with a value of 0. Other categories start from 1.
             If False, Nulls are ignored and categories start from 0.
+            Note: Use False when encoding binary values with missing entries.
         split_resulting_dataset (bool): If True, returns two separate DataFrames:
             one with non-categorical columns and one with the encoded columns.
             If False, returns a single DataFrame with all columns.
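The added docstring note concerns how integer codes are assigned when nulls are present. A rough plain-pandas illustration of the two schemes described (this is not the library function, and the actual category ordering may differ):

    import pandas as pd

    s = pd.Series(["yes", None, "no", "yes"])

    # encode_nulls=True (as described): nulls map to "Other" = 0 and real categories start at 1,
    # so a binary column ends up with three codes instead of two.
    codes_with_nulls = s.fillna("Other").astype("category").cat.codes

    # encode_nulls=False (as described): nulls are ignored and categories start at 0,
    # keeping a binary column at 0/1 (plain pandas marks the missing entries as -1).
    codes_without_nulls = s.astype("category").cat.codes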
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/optimization_tools.py
@@ -98,7 +98,7 @@ def create_optimization_bounds(
 
     # 3. Populate categorical bounds (Index-based)
     # The indices in categorical_map (e.g., {2: 4}) directly correspond
-    # to the indices in our new `feature_names` list.
+    # to the indices in the `feature_names` list.
     for index, cardinality in categorical_map.items():
         if not (0 <= index < total_features):
             _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
@@ -125,8 +125,8 @@ def create_optimization_bounds(
             # Map name to its index in the *feature-only* list
             index = feature_names.index(name)
         except ValueError:
-            _LOGGER.error(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
-            raise ValueError()
+            _LOGGER.warning(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
+            continue
 
         if lower_bounds[index] is not None:
             # This index was already set by the categorical map
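The second hunk changes a failure mode: a name in continuous_bounds_map that does not match any feature column is now logged as a warning and skipped instead of aborting with ValueError. A standalone sketch of the warn-and-skip lookup pattern (a simplified stand-in, not the library code):

    import logging

    logger = logging.getLogger("bounds_demo")

    feature_names = ["temp", "pressure", "flow"]
    continuous_bounds_map = {"temp": (0.0, 100.0), "ph": (2.0, 9.0)}  # "ph" is not a feature

    lower_bounds = [None] * len(feature_names)
    upper_bounds = [None] * len(feature_names)

    for name, (low, high) in continuous_bounds_map.items():
        try:
            index = feature_names.index(name)
        except ValueError:
            # New behavior: warn about the unknown name and move on instead of raising.
            logger.warning(f"Feature name '{name}' not found in the feature columns.")
            continue
        lower_bounds[index] = low
        upper_bounds[index] = high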
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/utilities.py
@@ -12,6 +12,7 @@ from ._logger import _LOGGER
 # Keep track of available tools
 __all__ = [
     "load_dataframe",
+    "load_dataframe_greedy",
     "yield_dataframes_from_dir",
     "merge_dataframes",
     "save_dataframe_filename",
@@ -124,6 +125,54 @@ def load_dataframe(
     return df, df_name # type: ignore
 
 
+def load_dataframe_greedy(directory: Union[str, Path],
+                          use_columns: Optional[list[str]] = None,
+                          all_strings: bool = False,
+                          verbose: bool = True) -> pd.DataFrame:
+    """
+    Greedily loads the first found CSV file from a directory into a Pandas DataFrame.
+
+    This function scans the specified directory for any CSV files. It will
+    attempt to load the *first* CSV file it finds using the `load_dataframe`
+    function as a Pandas DataFrame.
+
+    Args:
+        directory (str, Path):
+            The path to the directory to search for a CSV file.
+        use_columns (list[str] | None):
+            A list of column names to load. If None, all columns are loaded.
+        all_strings (bool):
+            If True, loads all columns as string data types.
+
+    Returns:
+        pd.DataFrame:
+            A pandas DataFrame loaded from the first CSV file found.
+
+    Raises:
+        FileNotFoundError:
+            If the specified directory does not exist or the CSV file path
+            found is invalid.
+        ValueError:
+            If the loaded DataFrame is empty or `use_columns` contains
+            invalid column names.
+    """
+    # validate directory
+    dir_path = make_fullpath(directory, enforce="directory")
+
+    # list all csv files and grab one (should be the only one)
+    csv_dict = list_csv_paths(directory=dir_path, verbose=False)
+
+    for df_path in csv_dict.values():
+        df , _df_name = load_dataframe(df_path=df_path,
+                                       use_columns=use_columns,
+                                       kind="pandas",
+                                       all_strings=all_strings,
+                                       verbose=verbose)
+        break
+
+    return df
+
+
 def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True):
     """
     Iterates over all CSV files in a given directory, loading each into a Pandas DataFrame.
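A minimal usage sketch for the new helper; the directory path is illustrative, and the function simply loads whichever CSV list_csv_paths returns first from that directory:

    from ml_tools.utilities import load_dataframe_greedy

    # Directory expected to contain a single CSV file (hypothetical path).
    df = load_dataframe_greedy(
        directory="data/processed",
        use_columns=None,     # load all columns
        all_strings=False,    # let pandas infer dtypes
        verbose=True,
    )
    print(df.shape)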
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "12.6.0"
+version = "12.8.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }