dragon-ml-toolbox 12.6.0__tar.gz → 12.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dragon-ml-toolbox might be problematic.

Files changed (46)
  1. {dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.8.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_utilities.py +13 -2
  4. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/data_exploration.py +1 -0
  5. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/optimization_tools.py +3 -3
  6. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/utilities.py +49 -0
  7. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/pyproject.toml +1 -1
  8. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/LICENSE +0 -0
  9. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/LICENSE-THIRD-PARTY.md +0 -0
  10. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/README.md +0 -0
  11. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  12. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  13. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  14. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  15. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_cleaning.py +0 -0
  16. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_engineering.py +0 -0
  17. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/GUI_tools.py +0 -0
  18. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/MICE_imputation.py +0 -0
  19. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_callbacks.py +0 -0
  20. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_datasetmaster.py +0 -0
  21. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation.py +0 -0
  22. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation_multi.py +0 -0
  23. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_inference.py +0 -0
  24. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_models.py +0 -0
  25. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_optimization.py +0 -0
  26. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_scaler.py +0 -0
  27. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_simple_optimization.py +0 -0
  28. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_trainer.py +0 -0
  29. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/PSO_optimization.py +0 -0
  30. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/RNN_forecast.py +0 -0
  31. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/SQL.py +0 -0
  32. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/VIF_factor.py +0 -0
  33. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/__init__.py +0 -0
  34. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_logger.py +0 -0
  35. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_script_info.py +0 -0
  36. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/constants.py +0 -0
  37. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/custom_logger.py +0 -0
  38. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_evaluation.py +0 -0
  39. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_inference.py +0 -0
  40. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_learning.py +0 -0
  41. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/handle_excel.py +0 -0
  42. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/keys.py +0 -0
  43. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/math_utilities.py +0 -0
  44. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/path_manager.py +0 -0
  45. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/serde.py +0 -0
  46. {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/setup.cfg +0 -0
{dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.8.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.6.0
+Version: 12.8.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.6.0
+Version: 12.8.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_utilities.py
@@ -1,12 +1,13 @@
 import pandas as pd
 from pathlib import Path
-from typing import Union, Any
+from typing import Union, Any, Optional
 
 from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
 from ._script_info import _script_info
 from ._logger import _LOGGER
 from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys
 from .utilities import load_dataframe
+from .custom_logger import save_list_strings
 
 
 __all__ = [
@@ -139,6 +140,7 @@ def find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, v
 def select_features_by_shap(
         root_directory: Union[str, Path],
         shap_threshold: float,
+        log_feature_names_directory: Optional[Union[str, Path]],
         verbose: bool = True) -> list[str]:
     """
     Scans subdirectories to find SHAP summary CSVs, then extracts feature
@@ -148,11 +150,13 @@ def select_features_by_shap(
     importance scores aggregated from multiple models.
 
     Args:
-        root_directory (Union[str, Path]):
+        root_directory (str | Path):
             The path to the root directory that contains model subdirectories.
         shap_threshold (float):
            The minimum mean absolute SHAP value for a feature to be included
            in the final list.
+        log_feature_names_directory (str | Path | None):
+            If given, saves the chosen feature names as a .txt file in this directory.
 
     Returns:
         list[str]:
@@ -211,6 +215,13 @@ def select_features_by_shap(
     final_features = sorted(list(master_feature_set))
     if verbose:
         _LOGGER.info(f"Selected {len(final_features)} unique features across all files.")
+
+    if log_feature_names_directory is not None:
+        save_names_path = make_fullpath(log_feature_names_directory, make=True, enforce="directory")
+        save_list_strings(list_strings=final_features,
+                          directory=save_names_path,
+                          filename=DatasetKeys.FEATURE_NAMES,
+                          verbose=verbose)
 
     return final_features
 
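Net effect of these hunks: select_features_by_shap gains a log_feature_names_directory argument with no default (pass None to opt out) and, when a directory is given, writes the selected feature names to a .txt file via save_list_strings. A minimal usage sketch based on the signature shown above; the paths and threshold values are illustrative:

    from pathlib import Path
    from ml_tools.ML_utilities import select_features_by_shap

    # Hypothetical layout: one subdirectory per trained model, each holding a SHAP summary CSV.
    models_root = Path("results/models")
    names_log_dir = Path("results/feature_selection")

    selected = select_features_by_shap(
        root_directory=models_root,
        shap_threshold=0.05,                        # minimum mean |SHAP| value to keep a feature
        log_feature_names_directory=names_log_dir,  # or None to skip writing the .txt log
        verbose=True,
    )
    print(f"{len(selected)} features selected")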
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/data_exploration.py
@@ -362,6 +362,7 @@ def encode_categorical_features(
         encode_nulls (bool): If True, encodes Null values as a distinct category
             "Other" with a value of 0. Other categories start from 1.
             If False, Nulls are ignored and categories start from 0.
+            Note: Use False when encoding binary values with missing entries.
         split_resulting_dataset (bool): If True, returns two separate DataFrames:
             one with non-categorical columns and one with the encoded columns.
             If False, returns a single DataFrame with all columns.
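The added docstring note concerns how integer codes are assigned when nulls are present. A rough plain-pandas illustration of the two schemes described (this is not the library function, and the actual category ordering may differ):

    import pandas as pd

    s = pd.Series(["yes", None, "no", "yes"])

    # encode_nulls=True (as described): nulls map to "Other" = 0 and real categories start at 1,
    # so a binary column ends up with three codes instead of two.
    codes_with_nulls = s.fillna("Other").astype("category").cat.codes

    # encode_nulls=False (as described): nulls are ignored and categories start at 0,
    # keeping a binary column at 0/1 (plain pandas marks the missing entries as -1).
    codes_without_nulls = s.astype("category").cat.codes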
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/optimization_tools.py
@@ -98,7 +98,7 @@ def create_optimization_bounds(
 
     # 3. Populate categorical bounds (Index-based)
     # The indices in categorical_map (e.g., {2: 4}) directly correspond
-    # to the indices in our new `feature_names` list.
+    # to the indices in the `feature_names` list.
     for index, cardinality in categorical_map.items():
         if not (0 <= index < total_features):
             _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
@@ -125,8 +125,8 @@ def create_optimization_bounds(
             # Map name to its index in the *feature-only* list
             index = feature_names.index(name)
         except ValueError:
-            _LOGGER.error(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
-            raise ValueError()
+            _LOGGER.warning(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
+            continue
 
         if lower_bounds[index] is not None:
             # This index was already set by the categorical map
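The second hunk changes a failure mode: a name in continuous_bounds_map that does not match any feature column is now logged as a warning and skipped instead of aborting with ValueError. A standalone sketch of the warn-and-skip lookup pattern (a simplified stand-in, not the library code):

    import logging

    logger = logging.getLogger("bounds_demo")

    feature_names = ["temp", "pressure", "flow"]
    continuous_bounds_map = {"temp": (0.0, 100.0), "ph": (2.0, 9.0)}  # "ph" is not a feature

    lower_bounds = [None] * len(feature_names)
    upper_bounds = [None] * len(feature_names)

    for name, (low, high) in continuous_bounds_map.items():
        try:
            index = feature_names.index(name)
        except ValueError:
            # New behavior: warn about the unknown name and move on instead of raising.
            logger.warning(f"Feature name '{name}' not found in the feature columns.")
            continue
        lower_bounds[index] = low
        upper_bounds[index] = high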
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/utilities.py
@@ -12,6 +12,7 @@ from ._logger import _LOGGER
 # Keep track of available tools
 __all__ = [
     "load_dataframe",
+    "load_dataframe_greedy",
     "yield_dataframes_from_dir",
     "merge_dataframes",
     "save_dataframe_filename",
@@ -124,6 +125,54 @@ def load_dataframe(
     return df, df_name # type: ignore
 
 
+def load_dataframe_greedy(directory: Union[str, Path],
+                          use_columns: Optional[list[str]] = None,
+                          all_strings: bool = False,
+                          verbose: bool = True) -> pd.DataFrame:
+    """
+    Greedily loads the first found CSV file from a directory into a Pandas DataFrame.
+
+    This function scans the specified directory for any CSV files. It will
+    attempt to load the *first* CSV file it finds using the `load_dataframe`
+    function as a Pandas DataFrame.
+
+    Args:
+        directory (str, Path):
+            The path to the directory to search for a CSV file.
+        use_columns (list[str] | None):
+            A list of column names to load. If None, all columns are loaded.
+        all_strings (bool):
+            If True, loads all columns as string data types.
+
+    Returns:
+        pd.DataFrame:
+            A pandas DataFrame loaded from the first CSV file found.
+
+    Raises:
+        FileNotFoundError:
+            If the specified directory does not exist or the CSV file path
+            found is invalid.
+        ValueError:
+            If the loaded DataFrame is empty or `use_columns` contains
+            invalid column names.
+    """
+    # validate directory
+    dir_path = make_fullpath(directory, enforce="directory")
+
+    # list all csv files and grab one (should be the only one)
+    csv_dict = list_csv_paths(directory=dir_path, verbose=False)
+
+    for df_path in csv_dict.values():
+        df , _df_name = load_dataframe(df_path=df_path,
+                                       use_columns=use_columns,
+                                       kind="pandas",
+                                       all_strings=all_strings,
+                                       verbose=verbose)
+        break
+
+    return df
+
+
 def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True):
     """
     Iterates over all CSV files in a given directory, loading each into a Pandas DataFrame.
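A minimal usage sketch for the new helper; the directory path is illustrative, and the function simply loads whichever CSV list_csv_paths returns first from that directory:

    from ml_tools.utilities import load_dataframe_greedy

    # Directory expected to contain a single CSV file (hypothetical path).
    df = load_dataframe_greedy(
        directory="data/processed",
        use_columns=None,     # load all columns
        all_strings=False,    # let pandas infer dtypes
        verbose=True,
    )
    print(df.shape)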
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "12.6.0"
+version = "12.8.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }