dragon-ml-toolbox 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dragon-ml-toolbox has been flagged as possibly problematic; see the registry page for details.
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/METADATA +5 -1
- dragon_ml_toolbox-9.0.0.dist-info/RECORD +35 -0
- ml_tools/ETL_engineering.py +177 -79
- ml_tools/GUI_tools.py +5 -5
- ml_tools/MICE_imputation.py +12 -8
- ml_tools/ML_callbacks.py +6 -3
- ml_tools/ML_datasetmaster.py +37 -20
- ml_tools/ML_evaluation.py +4 -4
- ml_tools/ML_evaluation_multi.py +26 -17
- ml_tools/ML_inference.py +30 -23
- ml_tools/ML_models.py +14 -14
- ml_tools/ML_optimization.py +4 -3
- ml_tools/ML_scaler.py +7 -7
- ml_tools/ML_trainer.py +17 -15
- ml_tools/PSO_optimization.py +16 -8
- ml_tools/RNN_forecast.py +1 -1
- ml_tools/SQL.py +22 -13
- ml_tools/VIF_factor.py +7 -6
- ml_tools/_logger.py +105 -7
- ml_tools/custom_logger.py +12 -8
- ml_tools/data_exploration.py +20 -15
- ml_tools/ensemble_evaluation.py +10 -6
- ml_tools/ensemble_inference.py +18 -18
- ml_tools/ensemble_learning.py +8 -5
- ml_tools/handle_excel.py +15 -11
- ml_tools/optimization_tools.py +3 -4
- ml_tools/path_manager.py +21 -15
- ml_tools/utilities.py +35 -26
- dragon_ml_toolbox-8.2.0.dist-info/RECORD +0 -36
- ml_tools/_ML_optimization_multi.py +0 -231
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_trainer.py
CHANGED
@@ -76,10 +76,10 @@ class MLTrainer:
         """Validates the selected device and returns a torch.device object."""
         device_lower = device.lower()
         if "cuda" in device_lower and not torch.cuda.is_available():
-            _LOGGER.warning("
+            _LOGGER.warning("CUDA not available, switching to CPU.")
             device = "cpu"
         elif device_lower == "mps" and not torch.backends.mps.is_available():
-            _LOGGER.warning("
+            _LOGGER.warning("Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
             device = "cpu"
         return torch.device(device)

@@ -275,7 +275,8 @@ class MLTrainer:
             dataset_for_names = data
         else: # data is None, use the trainer's default test dataset
             if self.test_dataset is None:
-
+                _LOGGER.error("Cannot evaluate. No data provided and no test_dataset available in the trainer.")
+                raise ValueError()
             # Create a fresh DataLoader from the test_dataset
             eval_loader = DataLoader(self.test_dataset,
                                      batch_size=32,

@@ -285,7 +286,8 @@ class MLTrainer:
             dataset_for_names = self.test_dataset

         if eval_loader is None:
-
+            _LOGGER.error("Cannot evaluate. No valid data was provided or found.")
+            raise ValueError()

         print("\n--- Model Evaluation ---")

@@ -296,7 +298,7 @@ class MLTrainer:
             if y_true_b is not None: all_true.append(y_true_b)

         if not all_true:
-            _LOGGER.error("
+            _LOGGER.error("Evaluation failed: No data was processed.")
             return

         y_pred = np.concatenate(all_preds)

@@ -316,7 +318,7 @@ class MLTrainer:
             except AttributeError:
                 num_targets = y_true.shape[1]
                 target_names = [f"target_{i}" for i in range(num_targets)]
-                _LOGGER.warning(f"
+                _LOGGER.warning(f"Dataset has no 'target_names' attribute. Using generic names.")
             multi_target_regression_metrics(y_true, y_pred, target_names, save_dir)

         elif self.kind == "multi_label_classification":

@@ -325,10 +327,10 @@ class MLTrainer:
             except AttributeError:
                 num_targets = y_true.shape[1]
                 target_names = [f"label_{i}" for i in range(num_targets)]
-                _LOGGER.warning(f"
+                _LOGGER.warning(f"Dataset has no 'target_names' attribute. Using generic names.")

             if y_prob is None:
-                _LOGGER.error("
+                _LOGGER.error("Evaluation for multi_label_classification requires probabilities (y_prob).")
                 return
             multi_label_classification_metrics(y_true, y_prob, target_names, save_dir, classification_threshold)

@@ -390,14 +392,14 @@ class MLTrainer:
         # 1. Get background data from the trainer's train_dataset
         background_data = _get_random_sample(self.train_dataset, n_samples)
         if background_data is None:
-            _LOGGER.error("
+            _LOGGER.error("Trainer's train_dataset is empty or invalid. Skipping SHAP analysis.")
             return

         # 2. Determine target dataset and get explanation instances
         target_dataset = explain_dataset if explain_dataset is not None else self.test_dataset
         instances_to_explain = _get_random_sample(target_dataset, n_samples)
         if instances_to_explain is None:
-            _LOGGER.error("
+            _LOGGER.error("Explanation dataset is empty or invalid. Skipping SHAP analysis.")
             return

         # attempt to get feature names

@@ -410,8 +412,8 @@ class MLTrainer:
                 # Handle PyTorch Subset
                 feature_names = target_dataset.dataset.feature_names # type: ignore
             except AttributeError:
-                _LOGGER.error("
-                raise ValueError(
+                _LOGGER.error("Could not extract `feature_names` from the dataset. It must be provided if the dataset object does not have a `feature_names` attribute.")
+                raise ValueError()

         # 3. Call the plotting function
         if self.kind in ["regression", "classification"]:

@@ -490,13 +492,13 @@ class MLTrainer:

         # --- Step 1: Check if the model supports this explanation ---
         if not hasattr(self.model, 'forward_attention'):
-            _LOGGER.error("
+            _LOGGER.error("Model does not have a `forward_attention` method. Skipping attention explanation.")
             return

         # --- Step 2: Set up the dataloader ---
         dataset_to_use = explain_dataset if explain_dataset is not None else self.test_dataset
         if not isinstance(dataset_to_use, Dataset):
-            _LOGGER.error("
+            _LOGGER.error("The explanation dataset is empty or invalid. Skipping attention analysis.")
             return

         explain_loader = DataLoader(

@@ -519,7 +521,7 @@ class MLTrainer:
                 save_dir=save_dir
             )
         else:
-            _LOGGER.error("
+            _LOGGER.error("No attention weights were collected from the model.")

     def callbacks_hook(self, method_name: str, *args, **kwargs):
         """Calls the specified method on all callbacks."""
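The pattern repeated throughout this release is log-then-raise: the human-readable message goes to the shared module logger first, and the exception is then raised with an empty message. A minimal sketch of the idea, using a hypothetical helper name (`_require_test_dataset` is illustrative, not part of the package):

from ml_tools._logger import _LOGGER  # assumes the package is installed

def _require_test_dataset(test_dataset):
    # Hypothetical helper mirroring the guard added to MLTrainer.evaluate():
    # the detail goes to the log, the raised exception stays message-less.
    if test_dataset is None:
        _LOGGER.error("Cannot evaluate. No data provided and no test_dataset available in the trainer.")
        raise ValueError()
    return test_dataset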
ml_tools/PSO_optimization.py
CHANGED
@@ -65,7 +65,9 @@ class ObjectiveFunction():
         np.ndarray
             1D array with length n_samples containing predicted target values.
         """
-
+        if features_array.ndim != 2:
+            _LOGGER.error(f"Expected 2D array, got shape {features_array.shape}.")
+            raise AssertionError()

         # Apply noise if enabled
         if self.use_noise:

@@ -101,7 +103,9 @@ class ObjectiveFunction():
         np.ndarray
             Noised array of same shape
         """
-
+        if features_array.ndim != 2:
+            _LOGGER.error(f"Expected 2D array for batch noise injection, got shape {features_array.shape}.")
+            raise AssertionError()

         if self.binary_features > 0:
             split_idx = -self.binary_features

@@ -118,13 +122,16 @@ class ObjectiveFunction():

     def check_model(self):
         if isinstance(self.model, xgb.XGBClassifier) or isinstance(self.model, lgb.LGBMClassifier):
-
+            _LOGGER.error(f"[Model Check Failed]\nThe loaded model ({type(self.model).__name__}) is a Classifier.\nOptimization is not suitable for standard classification tasks.")
+            raise ValueError()
         if self.model is None:
-
+            _LOGGER.error("Loaded model is None")
+            raise ValueError()

     def _get_from_artifact(self, key: str):
         if self._artifact is None:
-
+            _LOGGER.error("Load model error")
+            raise TypeError()
         val = self._artifact.get(key)
         if key == EnsembleKeys.FEATURES:
             result = val if isinstance(val, list) and val else None

@@ -314,7 +321,8 @@ def run_pso(lower_boundaries: list[float],
     if target_name is None and objective_function.target_name is not None:
         target_name = objective_function.target_name
     if target_name is None:
-
+        _LOGGER.error(f"'target' name was not provided and was not found in the .joblib object.")
+        raise ValueError()

     # --- Setup: Saving Infrastructure ---
     sanitized_target_name = sanitize_filename(target_name)

@@ -355,7 +363,7 @@ def run_pso(lower_boundaries: list[float],
             objective_function, pso_arguments, names, target_name, random_state,
             save_format, csv_path, db_manager, db_table_name
         )
-        _LOGGER.info(f"
+        _LOGGER.info(f"Single optimization complete.")
        return features_dict, target_dict

    else:

@@ -365,7 +373,7 @@ def run_pso(lower_boundaries: list[float],
            objective_function, pso_arguments, names, target_name, post_hoc_analysis,
            save_format, csv_path, db_manager, db_table_name
        )
-        _LOGGER.info("
+        _LOGGER.info("Post-hoc analysis complete. Results saved.")
        return None
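The noise-injection hunk above splits a 2D batch into a continuous block and a trailing binary block via `split_idx = -self.binary_features`. A rough standalone sketch of that layout follows; the Gaussian noise scale and the function name are assumptions, not taken from the package:

import numpy as np

def add_noise_to_continuous(features_array: np.ndarray, binary_features: int, scale: float = 0.01) -> np.ndarray:
    # Perturb only the leading continuous columns; leave trailing binary columns untouched.
    if features_array.ndim != 2:
        raise AssertionError(f"Expected 2D array, got shape {features_array.shape}.")
    noised = features_array.astype(float).copy()
    if binary_features > 0:
        split_idx = -binary_features  # binary columns occupy the last `binary_features` positions
        continuous = noised[:, :split_idx]
        noised[:, :split_idx] = continuous + np.random.normal(0.0, scale, size=continuous.shape)
    else:
        noised += np.random.normal(0.0, scale, size=noised.shape)
    return noised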
ml_tools/RNN_forecast.py
CHANGED
ml_tools/SQL.py
CHANGED
@@ -62,7 +62,7 @@ class DatabaseManager:
             _LOGGER.info(f"❇️ Successfully connected to database: {self.db_path}")
             return self
         except sqlite3.Error as e:
-            _LOGGER.error(f"
+            _LOGGER.error(f"Database connection failed: {e}")
             raise # Re-raise the exception after logging

     def __exit__(self, exc_type, exc_val, exc_tb):

@@ -70,11 +70,11 @@ class DatabaseManager:
         if self.conn:
             if exc_type: # If an exception occurred, rollback
                 self.conn.rollback()
-                _LOGGER.warning("
+                _LOGGER.warning("Rolling back transaction due to an error.")
             else: # Otherwise, commit the transaction
                 self.conn.commit()
             self.conn.close()
-            _LOGGER.info(f"
+            _LOGGER.info(f"Database connection closed: {self.db_path.name}")

     def create_table(self, table_name: str, schema: Dict[str, str], if_not_exists: bool = True):
         """

@@ -92,7 +92,8 @@ class DatabaseManager:
             if the table already exists.
         """
         if not self.cursor:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         columns_def = ", ".join([f'"{col_name}" {col_type}' for col_name, col_type in schema.items()])
         exists_clause = "IF NOT EXISTS" if if_not_exists else ""

@@ -115,7 +116,8 @@ class DatabaseManager:
             data to be inserted.
         """
         if not self.cursor:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         columns = ', '.join(f'"{k}"' for k in data.keys())
         placeholders = ', '.join(['?'] * len(data))

@@ -143,7 +145,8 @@ class DatabaseManager:
             A DataFrame containing the query results.
         """
         if not self.conn:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         return pd.read_sql_query(query, self.conn, params=params)

@@ -159,7 +162,8 @@ class DatabaseManager:
             An optional tuple of parameters for the query.
         """
         if not self.cursor:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         self.cursor.execute(query, params if params else ())

@@ -176,9 +180,10 @@ class DatabaseManager:
             All dictionaries should have the same keys.
         """
         if not self.cursor:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()
         if not data:
-            _LOGGER.warning("
+            _LOGGER.warning("'insert_many' called with empty data list. No action taken.")
             return

         # Assume all dicts have the same keys as the first one

@@ -211,7 +216,8 @@ class DatabaseManager:
             - 'append': Insert new values to the existing table.
         """
         if not self.conn:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         df.to_sql(
             table_name,

@@ -224,7 +230,8 @@ class DatabaseManager:
     def list_tables(self) -> List[str]:
         """Returns a list of all table names in the database."""
         if not self.cursor:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
         # The result of the fetch is a list of tuples, e.g., [('table1',), ('table2',)]

@@ -237,7 +244,8 @@ class DatabaseManager:
         Returns a DataFrame with columns: cid, name, type, notnull, dflt_value, pk
         """
         if not self.conn:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         # PRAGMA is a special SQL command in SQLite for database metadata
         return pd.read_sql_query(f'PRAGMA table_info("{table_name}");', self.conn)

@@ -257,7 +265,8 @@ class DatabaseManager:
             column are unique.
         """
         if not self.cursor:
-
+            _LOGGER.error("Database connection is not open.")
+            raise sqlite3.Error()

         index_name = f"idx_{table_name}_{column_name}"
         unique_clause = "UNIQUE" if unique else ""
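The `__exit__` changes only add logging around an existing commit-on-success / rollback-on-error pattern. A self-contained sketch of that pattern for reference; the class below is a simplified stand-in, not the package's full DatabaseManager:

import sqlite3
from pathlib import Path

class MiniDBManager:
    """Minimal sketch: commit when the with-block succeeds, roll back when it raises."""

    def __init__(self, db_path):
        self.db_path = Path(db_path)
        self.conn = None
        self.cursor = None

    def __enter__(self):
        self.conn = sqlite3.connect(self.db_path)
        self.cursor = self.conn.cursor()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.conn:
            if exc_type:              # an exception escaped the with-block
                self.conn.rollback()
            else:                     # clean exit: persist the transaction
                self.conn.commit()
            self.conn.close()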
ml_tools/VIF_factor.py
CHANGED
@@ -55,19 +55,19 @@ def compute_vif(
         sanitized_columns = df.select_dtypes(include='number').columns.tolist()
         missing_features = set(ground_truth_cols) - set(sanitized_columns)
         if missing_features:
-            _LOGGER.warning(f"
+            _LOGGER.warning(f"These columns are not Numeric:\n{missing_features}")
     else:
         sanitized_columns = list()
         for feature in use_columns:
             if feature not in ground_truth_cols:
-                _LOGGER.warning(f"
+                _LOGGER.warning(f"The provided column '{feature}' is not in the DataFrame.")
             else:
                 sanitized_columns.append(feature)

     if ignore_columns is not None and use_columns is None:
         missing_ignore = set(ignore_columns) - set(ground_truth_cols)
         if missing_ignore:
-            _LOGGER.warning(f"
+            _LOGGER.warning(f"The following 'columns to ignore' are not found in the Dataframe:\n{missing_ignore}")
         sanitized_columns = [f for f in sanitized_columns if f not in ignore_columns]

     X = df[sanitized_columns].copy()

@@ -138,7 +138,7 @@ def compute_vif(
         filename += ".svg"
         full_save_path = save_path / filename
         plt.savefig(full_save_path, format='svg', bbox_inches='tight')
-        _LOGGER.info(f"
+        _LOGGER.info(f"📊 Saved VIF plot: '{filename}'")

     if show_plot:
         plt.show()

@@ -163,7 +163,8 @@ def drop_vif_based(df: pd.DataFrame, vif_df: pd.DataFrame, threshold: float = 10
     """
     # Ensure expected structure
     if 'feature' not in vif_df.columns or 'VIF' not in vif_df.columns:
-
+        _LOGGER.error("'vif_df' must contain 'feature' and 'VIF' columns.")
+        raise ValueError()

     # Identify features to drop
     to_drop = vif_df[vif_df["VIF"] > threshold]["feature"].tolist()

@@ -177,7 +178,7 @@ def drop_vif_based(df: pd.DataFrame, vif_df: pd.DataFrame, threshold: float = 10
     result_df = df.drop(columns=to_drop)

     if result_df.empty:
-        _LOGGER.warning(f"
+        _LOGGER.warning(f"All columns were dropped.")

     return result_df, to_drop
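The core of `drop_vif_based` visible in these hunks is a threshold filter over a `feature`/`VIF` table. A self-contained approximation of that logic; the function name and the guard against missing columns are illustrative, not the package's exact code:

import pandas as pd

def drop_high_vif(df: pd.DataFrame, vif_df: pd.DataFrame, threshold: float = 10.0):
    # Drop every feature whose variance inflation factor exceeds the threshold.
    if 'feature' not in vif_df.columns or 'VIF' not in vif_df.columns:
        raise ValueError("'vif_df' must contain 'feature' and 'VIF' columns.")
    to_drop = vif_df[vif_df["VIF"] > threshold]["feature"].tolist()
    result_df = df.drop(columns=[c for c in to_drop if c in df.columns])
    return result_df, to_drop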
ml_tools/_logger.py
CHANGED
@@ -1,6 +1,73 @@
 import logging
 import sys

+# Step 1: Conditionally import colorlog
+try:
+    import colorlog # type: ignore
+except ImportError:
+    colorlog = None
+
+
+# --- Centralized Configuration ---
+LEVEL_EMOJIS = {
+    logging.INFO: "✅",
+    logging.WARNING: "⚠️ ",
+    logging.ERROR: "🚨",
+    logging.CRITICAL: "❌"
+}
+
+# Define base format strings.
+BASE_INFO_FORMAT = '\n🐉 %(asctime)s [%(emoji)s %(levelname)s] - %(message)s'
+BASE_WARN_FORMAT = '\n🐉 %(asctime)s [%(emoji)s %(levelname)s] [%(filename)s:%(lineno)d] - %(message)s'
+
+
+# --- Unified Formatter ---
+# Determine the base class and format strings based on colorlog availability
+if colorlog:
+    # If colorlog is available, use it as the base and use colorized formats.
+    _BaseFormatter = colorlog.ColoredFormatter
+    _INFO_FORMAT = BASE_INFO_FORMAT.replace('%(levelname)s', '%(log_color)s%(levelname)s%(reset)s')
+    _WARN_FORMAT = BASE_WARN_FORMAT.replace('%(levelname)s', '%(log_color)s%(levelname)s%(reset)s')
+else:
+    # Otherwise, fall back to the standard logging.Formatter.
+    _BaseFormatter = logging.Formatter
+    _INFO_FORMAT = BASE_INFO_FORMAT
+    _WARN_FORMAT = BASE_WARN_FORMAT
+
+
+class _UnifiedFormatter(_BaseFormatter): # type: ignore
+    """
+    A unified log formatter that adds emojis, uses level-specific formats,
+    and applies colors if colorlog is available.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initializes the formatter, creating sub-formatters for each level."""
+        # The base class __init__ is called implicitly. We prepare our custom formatters here.
+        self.datefmt = kwargs.get('datefmt')
+
+        # We need to pass the correct arguments to the correct formatter type
+        if colorlog:
+            log_colors = kwargs.get('log_colors', {})
+            self.info_formatter = colorlog.ColoredFormatter(_INFO_FORMAT, datefmt=self.datefmt, log_colors=log_colors)
+            self.warn_formatter = colorlog.ColoredFormatter(_WARN_FORMAT, datefmt=self.datefmt, log_colors=log_colors)
+        else:
+            self.info_formatter = logging.Formatter(_INFO_FORMAT, datefmt=self.datefmt)
+            self.warn_formatter = logging.Formatter(_WARN_FORMAT, datefmt=self.datefmt)
+
+    def format(self, record):
+        """Adds a custom emoji attribute to the record before formatting."""
+        # Add the new attribute to the record. Use .get() for a safe default.
+        record.emoji = LEVEL_EMOJIS.get(record.levelno, "")
+
+        # Select the appropriate formatter and let it handle the rest.
+        if record.levelno >= logging.WARNING:
+            return self.warn_formatter.format(record)
+        else:
+            return self.info_formatter.format(record)
+

 def _get_logger(name: str = "ml_tools", level: int = logging.INFO):
     """

@@ -9,6 +76,7 @@ def _get_logger(name: str = "ml_tools", level: int = logging.INFO):
     - `logger.info()`
     - `logger.warning()`
     - `logger.error()` the program can potentially recover.
+    - `logger.exception()` inside an except block.
     - `logger.critical()` the program is going to crash.
     """
     logger = logging.getLogger(name)

@@ -16,15 +84,26 @@ def _get_logger(name: str = "ml_tools", level: int = logging.INFO):

     # Prevents adding handlers multiple times if the function is called again
     if not logger.handlers:
-
-
-
-
-        date_format = '%Y-%m-%d %H:%M' # Format: Year-Month-Day Hour:Minute
+        # Prepare arguments for the unified formatter
+        formatter_kwargs = {
+            'datefmt': '%Y-%m-%d %H:%M'
+        }

-        #
-
+        # Use colorlog's handler if available, and add color arguments
+        if colorlog:
+            handler = colorlog.StreamHandler()
+            formatter_kwargs["log_colors"] = { # type: ignore
+                'DEBUG': 'cyan',
+                'INFO': 'green',
+                'WARNING': 'yellow',
+                'ERROR': 'red',
+                'CRITICAL': 'red,bg_white',
+            }
+        else:
+            handler = logging.StreamHandler(sys.stdout)

+        # Create and set the single, unified formatter
+        formatter = _UnifiedFormatter(**formatter_kwargs)
         handler.setFormatter(formatter)
         logger.addHandler(handler)

@@ -32,5 +111,24 @@ def _get_logger(name: str = "ml_tools", level: int = logging.INFO):

     return logger

+
 # Create a single logger instance to be imported by other modules
 _LOGGER = _get_logger()
+
+
+def _log_and_exit(message: str, exit_code: int = 1):
+    """Logs a critical message inside an exception block and terminates the program."""
+    _LOGGER.exception(message)
+    sys.exit(exit_code)
+
+
+if __name__ == "__main__":
+    _LOGGER.info("Data loading process started.")
+    _LOGGER.warning("A non-critical configuration value is missing.")
+
+    try:
+        x = 1 / 0
+    except ZeroDivisionError:
+        _LOGGER.exception("Critical error during calculation.")
+
+    _LOGGER.critical("Total failure.")
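Since `_LOGGER` is built once at import time, the other modules in the package pick it up with a plain import. A minimal usage sketch (assuming the package is installed; colored output appears only when the optional colorlog dependency is present):

from ml_tools._logger import _LOGGER

_LOGGER.info("Pipeline started.")
try:
    1 / 0
except ZeroDivisionError:
    # logger.exception() records the message plus the traceback of the active exception.
    _LOGGER.exception("Critical error during calculation.")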
ml_tools/custom_logger.py
CHANGED
@@ -76,12 +76,13 @@ def custom_logger(
             _log_exception_to_log(data, base_path.with_suffix(".log"))

         else:
-
+            _LOGGER.error("Unsupported data type. Must be list, dict, str, or BaseException.")
+            raise ValueError()

-        _LOGGER.info(f"
+        _LOGGER.info(f"Log saved to: '{base_path}'")

-    except Exception
-        _LOGGER.
+    except Exception:
+        _LOGGER.exception(f"Log not saved.")


 def _log_list_to_txt(data: List[Any], path: Path) -> None:

@@ -102,7 +103,9 @@ def _log_dict_to_csv(data: Dict[Any, List[Any]], path: Path) -> None:

     for key, value in data.items():
         if not isinstance(value, list):
-
+            _LOGGER.error(f"Dictionary value for key '{key}' must be a list.")
+            raise ValueError()
+
         sanitized_key = str(key).strip().replace('\n', '_').replace('\r', '_')
         padded_value = value + [None] * (max_length - len(value))
         sanitized_dict[sanitized_key] = padded_value

@@ -152,7 +155,7 @@ def save_list_strings(list_strings: list[str], directory: Union[str,Path], filen
             f.write(f"{string_data}\n")

     if verbose:
-        _LOGGER.info(f"
+        _LOGGER.info(f"Text file saved as '{full_path.name}'.")


 def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[str]:

@@ -164,10 +167,11 @@ def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[st
         loaded_strings = [line.strip() for line in f]

     if len(loaded_strings) == 0:
-
+        _LOGGER.error("The text file is empty.")
+        raise ValueError()

     if verbose:
-        _LOGGER.info(f"
+        _LOGGER.info(f"Text file loaded as list of strings.")

     return loaded_strings
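The `_log_dict_to_csv` hunk pads every value list to a common length so the dictionary becomes rectangular CSV columns. A small standalone illustration of that padding step (the helper name below is assumed, not the package's function):

import pandas as pd

def dict_to_padded_frame(data: dict) -> pd.DataFrame:
    # Pad shorter lists with None so every column ends up with the same number of rows.
    max_length = max(len(v) for v in data.values())
    padded = {
        str(key).strip().replace('\n', '_').replace('\r', '_'): value + [None] * (max_length - len(value))
        for key, value in data.items()
    }
    return pd.DataFrame(padded)

# Example: {"a": [1, 2, 3], "b": [4]} -> column "b" becomes [4, None, None].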
ml_tools/data_exploration.py
CHANGED
@@ -83,7 +83,8 @@ def drop_constant_columns(df: pd.DataFrame, verbose: bool = True) -> pd.DataFram
         A new DataFrame with the constant columns removed.
     """
     if not isinstance(df, pd.DataFrame):
-
+        _LOGGER.error("Input must be a pandas DataFrame.")
+        raise TypeError()

     original_columns = set(df.columns)
     cols_to_keep = []

@@ -136,7 +137,7 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
             _LOGGER.info(f"🧹 Dropping {target_na.sum()} rows with all target columns missing.")
             df_clean = df_clean[~target_na]
         else:
-            _LOGGER.info("
+            _LOGGER.info("No rows found where all targets are missing.")
     else:
         valid_targets = []

@@ -149,9 +150,9 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
             _LOGGER.info(f"🧹 Dropping {len(rows_to_drop)} rows with more than {threshold*100:.0f}% missing feature data.")
             df_clean = df_clean.drop(index=rows_to_drop)
         else:
-            _LOGGER.info(f"
+            _LOGGER.info(f"No rows exceed the {threshold*100:.0f}% missing feature data threshold.")
     else:
-        _LOGGER.warning("
+        _LOGGER.warning("No feature columns available to evaluate.")

     return df_clean

@@ -211,7 +212,7 @@ def drop_columns_with_missing_data(df: pd.DataFrame, threshold: float = 0.7, sho
     cols_to_drop = missing_fraction[missing_fraction > threshold].index

     if len(cols_to_drop) > 0:
-        _LOGGER.info(f"Dropping columns with more than {threshold*100:.0f}% missing data:")
+        _LOGGER.info(f"🧹 Dropping columns with more than {threshold*100:.0f}% missing data:")
         print(list(cols_to_drop))

     result_df = df.drop(columns=cols_to_drop)

@@ -339,7 +340,8 @@ def split_continuous_binary(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFram
         TypeError: If any column is not numeric.
     """
     if not all(np.issubdtype(dtype, np.number) for dtype in df.dtypes):
-
+        _LOGGER.error("All columns must be numeric (int or float).")
+        raise TypeError()

     binary_cols = []
     continuous_cols = []

@@ -390,7 +392,7 @@ def plot_correlation_heatmap(df: pd.DataFrame,
     """
     numeric_df = df.select_dtypes(include='number')
     if numeric_df.empty:
-        _LOGGER.warning("
+        _LOGGER.warning("No numeric columns found. Heatmap not generated.")
         return

     corr = numeric_df.corr(method=method)

@@ -558,11 +560,11 @@ def clip_outliers_single(
         None: if a problem with the dataframe column occurred.
     """
     if column not in df.columns:
-        _LOGGER.warning(f"
+        _LOGGER.warning(f"Column '{column}' not found in DataFrame.")
         return None

     if not pd.api.types.is_numeric_dtype(df[column]):
-        _LOGGER.warning(f"
+        _LOGGER.warning(f"Column '{column}' must be numeric.")
         return None

     new_df = df.copy(deep=True)

@@ -600,13 +602,16 @@ def clip_outliers_multi(
     for col, bounds in clip_dict.items():
         try:
             if col not in df.columns:
-
+                _LOGGER.error(f"Column '{col}' not found in DataFrame.")
+                raise ValueError()

             if not pd.api.types.is_numeric_dtype(df[col]):
-
+                _LOGGER.error(f"Column '{col}' is not numeric.")
+                raise TypeError()

             if not (isinstance(bounds, tuple) and len(bounds) == 2):
-
+                _LOGGER.error(f"Bounds for '{col}' must be a tuple of (min, max).")
+                raise ValueError()

             min_val, max_val = bounds
             new_df[col] = new_df[col].clip(lower=min_val, upper=max_val)

@@ -621,7 +626,7 @@ def clip_outliers_multi(
     _LOGGER.info(f"Clipped {clipped_columns} columns.")

     if skipped_columns:
-        _LOGGER.warning("
+        _LOGGER.warning("Skipped columns:")
         for col, msg in skipped_columns:
             print(f"  - {col}: {msg}")

@@ -707,11 +712,11 @@ def standardize_percentages(
     for col in columns:
         # --- Robustness Checks ---
         if col not in df_copy.columns:
-            _LOGGER.warning(f"
+            _LOGGER.warning(f"Column '{col}' not found. Skipping.")
             continue

         if not is_numeric_dtype(df_copy[col]):
-            _LOGGER.warning(f"
+            _LOGGER.warning(f"Column '{col}' is not numeric. Skipping.")
             continue

         # --- Applying the Logic ---
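For reference, `clip_outliers_multi` expects a mapping of column name to a (min, max) tuple, which is what the validation added above enforces. A hedged usage sketch of that bounds dictionary; the DataFrame and the clipping loop below are illustrative, not the package's implementation:

import pandas as pd

df = pd.DataFrame({"age": [5, 40, 120], "score": [-3.0, 0.5, 9.9]})

# One (min, max) tuple per column, matching the structure validated in the diff.
clip_dict = {"age": (0, 100), "score": (0.0, 1.0)}

clipped = df.copy()
for col, (min_val, max_val) in clip_dict.items():
    clipped[col] = clipped[col].clip(lower=min_val, upper=max_val)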
|