dragon-ml-toolbox 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/METADATA +5 -1
- dragon_ml_toolbox-9.0.0.dist-info/RECORD +35 -0
- ml_tools/ETL_engineering.py +177 -79
- ml_tools/GUI_tools.py +5 -5
- ml_tools/MICE_imputation.py +12 -8
- ml_tools/ML_callbacks.py +6 -3
- ml_tools/ML_datasetmaster.py +37 -20
- ml_tools/ML_evaluation.py +4 -4
- ml_tools/ML_evaluation_multi.py +26 -17
- ml_tools/ML_inference.py +30 -23
- ml_tools/ML_models.py +14 -14
- ml_tools/ML_optimization.py +4 -3
- ml_tools/ML_scaler.py +7 -7
- ml_tools/ML_trainer.py +17 -15
- ml_tools/PSO_optimization.py +16 -8
- ml_tools/RNN_forecast.py +1 -1
- ml_tools/SQL.py +22 -13
- ml_tools/VIF_factor.py +7 -6
- ml_tools/_logger.py +105 -7
- ml_tools/custom_logger.py +12 -8
- ml_tools/data_exploration.py +20 -15
- ml_tools/ensemble_evaluation.py +10 -6
- ml_tools/ensemble_inference.py +18 -18
- ml_tools/ensemble_learning.py +8 -5
- ml_tools/handle_excel.py +15 -11
- ml_tools/optimization_tools.py +3 -4
- ml_tools/path_manager.py +21 -15
- ml_tools/utilities.py +35 -26
- dragon_ml_toolbox-8.2.0.dist-info/RECORD +0 -36
- ml_tools/_ML_optimization_multi.py +0 -231
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/top_level.txt +0 -0
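Nearly every hunk below makes the same change: modules stop embedding messages in raised exceptions and instead log through the shared `_LOGGER` before raising, which is why `ml_tools/_logger.py` grows by roughly a hundred lines in this release. As an orientation aid, here is a minimal sketch of what such a shared module-level logger could look like; it only assumes the standard `logging` module, and the helper name `_get_logger` and the format string are illustrative, not taken from the package.

```python
# Illustrative sketch only; the real ml_tools/_logger.py in 9.0.0 is larger and may differ.
import logging
import sys


def _get_logger(name: str = "dragon_ml_toolbox") -> logging.Logger:
    """Return a module-level logger with a single stderr handler."""
    logger = logging.getLogger(name)
    if not logger.handlers:  # avoid stacking handlers on repeated imports
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(logging.Formatter("[%(asctime)s] [%(name)s] %(levelname)s: %(message)s"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger


_LOGGER = _get_logger()
```

Each module presumably imports this object (something like `from ._logger import _LOGGER`) and then calls `_LOGGER.info(...)`, `_LOGGER.warning(...)`, `_LOGGER.error(...)` or `_LOGGER.exception(...)`, as the hunks below show.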
ml_tools/ensemble_evaluation.py
CHANGED
@@ -119,8 +119,8 @@ def evaluate_model_classification(
         heatmap_path = save_path / f"Classification_Report_{sanitized_target_name}.svg"
         plt.savefig(heatmap_path, format="svg", bbox_inches="tight")
         plt.close()
-    except Exception
-        _LOGGER.
+    except Exception:
+        _LOGGER.exception(f"Could not generate classification report heatmap for {target_name}:")

     # Create confusion matrix
     fig, ax = plt.subplots(figsize=figsize)
@@ -198,7 +198,8 @@ def plot_roc_curve(

     elif hasattr(probabilities_or_model, "predict_proba"):
         if input_features is None:
-
+            _LOGGER.error("input_features must be provided when using a classifier.")
+            raise ValueError()

         try:
             classes = probabilities_or_model.classes_ # type: ignore
@@ -209,7 +210,8 @@ def plot_roc_curve(
         y_score = probabilities_or_model.predict_proba(input_features)[:, positive_class_index] # type: ignore

     else:
-
+        _LOGGER.error("Unsupported type for 'probabilities_or_model'. Must be a NumPy array or a model with support for '.predict_proba()'.")
+        raise TypeError()

     # ROC and AUC
     fpr, tpr, _ = roc_curve(true_labels, y_score)
@@ -276,7 +278,8 @@ def plot_precision_recall_curve(

     elif hasattr(probabilities_or_model, "predict_proba"):
         if input_features is None:
-
+            _LOGGER.error("input_features must be provided when using a classifier.")
+            raise ValueError()
         try:
             classes = probabilities_or_model.classes_ # type: ignore
             positive_class_index = list(classes).index(1)
@@ -284,7 +287,8 @@ def plot_precision_recall_curve(
         positive_class_index = 1
         y_score = probabilities_or_model.predict_proba(input_features)[:, positive_class_index] # type: ignore
     else:
-
+        _LOGGER.error("Unsupported type for 'probabilities_or_model'. Must be a NumPy array or a model with support for '.predict_proba()'.")
+        raise TypeError()

     # Calculate PR curve and AP score
     precision, recall, _ = precision_recall_curve(true_labels, y_score)
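These hunks follow one refactor: validation failures now log through `_LOGGER.error(...)` and then raise an empty exception (the message lives in the log), and the heatmap `except` block switches to `_LOGGER.exception(...)` so the traceback is captured. A minimal sketch of the input-resolution branch as it reads after the change, assuming a `logging.Logger` named `_LOGGER`; the positive-class lookup is simplified to column 1 here, whereas the real functions resolve it from `classes_`.

```python
import logging
import numpy as np

_LOGGER = logging.getLogger("ml_tools")


def resolve_scores(probabilities_or_model, input_features=None) -> np.ndarray:
    """Return positive-class scores from either a probability array or a fitted classifier."""
    if isinstance(probabilities_or_model, np.ndarray):
        return probabilities_or_model
    elif hasattr(probabilities_or_model, "predict_proba"):
        if input_features is None:
            # new convention: log the message first, then raise an exception without one
            _LOGGER.error("input_features must be provided when using a classifier.")
            raise ValueError()
        return probabilities_or_model.predict_proba(input_features)[:, 1]
    else:
        _LOGGER.error("Unsupported type for 'probabilities_or_model'. Must be a NumPy array or a model with support for '.predict_proba()'.")
        raise TypeError()
```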
ml_tools/ensemble_inference.py
CHANGED
@@ -59,15 +59,15 @@ class InferenceHandler:
                     self._feature_names = feature_names_list
                 elif self._feature_names != feature_names_list:
                     # Add a warning if subsequent models have different feature names.
-                    _LOGGER.warning(f"
+                    _LOGGER.warning(f"Mismatched feature names in {fname}. Using feature order from the first model loaded.")

                 self.models[target_name] = model
                 if self.verbose:
-                    _LOGGER.info(f"
+                    _LOGGER.info(f"Loaded model for target: {target_name}")
+
+            except Exception:
+                _LOGGER.error(f"Failed to load or parse {fname}.")

-            except Exception as e:
-                _LOGGER.warning(f"⚠️ Failed to load or parse {fname}: {e}")
-
     @property
     def feature_names(self) -> List[str]:
         """
@@ -92,7 +92,8 @@ class InferenceHandler:
             features = features.reshape(1, -1)

         if features.shape[0] != 1:
-
+            _LOGGER.error("The 'predict()' method is for a single sample. Use 'predict_batch()' for multiple samples.")
+            raise ValueError()

         results: Dict[str, Any] = dict()
         for target_name, model in self.models.items():
@@ -106,7 +107,7 @@ class InferenceHandler:
                                             EnsembleKeys.CLASSIFICATION_PROBABILITIES: probabilities}

         if self.verbose:
-            _LOGGER.info("
+            _LOGGER.info("Inference process complete.")
         return results

     def predict_batch(self, features: np.ndarray) -> Dict[str, Any]:
@@ -122,7 +123,8 @@ class InferenceHandler:
             - For classification: The value is another dictionary {'labels': ..., 'probabilities': ...}.
         """
         if features.ndim != 2:
-
+            _LOGGER.error("Input for batch prediction must be a 2D array.")
+            raise ValueError()

         results: Dict[str, Any] = dict()
         for target_name, model in self.models.items():
@@ -134,7 +136,7 @@ class InferenceHandler:
                 results[target_name] = {"labels": labels, "probabilities": probabilities}

         if self.verbose:
-            _LOGGER.info("
+            _LOGGER.info("Inference process complete.")

         return results

@@ -174,11 +176,11 @@ def model_report(
         target = full_object[EnsembleKeys.TARGET]
         features = full_object[EnsembleKeys.FEATURES]
     except FileNotFoundError:
-        _LOGGER.error(f"
+        _LOGGER.error(f"Model file not found at '{model_p}'")
         raise
     except (KeyError, TypeError) as e:
         _LOGGER.error(
-            f"
+            f"The serialized object is missing required keys '{EnsembleKeys.MODEL}', '{EnsembleKeys.TARGET}', '{EnsembleKeys.FEATURES}'"
         )
         raise e

@@ -209,9 +211,9 @@ def model_report(
         with open(json_filepath, 'w') as f:
             json.dump(report_data, f, indent=4)
         if verbose:
-            _LOGGER.info(f"
+            _LOGGER.info(f"JSON report saved to: '{json_filepath}'")
     except PermissionError:
-        _LOGGER.
+        _LOGGER.exception(f"Permission denied to write JSON report at '{json_filepath}'.")

     # --- 5. Return the extracted data ---
     return report_data
@@ -233,15 +235,13 @@ def _deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_
     try:
         obj = joblib.load(true_filepath)
     except (IOError, OSError, EOFError, TypeError, ValueError) as e:
-
+        _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
         if raise_on_error:
-            raise
-        else:
-            print(message)
+            raise e
         return None
     else:
         if verbose:
-
+            _LOGGER.info(f"Loaded object of type '{type(obj)}'")
         return obj

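The `InferenceHandler` hunks apply the same pattern to shape validation: `predict()` now rejects anything other than a single sample, and `predict_batch()` rejects non-2D input, each logging before raising. A short sketch of just those guards, assuming `features` is a NumPy array and `_LOGGER` is the shared logger; the surrounding class and per-model loop are omitted.

```python
import logging
import numpy as np

_LOGGER = logging.getLogger("ml_tools")


def check_single_sample(features: np.ndarray) -> np.ndarray:
    """Guard used by predict(): accepts a 1D vector or a (1, n_features) array."""
    if features.ndim == 1:
        features = features.reshape(1, -1)
    if features.shape[0] != 1:
        _LOGGER.error("The 'predict()' method is for a single sample. Use 'predict_batch()' for multiple samples.")
        raise ValueError()
    return features


def check_batch(features: np.ndarray) -> np.ndarray:
    """Guard used by predict_batch(): requires a 2D (n_samples, n_features) array."""
    if features.ndim != 2:
        _LOGGER.error("Input for batch prediction must be a 2D array.")
        raise ValueError()
    return features
```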
ml_tools/ensemble_learning.py
CHANGED
@@ -339,7 +339,8 @@ def _resample(X_train: np.ndarray, y_train: pd.Series,
     elif strategy == 'ADASYN':
         resample_algorithm = ADASYN(random_state=random_state, n_neighbors=3)
     else:
-
+        _LOGGER.error(f"Invalid resampling strategy: {strategy}")
+        raise ValueError()

     X_res, y_res, *_ = resample_algorithm.fit_resample(X_train, y_train)
     return X_res, y_res
@@ -459,7 +460,8 @@ def train_test_pipeline(model, model_name: str, dataset_id: str, task: Literal["
         y_pred = evaluate_model_regression(model=trained_model, model_name=model_name, save_dir=local_save_directory,
                                            x_test_scaled=test_features, single_y_test=test_target, target_name=target_name)
     else:
-
+        _LOGGER.error(f"Unrecognized task '{task}' for model training,")
+        raise ValueError()
     if debug:
         _LOGGER.info(f"Predicted vector: {type(y_pred)} with shape: {y_pred.shape}")

@@ -487,13 +489,14 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
     elif isinstance(model_object, ClassificationTreeModels):
         task = "classification"
         if handle_classification_imbalance is None:
-            _LOGGER.warning("
+            _LOGGER.warning("No method to handle classification class imbalance has been selected. Datasets are assumed to be balanced.")
         elif handle_classification_imbalance == "by_model":
             model_object.use_model_balance = True
         else:
             model_object.use_model_balance = False
     else:
-
+        _LOGGER.error(f"Unrecognized model {type(model_object)}")
+        raise TypeError()

     #Check paths
     datasets_path = make_fullpath(datasets_dir)
@@ -519,7 +522,7 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
                             debug=debug, save_dir=save_path, save_model=save_model,
                             generate_learning_curves=generate_learning_curves)

-    _LOGGER.info("
+    _LOGGER.info("Training and evaluation complete.")


 def info():
ml_tools/handle_excel.py
CHANGED
@@ -37,7 +37,8 @@ def find_excel_files(
     input_path = make_fullpath(directory)

     if not input_path.is_dir():
-
+        _LOGGER.error(f"Directory not found: {input_path}")
+        raise NotADirectoryError()

     excel_files = [
         f for f in input_path.iterdir()
@@ -47,7 +48,8 @@ def find_excel_files(
     ]

     if not excel_files:
-
+        _LOGGER.error(f"No valid Excel files found in directory: {input_path}")
+        raise FileNotFoundError()

     return excel_files

@@ -99,7 +101,7 @@ def unmerge_and_split_excel(filepath: Union[str,Path]) -> None:

             total_output_files += 1

-    _LOGGER.info(f"
+    _LOGGER.info(f"Processed file: {file_path} into {total_output_files} output file(s).")
     return None


@@ -155,7 +157,7 @@ def unmerge_and_split_from_directory(input_dir: Union[str,Path], output_dir: Uni

             total_output_files += 1

-    _LOGGER.info(f"
+    _LOGGER.info(f"Processed {len(excel_files)} input Excel file(s) with a total of {total_output_files} output Excel file(s).")
     return None


@@ -199,13 +201,13 @@ def validate_excel_schema(
             invalid_files.append(file)

         except Exception as e:
-            _LOGGER.error(f"
+            _LOGGER.error(f"Error processing '{file}': {e}")
             invalid_files.append(file)

     valid_excel_number = len(excel_paths) - len(invalid_files)
     _LOGGER.info(f"{valid_excel_number} out of {len(excel_paths)} excel files conform to the schema.")
     if invalid_files:
-        _LOGGER.warning(f"
+        _LOGGER.warning(f"{len(invalid_files)} excel files are invalid:")
         for in_file in invalid_files:
             print(f"  - {in_file.name}")

@@ -252,7 +254,8 @@ def vertical_merge_transform_excel(
         if target_columns is not None:
             missing = [col for col in target_columns if col not in df.columns]
             if missing:
-
+                _LOGGER.error(f"Invalid columns in {file.name}: {missing}")
+                raise ValueError()
             df = df[target_columns]

         dataframes.append(df)
@@ -262,11 +265,12 @@ def vertical_merge_transform_excel(
     if rename_columns is not None:
         expected_len = len(target_columns if target_columns is not None else merged_df.columns)
         if len(rename_columns) != expected_len:
-
+            _LOGGER.error("Length of 'rename_columns' must match the selected columns")
+            raise ValueError()
         merged_df.columns = rename_columns

     merged_df.to_csv(csv_path, index=False, encoding='utf-8')
-    _LOGGER.info(f"
+    _LOGGER.info(f"Merged {len(dataframes)} excel files into '{csv_filename}'.")


 def horizontal_merge_transform_excel(
@@ -327,7 +331,7 @@ def horizontal_merge_transform_excel(
     duplicate_columns = merged_df.columns[merged_df.columns.duplicated()].tolist()

     if duplicate_columns:
-        _LOGGER.warning(f"
+        _LOGGER.warning(f"Duplicate columns: {duplicate_columns}")

     if skip_duplicates:
         merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]
@@ -347,7 +351,7 @@ def horizontal_merge_transform_excel(

     merged_df.to_csv(csv_path, index=False, encoding='utf-8')

-    _LOGGER.info(f"
+    _LOGGER.info(f"Merged {len(excel_files)} Excel files into '{csv_filename}'.")


 def info():
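In `find_excel_files` the two new guards replace bare raises with the log-then-raise pattern. A short sketch of the whole function under those changes, assuming pathlib-based handling and the shared `_LOGGER`; the extension filter in the middle is elided in the diff, so the version shown here is an assumption.

```python
import logging
from pathlib import Path
from typing import List, Union

_LOGGER = logging.getLogger("ml_tools")


def find_excel_files(directory: Union[str, Path]) -> List[Path]:
    """Return all Excel files in a directory, logging and raising on failure."""
    input_path = Path(directory).resolve()

    if not input_path.is_dir():
        _LOGGER.error(f"Directory not found: {input_path}")
        raise NotADirectoryError()

    # Filter criteria are assumed; the original list comprehension is elided in the hunk.
    excel_files = [
        f for f in input_path.iterdir()
        if f.suffix.lower() in (".xlsx", ".xls") and not f.name.startswith("~")
    ]

    if not excel_files:
        _LOGGER.error(f"No valid Excel files found in directory: {input_path}")
        raise FileNotFoundError()

    return excel_files
```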
ml_tools/optimization_tools.py
CHANGED
@@ -61,7 +61,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):

     long_df = pd.concat(data_to_plot, ignore_index=True)
     features = long_df['feature'].unique()
-    _LOGGER.info(f"
+    _LOGGER.info(f"Found data for {len(features)} features across {len(long_df['target'].unique())} targets. Generating plots...")

     # --- Plotting Loop ---
     for feature_name in features:
@@ -105,7 +105,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
         plt.savefig(plot_filename, bbox_inches='tight')
         plt.close()

-    _LOGGER.info(f"
+    _LOGGER.info(f"All plots saved successfully to: '{output_path}'")


 def _save_result(
@@ -129,8 +129,7 @@ def _save_result(
     if db_manager and db_table_name:
         db_manager.insert_row(db_table_name, result_dict)
     else:
-        _LOGGER.warning("
-
+        _LOGGER.warning("SQLite saving requested but db_manager or table_name not provided.")


 def info():
ml_tools/path_manager.py
CHANGED
@@ -88,7 +88,7 @@ class PathManager:
         try:
             return self._paths[key]
         except KeyError:
-            _LOGGER.error(f"
+            _LOGGER.error(f"Path key '{key}' not found.")
             raise

     def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
@@ -106,9 +106,8 @@ class PathManager:
         if not overwrite:
             for key in new_paths:
                 if key in self._paths:
-
-
-                    )
+                    _LOGGER.error(f"Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True.")
+                    raise KeyError

         # Resolve any string paths to Path objects before storing
         resolved_new_paths = {k: Path(v) for k, v in new_paths.items()}
@@ -136,7 +135,7 @@ class PathManager:
                 if key in self._paths:
                     path_items.append((key, self._paths[key]))
                 elif verbose:
-                    _LOGGER.warning(f"
+                    _LOGGER.warning(f"Key '{key}' not found in PathManager, skipping.")
         else:
             path_items = self._paths.items()

@@ -153,7 +152,7 @@ class PathManager:

             if self._is_bundled and is_internal_path:
                 if verbose:
-                    _LOGGER.warning(f"
+                    _LOGGER.warning(f"Skipping internal directory '{key}' in bundled app (read-only).")
                 continue
             # -------------------------

@@ -261,7 +260,8 @@ def make_fullpath(
         resolved = path.resolve(strict=True)
     except FileNotFoundError:
         if not make:
-
+            _LOGGER.error(f"Path does not exist: '{path}'.")
+            raise FileNotFoundError()

     try:
         if is_file:
@@ -271,14 +271,17 @@ def make_fullpath(
         else:
             path.mkdir(parents=True, exist_ok=True)
         resolved = path.resolve(strict=True)
-    except Exception
-
+    except Exception:
+        _LOGGER.exception(f"Failed to create {'file' if is_file else 'directory'} '{path}'.")
+        raise IOError()

     if enforce == "file" and not resolved.is_file():
-
+        _LOGGER.error(f"Path was enforced as a file, but it is not: '{resolved}'")
+        raise TypeError()

     if enforce == "directory" and not resolved.is_dir():
-
+        _LOGGER.error(f"Path was enforced as a directory, but it is not: '{resolved}'")
+        raise TypeError()

     if verbose:
         if resolved.is_file():
@@ -315,7 +318,8 @@ def sanitize_filename(filename: str) -> str:

     # Check for empty string after sanitization
     if not sanitized:
-
+        _LOGGER.error("The sanitized filename is empty. The original input may have contained only invalid characters.")
+        raise ValueError()

     return sanitized

@@ -334,7 +338,8 @@ def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str,

     csv_paths = list(dir_path.glob("*.csv"))
     if not csv_paths:
-
+        _LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
+        raise IOError()

     # make a dictionary of paths and names
     name_path_dict = {p.stem: p for p in csv_paths}
@@ -367,12 +372,13 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:

     matched_paths = list(dir_path.glob(pattern))
     if not matched_paths:
-
+        _LOGGER.error(f"No '.{normalized_ext}' files found in directory: {dir_path}.")
+        raise IOError()

     name_path_dict = {p.stem: p for p in matched_paths}

     if verbose:
-        _LOGGER.info(f"
+        _LOGGER.info(f"📂 '{normalized_ext.upper()}' files found:")
         for name in name_path_dict:
             print(f"\t{name}")

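The `make_fullpath` hunks add logging to every failure path: missing paths, failed creation (now `_LOGGER.exception` plus `IOError`), and the two `enforce` checks. A sketch of how those pieces fit together, inferred from the context lines above; the function signature, the `make` handling, and the `is_file` derivation are assumptions, not the package's actual implementation.

```python
import logging
from pathlib import Path
from typing import Literal, Optional, Union

_LOGGER = logging.getLogger("ml_tools")


def make_fullpath(
    input_path: Union[str, Path],
    make: bool = False,
    enforce: Optional[Literal["file", "directory"]] = None,
) -> Path:
    """Resolve a path, optionally creating it, and log before every raise."""
    path = Path(input_path).expanduser()
    try:
        resolved = path.resolve(strict=True)
    except FileNotFoundError:
        if not make:
            _LOGGER.error(f"Path does not exist: '{path}'.")
            raise FileNotFoundError()
        try:
            is_file = enforce == "file"  # assumed heuristic for what to create
            if is_file:
                path.parent.mkdir(parents=True, exist_ok=True)
                path.touch()
            else:
                path.mkdir(parents=True, exist_ok=True)
            resolved = path.resolve(strict=True)
        except Exception:
            _LOGGER.exception(f"Failed to create {'file' if is_file else 'directory'} '{path}'.")
            raise IOError()

    if enforce == "file" and not resolved.is_file():
        _LOGGER.error(f"Path was enforced as a file, but it is not: '{resolved}'")
        raise TypeError()
    if enforce == "directory" and not resolved.is_dir():
        _LOGGER.error(f"Path was enforced as a directory, but it is not: '{resolved}'")
        raise TypeError()
    return resolved
```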
ml_tools/utilities.py
CHANGED
@@ -76,11 +76,13 @@ def load_dataframe(
         df = pl.read_csv(path, infer_schema_length=1000)

     else:
-
+        _LOGGER.error(f"Invalid kind '{kind}'. Must be one of 'pandas' or 'polars'.")
+        raise ValueError()

     # This check works for both pandas and polars DataFrames
     if df.shape[0] == 0:
-
+        _LOGGER.error(f"DataFrame '{df_name}' loaded from '{path}' is empty.")
+        raise ValueError()

     if verbose:
         _LOGGER.info(f"💾 Loaded {kind.upper()} dataset: '{df_name}' with shape: {df.shape}")
@@ -162,13 +164,14 @@ def merge_dataframes(
         merged_df = pd.concat(dfs, axis=0)

     else:
-
+        _LOGGER.error(f"Invalid merge direction: {direction}")
+        raise ValueError()

     if reset_index:
         merged_df = merged_df.reset_index(drop=True)

     if verbose:
-        _LOGGER.info(f"
+        _LOGGER.info(f"Merged DataFrame shape: {merged_df.shape}")

     return merged_df

@@ -187,7 +190,7 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Pa
     """
     # This check works for both pandas and polars
     if df.shape[0] == 0:
-        _LOGGER.warning(f"
+        _LOGGER.warning(f"Attempting to save an empty DataFrame: '{filename}'. Process Skipped.")
         return

     # Create the directory if it doesn't exist
@@ -207,9 +210,10 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Pa
         df.write_csv(output_path) # Polars defaults to utf8 and no index
     else:
         # This error handles cases where an unsupported type is passed
-
+        _LOGGER.error(f"Unsupported DataFrame type: {type(df)}. Must be pandas or polars.")
+        raise TypeError()

-    _LOGGER.info(f"
+    _LOGGER.info(f"Saved dataset: '{filename}' with shape: {df.shape}")


 def normalize_mixed_list(data: list, threshold: int = 2) -> list[float]:
@@ -243,7 +247,8 @@ def normalize_mixed_list(data: list, threshold: int = 2) -> list[float]:

     # Raise for negative values
     if any(x < 0 for x in float_list):
-
+        _LOGGER.error("Negative values are not allowed in the input list.")
+        raise ValueError()

     # Step 2: Compute log10 of non-zero values
     nonzero = [x for x in float_list if x > 0]
@@ -302,14 +307,16 @@ def threshold_binary_values(
     elif isinstance(input_array, (list, tuple)):
         array = np.array(input_array)
     else:
-
+        _LOGGER.error("Unsupported input type")
+        raise TypeError()

     array = array.flatten()
     total = array.shape[0]

     bin_count = total if binary_values is None else binary_values
     if not (0 <= bin_count <= total):
-
+        _LOGGER.error("'binary_values' must be between 0 and the total number of elements")
+        raise ValueError()

     if bin_count == 0:
         result = array
@@ -349,9 +356,15 @@ def threshold_binary_values_batch(
     np.ndarray
         Thresholded array, same shape as input.
     """
-
+    if input_array.ndim != 2:
+        _LOGGER.error(f"Expected 2D array, got {input_array.ndim}D array.")
+        raise AssertionError()
+
     batch_size, total_features = input_array.shape
-
+
+    if not (0 <= binary_values <= total_features):
+        _LOGGER.error("'binary_values' out of valid range.")
+        raise AssertionError()

     if binary_values == 0:
         return input_array.copy()
@@ -380,15 +393,13 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
         full_path = save_path / sanitized_name
         joblib.dump(obj, full_path)
     except (IOError, OSError, TypeError, TerminatedWorkerError) as e:
-
+        _LOGGER.error(f"Failed to serialize object of type '{type(obj)}'.")
         if raise_on_error:
-            raise
-        else:
-            _LOGGER.warning(message)
+            raise e
         return None
     else:
         if verbose:
-            _LOGGER.info(f"
+            _LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
         return None


@@ -407,15 +418,13 @@ def deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_e
     try:
         obj = joblib.load(true_filepath)
     except (IOError, OSError, EOFError, TypeError, ValueError) as e:
-
+        _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
         if raise_on_error:
-            raise
-        else:
-            _LOGGER.warning(message)
+            raise e
         return None
     else:
         if verbose:
-            _LOGGER.info(f"
+            _LOGGER.info(f"Loaded object of type '{type(obj)}'.")
         return obj


@@ -486,7 +495,8 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
     for dir in list_of_dirs:
         dir_path = make_fullpath(dir)
         if not dir_path.is_dir():
-
+            _LOGGER.error(f"'{dir}' is not a directory.")
+            raise IOError()
         all_dir_paths.append(dir_path)

     # main loop
@@ -502,10 +512,10 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
             save_dataframe(df=df, save_dir=save_dir, filename=filename)
             total_saved += 1
         except Exception as e:
-            _LOGGER.
+            _LOGGER.error(f"Failed to process file '{df_path}'. Reason: {e}")
             continue

-    _LOGGER.info(f"
+    _LOGGER.info(f"{total_saved} single-target datasets were created.")


 def train_dataset_yielder(
@@ -530,6 +540,5 @@ def train_dataset_yielder(
         yield (df_features, df_target, feature_names, target_col)


-
 def info():
     _script_info(__all__)
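The `serialize_object`/`deserialize_object` hunks drop the old fallback branch (which referenced a `message` variable) in favour of logging the failure and re-raising when `raise_on_error` is set, or returning None otherwise. A compact sketch of the deserialization side as it reads after the change, assuming joblib and the shared logger; the signature is simplified.

```python
import logging
from pathlib import Path
from typing import Any, Optional, Union

import joblib

_LOGGER = logging.getLogger("ml_tools")


def deserialize_object(filepath: Union[str, Path], verbose: bool = True, raise_on_error: bool = True) -> Optional[Any]:
    """Load a joblib-serialized object, logging before re-raising or returning None."""
    true_filepath = Path(filepath)
    try:
        obj = joblib.load(true_filepath)
    except (IOError, OSError, EOFError, TypeError, ValueError) as e:
        _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
        if raise_on_error:
            raise e
        return None
    else:
        if verbose:
            _LOGGER.info(f"Loaded object of type '{type(obj)}'.")
        return obj
```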
dragon_ml_toolbox-8.2.0.dist-info/RECORD
DELETED
@@ -1,36 +0,0 @@
-dragon_ml_toolbox-8.2.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-8.2.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
-ml_tools/ETL_engineering.py,sha256=69YGK4fN5ouRBknTvU4uZ8KLQGT-hPrvwymH-IygEnk,40911
-ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
-ml_tools/MICE_imputation.py,sha256=oFHg-OytOzPYTzBR_wIRHhP71cMn3aupDeT59ABsXlQ,11576
-ml_tools/ML_callbacks.py,sha256=noedVMmHZ72Odbg28zqx5wkhhvX2v-jXicKE_NCAiqU,13838
-ml_tools/ML_datasetmaster.py,sha256=tN-GBPEwXRWFBT8r8K0v9b3Bd77DhqSH5FkjDP6BHTw,28847
-ml_tools/ML_evaluation.py,sha256=BER5dOvSTySNzO92gm8tIpqJ5vT-s0iHMmaoly1uUH8,16018
-ml_tools/ML_evaluation_multi.py,sha256=uVtKGYWgOLv34Xj_jz6E_HAYzNb0HwRbMwA8oFZWpUk,12395
-ml_tools/ML_inference.py,sha256=hwtAdyDCE1xtqLgJgyOTAPck0eTmkOCJK1cM_IJSdck,22824
-ml_tools/ML_models.py,sha256=xZiSFh7S6eitl-VjjvNpsikojDvurK8n_ueLEh6_5pM,27979
-ml_tools/ML_optimization.py,sha256=GX-qZ2mCI3gWRCTP5w7lXrZpfGle3J_mE0O68seIoio,13475
-ml_tools/ML_scaler.py,sha256=pGkp1nUpeuoBvbq5hUkieQdxex6kNef1mEbeS_HUCJs,7471
-ml_tools/ML_trainer.py,sha256=6JSmEQaCPSo-S_5plNBTPw-SYgzZpyMNwiqpShJf7qU,23726
-ml_tools/PSO_optimization.py,sha256=9Y074d-B5h4Wvp9YPiy6KAeXM-Yv6Il3gWalKvOLVgo,22705
-ml_tools/RNN_forecast.py,sha256=2CyjBLSYYc3xLHxwLXUmP5Qv8AmV1OB_EndETNX1IBk,1956
-ml_tools/SQL.py,sha256=bkSTmMV4CtEqa67hApYWaRxTqwAlKIc5_b28P1bnDwg,10475
-ml_tools/VIF_factor.py,sha256=2nUMupfUoogf8o6ghoFZk_OwWhFXU0R3C9Gj0HOlI14,10415
-ml_tools/_ML_optimization_multi.py,sha256=DrNG3Vf1uUw-3CpYfXREgSGuR4dTpLWY1F3R9j-PYqQ,9816
-ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
-ml_tools/_logger.py,sha256=TpgYguxO-CWYqqgLW0tqFjtwZ58PE_W2OCfWNGZr0n0,1175
-ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
-ml_tools/custom_logger.py,sha256=nyLRxaRxkqYOFdSjI0X2BWXB8C2IU18QfmqIFKqSedI,5820
-ml_tools/data_exploration.py,sha256=RuMHWagXrSQi1MzAMlYeBeVg7UxhVvEq8gJ9bIam2BM,27103
-ml_tools/ensemble_evaluation.py,sha256=wnqoTPg4WYWf2A8z5XT0eSlW4snEuLCXQVj88sZKzQ4,24683
-ml_tools/ensemble_inference.py,sha256=rtU7eUaQne615n2g7IHZCJI-OvrBCcjxbTkEIvtCGFQ,9414
-ml_tools/ensemble_learning.py,sha256=dAyFgSTyvxJWjc_enJ_8EUoWwiekBeoNyJNxVY-kcUU,21868
-ml_tools/handle_excel.py,sha256=J9iwIqMZemoxK49J5osSwp9Ge0h9YTKyYGbOm53hcno,13007
-ml_tools/keys.py,sha256=HtPG8-MWh89C32A7eIlfuuA-DLwkxGkoDfwR2TGN9CQ,1074
-ml_tools/optimization_tools.py,sha256=EL5tgNFwRo-82pbRE1CFVy9noNhULD7wprWuKadPheg,5090
-ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
-ml_tools/utilities.py,sha256=LqXXTovaHbA5AOKRk6Ru6DgAPAM0wPfYU70kUjYBryo,19231
-dragon_ml_toolbox-8.2.0.dist-info/METADATA,sha256=C1rjTnTNSj6VI2khy7Xl1VjQ__MP6-b43x9RIQCHY3E,6778
-dragon_ml_toolbox-8.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-8.2.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-8.2.0.dist-info/RECORD,,