dragon-ml-toolbox 1.4.3__tar.gz → 1.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-1.4.3/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-1.4.4}/PKG-INFO +1 -1
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/MICE_imputation.py +2 -2
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/VIF_factor.py +9 -6
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/data_exploration.py +11 -5
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/ensemble_learning.py +4 -8
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/logger.py +2 -2
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/particle_swarm_optimization.py +3 -3
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/pyproject.toml +1 -1
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/LICENSE +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/README.md +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/datasetmaster.py +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/pytorch_models.py +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/trainer.py +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/ml_tools/vision_helpers.py +0 -0
- {dragon_ml_toolbox-1.4.3 → dragon_ml_toolbox-1.4.4}/setup.cfg +0 -0
ml_tools/MICE_imputation.py

@@ -36,9 +36,9 @@ def apply_mice(df: pd.DataFrame, df_name: str, resulting_datasets: int=1, iterat
         raise ValueError("No imputed datasets were generated. Check the MICE process.")
 
     if resulting_datasets == 1:
-        imputed_dataset_names = [f"{df_name}
+        imputed_dataset_names = [f"{df_name}_MICE"]
     else:
-        imputed_dataset_names = [f"{df_name}
+        imputed_dataset_names = [f"{df_name}_MICE_{i+1}" for i in range(resulting_datasets)]
 
     # Ensure indexes match
     for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
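To make the new naming scheme concrete, here is a minimal sketch of the naming logic in isolation (the surrounding apply_mice plumbing is assumed; "housing" is an example dataset name, not package code):

# Minimal sketch of the 1.4.4 naming logic, isolated from apply_mice.
df_name = "housing"
resulting_datasets = 3

if resulting_datasets == 1:
    names = [f"{df_name}_MICE"]
else:
    names = [f"{df_name}_MICE_{i+1}" for i in range(resulting_datasets)]

print(names)  # ['housing_MICE_1', 'housing_MICE_2', 'housing_MICE_3']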
ml_tools/VIF_factor.py

@@ -26,6 +26,7 @@ def compute_vif(
     filename: Optional[str] = None,
     fontsize: int = 14,
     show_plot: bool = True,
+    verbose: bool = True
 ) -> pd.DataFrame:
     """
     Computes Variance Inflation Factors (VIF) for numeric columns in a DataFrame. Optionally, generates a bar plot of VIF values.
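With the new flag, callers can silence the column warnings entirely. A hypothetical call (the keyword names are the ones visible in these hunks; df is any DataFrame with numeric columns):

# Hypothetical call to compute_vif after 1.4.4; silences both plot and warnings.
from ml_tools.VIF_factor import compute_vif

vif_df = compute_vif(df=df, show_plot=False, verbose=False)  # returns a pd.DataFrame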
@@ -52,19 +53,20 @@ def compute_vif(
     if use_columns is None:
         sanitized_columns = df.select_dtypes(include='number').columns.tolist()
         missing_features = set(ground_truth_cols) - set(sanitized_columns)
-        if missing_features:
+        if missing_features and verbose:
             print(f"⚠️ These columns are not Numeric:\n{missing_features}")
     else:
         sanitized_columns = list()
         for feature in use_columns:
             if feature not in ground_truth_cols:
-
+                if verbose:
+                    print(f"⚠️ The provided column '{feature}' is not in the DataFrame.")
             else:
                 sanitized_columns.append(feature)
 
     if ignore_columns is not None and use_columns is None:
         missing_ignore = set(ignore_columns) - set(ground_truth_cols)
-        if missing_ignore:
+        if missing_ignore and verbose:
             print(f"⚠️ Warning: The following 'columns to ignore' are not in the Dataframe:\n{missing_ignore}")
         sanitized_columns = [f for f in sanitized_columns if f not in ignore_columns]
 
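For background, the VIF of feature i is 1 / (1 - R²) from regressing that feature on all the others, which is exactly what the standard statsmodels routine computes. A self-contained sketch of the underlying computation (not this package's exact implementation, which adds column sanitization, plotting, and the new verbose gate on top):

# Plain-statsmodels VIF computation for comparison with compute_vif.
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

df = pd.DataFrame({
    "x1": [1.0, 2.0, 3.0, 4.0, 5.0],
    "x2": [2.1, 3.9, 6.2, 8.1, 9.8],  # nearly collinear with x1
    "x3": [5.0, 3.0, 6.0, 2.0, 7.0],
})

X = add_constant(df)  # include an intercept so the VIFs are meaningful
vif = pd.DataFrame({
    "feature": df.columns,
    "VIF": [variance_inflation_factor(X.values, i + 1)  # +1 skips the constant column
            for i in range(len(df.columns))],
})
print(vif)  # x1 and x2 show very large VIFs (>= 10 is a common drop threshold)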
@@ -182,7 +184,7 @@ def compute_vif_multi(input_directory: str,
                       max_features_to_plot: int = 20,
                       fontsize: int = 14):
     """
-    Computes Variance Inflation Factors (VIF) for numeric columns in a directory with CSV files (loaded as pandas DataFrames).
+    Computes Variance Inflation Factors (VIF) for numeric columns in a directory with CSV files (loaded as pandas DataFrames). No plots or warnings will be displayed inline.
     Generates a bar plot of VIF values. Optionally drops columns with VIF >= 10 and saves as a new CSV file.
 
     Args:
@@ -210,10 +212,11 @@ def compute_vif_multi(input_directory: str,
             fontsize=fontsize,
             save_dir=output_plot_directory,
             filename=df_name,
-            show_plot=False
+            show_plot=False,
+            verbose=False)
 
         if output_dataset_directory is not None:
-            new_filename =
+            new_filename = df_name + '_VIF'
             result_df, dropped_cols = drop_vif_based(df=df, vif_df=vif_dataframe)
 
             if len(dropped_cols) > 0:
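A hypothetical batch invocation after this change; the directory paths are placeholders, and the parameter names are the ones visible in these hunks:

# Hypothetical usage of compute_vif_multi in 1.4.4. Plots go to disk,
# per-file warnings are suppressed (verbose=False is forwarded internally),
# and cleaned datasets are saved with the '_VIF' suffix built above.
from ml_tools.VIF_factor import compute_vif_multi

compute_vif_multi(
    input_directory="data/csv_in",
    output_plot_directory="plots/vif",
    output_dataset_directory="data/csv_out",
)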
ml_tools/data_exploration.py

@@ -153,7 +153,7 @@ def drop_columns_with_missing_data(df: pd.DataFrame, threshold: float = 0.7, sho
 
         result_df = df.drop(columns=cols_to_drop)
         if show_nulls_after:
-            show_null_columns(df=result_df)
+            print(show_null_columns(df=result_df))
 
         return result_df
     else:
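The likely rationale for this fix, inferred from the diff: show_null_columns evidently returns its summary rather than printing it, so the bare call produced no visible output in scripts. A toy illustration of the difference (null_summary is a stand-in, not the package's function):

# Toy stand-in for show_null_columns: it returns, rather than prints, a summary.
import pandas as pd

def null_summary(df: pd.DataFrame) -> pd.Series:
    return df.isnull().sum()

df = pd.DataFrame({"a": [1, None], "b": [2, 3]})
null_summary(df)         # in a plain script, this line displays nothing
print(null_summary(df))  # the 1.4.4 pattern: explicitly print the return value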
@@ -259,7 +259,7 @@ def plot_correlation_heatmap(df: pd.DataFrame, save_dir: Union[str, None] = None
         os.makedirs(save_dir, exist_ok=True)
         full_path = os.path.join(save_dir, plot_title + ".svg")
         plt.savefig(full_path, bbox_inches="tight", format='svg')
-        print(f"Saved correlation heatmap
+        print(f"Saved correlation heatmap: '{plot_title}.svg'")
 
     plt.show()
     plt.close()
@@ -521,7 +521,8 @@ def clip_outliers_multi(
 
 def distribute_datasets_by_target(
     df: pd.DataFrame,
-    target_columns: list[str]
+    target_columns: list[str],
+    verbose: bool = False
 ) -> Iterator[Tuple[str, pd.DataFrame]]:
     """
     Yields cleaned DataFrames for each target column, where rows with missing
@@ -533,6 +534,8 @@ def distribute_datasets_by_target(
         Preprocessed dataframe with all feature and target columns ready to train.
     target_columns : List[str]
         List of target column names to generate per-target DataFrames.
+    verbose: bool
+        Whether to print info for each yielded dataset.
 
     Yields
     ------
@@ -540,10 +543,13 @@ def distribute_datasets_by_target(
         * First element is the target column name.
         * Second element is the corresponding cleaned DataFrame.
     """
-
+    valid_targets = [col for col in df.columns if col in target_columns]
+    feature_columns = [col for col in df.columns if col not in valid_targets]
 
-    for target in
+    for target in valid_targets:
         subset = df[feature_columns + [target]].dropna(subset=[target])
+        if verbose:
+            print(f"Target: '{target}' - Dataframe shape: {subset.shape}")
         yield target, subset
 
 
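Because this hunk shows the full new body, the generator can be lifted out and exercised standalone; the following mirrors the 1.4.4 code, with a toy DataFrame added for demonstration:

# Standalone copy of the distribute_datasets_by_target logic from this hunk.
from typing import Iterator, Tuple
import numpy as np
import pandas as pd

def distribute_datasets_by_target(
    df: pd.DataFrame,
    target_columns: list[str],
    verbose: bool = False,
) -> Iterator[Tuple[str, pd.DataFrame]]:
    valid_targets = [col for col in df.columns if col in target_columns]
    feature_columns = [col for col in df.columns if col not in valid_targets]
    for target in valid_targets:
        # Keep every feature plus one target; drop rows where that target is NaN.
        subset = df[feature_columns + [target]].dropna(subset=[target])
        if verbose:
            print(f"Target: '{target}' - Dataframe shape: {subset.shape}")
        yield target, subset

df = pd.DataFrame({"f1": [1, 2, 3, 4],
                   "t1": [1.0, np.nan, 3.0, 4.0],
                   "t2": [np.nan, 2.0, np.nan, 4.0]})
for name, data in distribute_datasets_by_target(df, ["t1", "t2"], verbose=True):
    pass  # prints: Target: 't1' - Dataframe shape: (3, 2), then (2, 2) for 't2'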
ml_tools/ensemble_learning.py

@@ -157,9 +157,7 @@ class RegressionTreeModels:
         self.gamma = gamma
 
         # LightGBM specific
-
-        num_leaves = (2**max_depth) - 1
-        print(f"⚠️ Warning: 'num_leaves' should be set proportional to 'max_depth'. Value set as {num_leaves}.")
+        num_leaves = min(num_leaves, 2 ** (max_depth - 1))
         self.num_leaves = num_leaves
         self.min_data_in_leaf = min_data_in_leaf
 
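Context for this change: a LightGBM tree limited to max_depth can have at most 2**max_depth leaves, and the LightGBM docs advise keeping num_leaves below that ceiling to curb overfitting. The old code silently overwrote the value with (2**max_depth) - 1 and printed a warning; the new code instead caps a user-supplied num_leaves (evidently now a constructor argument) at 2**(max_depth - 1). Example arithmetic:

# Illustration of the 1.4.4 cap; the values are examples only.
max_depth = 6
requested_num_leaves = 100

# A depth-6 tree can have at most 2**6 = 64 leaves; the cap keeps the
# effective value conservatively at half of that ceiling.
effective = min(requested_num_leaves, 2 ** (max_depth - 1))
print(effective)  # 32 -> the request of 100 is clamped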
@@ -202,7 +200,7 @@ class RegressionTreeModels:
             verbose=-1,
             reg_alpha=self.L1,
             reg_lambda=self.L2,
-            boosting_type='
+            boosting_type='gbdt',
             num_leaves=self.num_leaves,
             min_data_in_leaf=self.min_data_in_leaf
         )
@@ -321,9 +319,7 @@ class ClassificationTreeModels:
         self.gamma = gamma
 
         # LightGBM specific
-
-        num_leaves = (2**max_depth) - 1
-        print(f"⚠️ Warning: 'num_leaves' should be set proportional to 'max_depth'. Value set as {num_leaves}.")
+        num_leaves = min(num_leaves, 2 ** (max_depth - 1))
         self.num_leaves = num_leaves
         self.min_data_in_leaf = min_data_in_leaf
 
@@ -370,7 +366,7 @@ class ClassificationTreeModels:
             verbose=-1,
             reg_alpha=self.L1,
             reg_lambda=self.L2,
-            boosting_type='
+            boosting_type='gbdt' if self.use_model_balance else 'goss',
             num_leaves=self.num_leaves,
             min_data_in_leaf=self.min_data_in_leaf,
             class_weight='balanced' if self.use_model_balance else None
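Both branch values are real LightGBM boosting strategies: 'gbdt' (the regressor above is now pinned to it) is standard gradient-boosted trees, and 'goss' is Gradient-based One-Side Sampling; pairing 'gbdt' with class_weight='balanced', and 'goss' with neither, is this package's own design choice. A sketch of constructing the estimator the way this hunk does, with placeholder hyperparameters:

# Sketch of the 1.4.4 classifier construction; hyperparameter values are placeholders.
# Note: recent LightGBM (>= 4.0) prefers data_sample_strategy='goss' over
# boosting_type='goss', so this mirrors the package as written, not current best practice.
from lightgbm import LGBMClassifier

use_model_balance = True
clf = LGBMClassifier(
    verbose=-1,
    reg_alpha=0.1,   # stands in for self.L1
    reg_lambda=0.1,  # stands in for self.L2
    boosting_type='gbdt' if use_model_balance else 'goss',
    num_leaves=31,
    min_data_in_leaf=20,  # LightGBM alias for min_child_samples
    class_weight='balanced' if use_model_balance else None,
)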
ml_tools/logger.py

@@ -55,7 +55,7 @@ def custom_logger(
     """
     try:
         os.makedirs(save_directory, exist_ok=True)
-        timestamp = datetime.now().strftime(r"%Y%m%d_%H%M")
+        timestamp = datetime.now().strftime(r"%Y%m%d_%H%M%S")
         log_name = sanitize_filename(log_name)
         base_path = os.path.join(save_directory, f"{log_name}_{timestamp}")
 
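The only change here is appending %S, so two logs created within the same minute no longer collide on the same base filename:

# Before vs. after the 1.4.4 timestamp change.
from datetime import datetime

old = datetime.now().strftime(r"%Y%m%d_%H%M")    # e.g. '20250101_1230'
new = datetime.now().strftime(r"%Y%m%d_%H%M%S")  # e.g. '20250101_123045'
print(old, new)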
@@ -80,7 +80,7 @@ def custom_logger(
         else:
             raise ValueError("Unsupported data type. Must be list, dict, DataFrame, str, or BaseException.")
 
-        print(f"Log saved to: {base_path}")
+        print(f"Log saved to: '{base_path}'")
 
     except Exception as e:
         print(f"Error in custom_logger: {e}")
ml_tools/particle_swarm_optimization.py

@@ -129,10 +129,10 @@ def run_pso(lower_boundaries: list[float],
             target_name: Union[str, None]=None,
             feature_names: Union[list[str], None]=None,
             swarm_size: int=200,
-            max_iterations: int=
+            max_iterations: int=1500,
             inequality_constrain_function=None,
-            post_hoc_analysis: Optional[int]=
-            workers: int=
+            post_hoc_analysis: Optional[int]=5,
+            workers: int=1) -> Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]:
     """
     Executes Particle Swarm Optimization (PSO) to optimize a given objective function and saves the results as a CSV file.
 
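One side note on the new return annotation: Tuple[Dict[str, float | list[float]], ...] presumably reflects that with post_hoc_analysis set, each dict value is a list of values rather than a single float. It also uses PEP 604 union syntax inside a subscript that Python evaluates when the def statement runs, so it requires Python 3.10+ (or deferred annotations). A minimal check:

# PEP 604 unions inside evaluated annotations need Python 3.10+.
from typing import Dict, Tuple

ReturnType = Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]
# On Python < 3.10 the line above raises TypeError at runtime; inside a
# function signature, `from __future__ import annotations` would defer
# evaluation, but that does not help a module-level alias like this one.
print(ReturnType)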