PyPI - dragon-ml-toolbox - Versions diffs - 20.2.0__py3-none-any.whl → 20.3.0__py3-none-any.whl - Mend

dragon-ml-toolbox 20.2.0__py3-none-any.whl → 20.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109)

{dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/METADATA +1 -1
dragon_ml_toolbox-20.3.0.dist-info/RECORD +143 -0
ml_tools/ETL_cleaning/__init__.py +5 -1
ml_tools/ETL_cleaning/_basic_clean.py +1 -1
ml_tools/ETL_engineering/__init__.py +5 -1
ml_tools/GUI_tools/__init__.py +5 -1
ml_tools/IO_tools/_IO_loggers.py +12 -4
ml_tools/IO_tools/__init__.py +5 -1
ml_tools/MICE/__init__.py +8 -2
ml_tools/MICE/_dragon_mice.py +1 -1
ml_tools/ML_callbacks/__init__.py +5 -1
ml_tools/ML_chain/__init__.py +5 -1
ml_tools/ML_configuration/__init__.py +7 -1
ml_tools/ML_configuration/_training.py +65 -1
ml_tools/ML_datasetmaster/__init__.py +5 -1
ml_tools/ML_datasetmaster/_base_datasetmaster.py +31 -20
ml_tools/ML_datasetmaster/_datasetmaster.py +26 -9
ml_tools/ML_datasetmaster/_sequence_datasetmaster.py +38 -23
ml_tools/ML_evaluation/__init__.py +5 -1
ml_tools/ML_evaluation_captum/__init__.py +5 -1
ml_tools/ML_finalize_handler/__init__.py +5 -1
ml_tools/ML_inference/__init__.py +5 -1
ml_tools/ML_inference_sequence/__init__.py +5 -1
ml_tools/ML_inference_vision/__init__.py +5 -1
ml_tools/ML_models/__init__.py +21 -6
ml_tools/ML_models/_dragon_autoint.py +302 -0
ml_tools/ML_models/_dragon_gate.py +358 -0
ml_tools/ML_models/_dragon_node.py +268 -0
ml_tools/ML_models/_dragon_tabnet.py +255 -0
ml_tools/ML_models_sequence/__init__.py +5 -1
ml_tools/ML_models_vision/__init__.py +5 -1
ml_tools/ML_optimization/__init__.py +11 -3
ml_tools/ML_optimization/_multi_dragon.py +2 -2
ml_tools/ML_optimization/_single_dragon.py +47 -67
ml_tools/ML_optimization/_single_manual.py +1 -1
ml_tools/ML_scaler/_ML_scaler.py +12 -7
ml_tools/ML_scaler/__init__.py +5 -1
ml_tools/ML_trainer/__init__.py +5 -1
ml_tools/ML_trainer/_base_trainer.py +136 -13
ml_tools/ML_trainer/_dragon_detection_trainer.py +31 -91
ml_tools/ML_trainer/_dragon_sequence_trainer.py +24 -74
ml_tools/ML_trainer/_dragon_trainer.py +24 -85
ml_tools/ML_utilities/__init__.py +5 -1
ml_tools/ML_utilities/_inspection.py +44 -30
ml_tools/ML_vision_transformers/__init__.py +8 -2
ml_tools/PSO_optimization/__init__.py +5 -1
ml_tools/SQL/__init__.py +8 -2
ml_tools/VIF/__init__.py +5 -1
ml_tools/data_exploration/__init__.py +4 -1
ml_tools/data_exploration/_cleaning.py +4 -2
ml_tools/ensemble_evaluation/__init__.py +5 -1
ml_tools/ensemble_inference/__init__.py +5 -1
ml_tools/ensemble_learning/__init__.py +5 -1
ml_tools/excel_handler/__init__.py +5 -1
ml_tools/keys/__init__.py +5 -1
ml_tools/math_utilities/__init__.py +5 -1
ml_tools/optimization_tools/__init__.py +5 -1
ml_tools/path_manager/__init__.py +8 -2
ml_tools/plot_fonts/__init__.py +8 -2
ml_tools/schema/__init__.py +8 -2
ml_tools/schema/_feature_schema.py +3 -3
ml_tools/serde/__init__.py +5 -1
ml_tools/utilities/__init__.py +5 -1
ml_tools/utilities/_utility_save_load.py +38 -20
dragon_ml_toolbox-20.2.0.dist-info/RECORD +0 -179
ml_tools/ETL_cleaning/_imprimir.py +0 -13
ml_tools/ETL_engineering/_imprimir.py +0 -24
ml_tools/GUI_tools/_imprimir.py +0 -12
ml_tools/IO_tools/_imprimir.py +0 -14
ml_tools/MICE/_imprimir.py +0 -11
ml_tools/ML_callbacks/_imprimir.py +0 -12
ml_tools/ML_chain/_imprimir.py +0 -12
ml_tools/ML_configuration/_imprimir.py +0 -47
ml_tools/ML_datasetmaster/_imprimir.py +0 -15
ml_tools/ML_evaluation/_imprimir.py +0 -25
ml_tools/ML_evaluation_captum/_imprimir.py +0 -10
ml_tools/ML_finalize_handler/_imprimir.py +0 -8
ml_tools/ML_inference/_imprimir.py +0 -11
ml_tools/ML_inference_sequence/_imprimir.py +0 -8
ml_tools/ML_inference_vision/_imprimir.py +0 -8
ml_tools/ML_models/_advanced_models.py +0 -1086
ml_tools/ML_models/_imprimir.py +0 -18
ml_tools/ML_models_sequence/_imprimir.py +0 -8
ml_tools/ML_models_vision/_imprimir.py +0 -16
ml_tools/ML_optimization/_imprimir.py +0 -13
ml_tools/ML_scaler/_imprimir.py +0 -8
ml_tools/ML_trainer/_imprimir.py +0 -10
ml_tools/ML_utilities/_imprimir.py +0 -16
ml_tools/ML_vision_transformers/_imprimir.py +0 -14
ml_tools/PSO_optimization/_imprimir.py +0 -10
ml_tools/SQL/_imprimir.py +0 -8
ml_tools/VIF/_imprimir.py +0 -10
ml_tools/data_exploration/_imprimir.py +0 -32
ml_tools/ensemble_evaluation/_imprimir.py +0 -14
ml_tools/ensemble_inference/_imprimir.py +0 -9
ml_tools/ensemble_learning/_imprimir.py +0 -10
ml_tools/excel_handler/_imprimir.py +0 -13
ml_tools/keys/_imprimir.py +0 -11
ml_tools/math_utilities/_imprimir.py +0 -11
ml_tools/optimization_tools/_imprimir.py +0 -13
ml_tools/path_manager/_imprimir.py +0 -15
ml_tools/plot_fonts/_imprimir.py +0 -8
ml_tools/schema/_imprimir.py +0 -10
ml_tools/serde/_imprimir.py +0 -10
ml_tools/utilities/_imprimir.py +0 -18
{dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/WHEEL +0 -0
{dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/licenses/LICENSE +0 -0
{dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
{dragon_ml_toolbox-20.2.0.dist-info → dragon_ml_toolbox-20.3.0.dist-info}/top_level.txt +0 -0

ml_tools/ML_trainer/_dragon_detection_trainer.py CHANGED Viewed

@@ -80,26 +80,12 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
     def _create_dataloaders(self, batch_size: int, shuffle: bool):
         """Initializes the DataLoaders with the object detection collate_fn."""
-        # Ensure stability on MPS devices by setting num_workers to 0
-        loader_workers = 0 if self.device.type == 'mps' else self.dataloader_workers
-        self.train_loader = DataLoader(
-            dataset=self.train_dataset,
-            batch_size=batch_size,
-            shuffle=shuffle,
-            num_workers=loader_workers,
-            pin_memory=("cuda" in self.device.type),
-            collate_fn=self.collate_fn, # Use the provided collate function
-            drop_last=True
-        )
-        self.validation_loader = DataLoader(
-            dataset=self.validation_dataset,
-            batch_size=batch_size,
-            shuffle=False,
-            num_workers=loader_workers,
-            pin_memory=("cuda" in self.device.type),
-            collate_fn=self.collate_fn # Use the provided collate function
+        self._make_dataloaders(
+            train_dataset=self.train_dataset,
+            validation_dataset=self.validation_dataset,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            collate_fn=self.collate_fn
         )
     def _train_step(self):
@@ -207,17 +193,9 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
                 - If 'current', use the current state of the trained model up the latest trained epoch.
             test_data (DataLoader | Dataset | None): Optional Test data to evaluate the model performance. Validation and Test metrics will be saved to subdirectories.
         """
-        # Validate model checkpoint
-        if isinstance(model_checkpoint, Path):
-            checkpoint_validated = make_fullpath(model_checkpoint, enforce="file")
-        elif model_checkpoint in [MagicWords.BEST, MagicWords.CURRENT]:
-            checkpoint_validated = model_checkpoint
-        else:
-            _LOGGER.error(f"'model_checkpoint' must be a Path object, or the string '{MagicWords.BEST}', or the string '{MagicWords.CURRENT}'.")
-            raise ValueError()
-        # Validate directory
-        save_path = make_fullpath(save_dir, make=True, enforce="directory")
+        # Validate inputs using base helpers
+        checkpoint_validated = self._validate_checkpoint_arg(model_checkpoint)
+        save_path = self._validate_save_dir(save_dir)
         # Validate test data and dispatch
         if test_data is not None:
@@ -230,21 +208,21 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
             test_metrics_path = save_path / DragonTrainerKeys.TEST_METRICS_DIR
             # Dispatch validation set
-            _LOGGER.info(f"Evaluating on validation dataset. Metrics will be saved to '{DragonTrainerKeys.VALIDATION_METRICS_DIR}'")
+            _LOGGER.info(f"🔎 Evaluating on validation dataset. Metrics will be saved to '{DragonTrainerKeys.VALIDATION_METRICS_DIR}'")
             self._evaluate(save_dir=validation_metrics_path,
-                           model_checkpoint=checkpoint_validated,
+                           model_checkpoint=checkpoint_validated, # type: ignore
                            data=None) # 'None' triggers use of self.test_dataset
             # Dispatch test set
-            _LOGGER.info(f"Evaluating on test dataset. Metrics will be saved to '{DragonTrainerKeys.TEST_METRICS_DIR}'")
+            _LOGGER.info(f"🔎 Evaluating on test dataset. Metrics will be saved to '{DragonTrainerKeys.TEST_METRICS_DIR}'")
             self._evaluate(save_dir=test_metrics_path,
                            model_checkpoint="current", # Use 'current' state after loading checkpoint once
                            data=test_data_validated)
         else:
             # Dispatch validation set
-            _LOGGER.info(f"Evaluating on validation dataset. Metrics will be saved to '{save_path.name}'")
+            _LOGGER.info(f"🔎 Evaluating on validation dataset. Metrics will be saved to '{save_path.name}'")
             self._evaluate(save_dir=save_path,
-                           model_checkpoint=checkpoint_validated,
+                           model_checkpoint=checkpoint_validated, # type: ignore
                            data=None) # 'None' triggers use of self.test_dataset
     def _evaluate(self,
@@ -263,54 +241,17 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
                 - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
                 - If 'current', use the current state of the trained model up the latest trained epoch.
         """
-        dataset_for_artifacts = None
-        eval_loader = None
         # load model checkpoint
-        if isinstance(model_checkpoint, Path):
-            self._load_checkpoint(path=model_checkpoint)
-        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback:
-            path_to_latest = self._checkpoint_callback.best_checkpoint_path
-            self._load_checkpoint(path_to_latest)
-        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback is None:
-            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.BEST}' but no checkpoint callback was found.")
-            raise ValueError()
-        # Dataloader
-        if isinstance(data, DataLoader):
-            eval_loader = data
-            if hasattr(data, 'dataset'):
-                dataset_for_artifacts = data.dataset # type: ignore
-        elif isinstance(data, Dataset):
-            # Create a new loader from the provided dataset
-            eval_loader = DataLoader(data,
-                                     batch_size=self._batch_size,
-                                     shuffle=False,
-                                     num_workers=0 if self.device.type == 'mps' else self.dataloader_workers,
-                                     pin_memory=(self.device.type == "cuda"),
-                                     collate_fn=self.collate_fn)
-            dataset_for_artifacts = data
-        else: # data is None, use the trainer's default test dataset
-            if self.validation_dataset is None:
-                _LOGGER.error("Cannot evaluate. No data provided and no test_dataset available in the trainer.")
-                raise ValueError()
-            # Create a fresh DataLoader from the test_dataset
-            eval_loader = DataLoader(
-                self.validation_dataset,
-                batch_size=self._batch_size,
-                shuffle=False,
-                num_workers=0 if self.device.type == 'mps' else self.dataloader_workers,
-                pin_memory=(self.device.type == "cuda"),
-                collate_fn=self.collate_fn
-            )
-            dataset_for_artifacts = self.validation_dataset
-        if eval_loader is None:
-            _LOGGER.error("Cannot evaluate. No valid data was provided or found.")
-            raise ValueError()
-        # print("\n--- Model Evaluation ---")
+        self._load_model_state_wrapper(model_checkpoint)
+        # Prepare Data using Base Helper
+        eval_loader, dataset_for_artifacts = self._prepare_eval_data(
+            data,
+            self.validation_dataset,
+            collate_fn=self.collate_fn # Important for Detection
+        )
+        # Gather all predictions and targets
         all_predictions = []
         all_targets = []
@@ -380,12 +321,8 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
             _LOGGER.error(f"For task {self.kind}, expected finalize_config of type 'FinalizeObjectDetection', but got {type(finalize_config).__name__}.")
             raise TypeError()
-        # handle save path
-        dir_path = make_fullpath(save_dir, make=True, enforce="directory")
-        full_path = dir_path / finalize_config.filename
         # handle checkpoint
-        self._load_model_state_for_finalizing(model_checkpoint)
+        self._load_model_state_wrapper(model_checkpoint)
         # Create finalized data
         finalized_data = {
@@ -397,6 +334,9 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
         if finalize_config.class_map is not None:
             finalized_data[PyTorchCheckpointKeys.CLASS_MAP] = finalize_config.class_map
-        torch.save(finalized_data, full_path)
-        _LOGGER.info(f"Finalized model file saved to '{full_path}'")
+        # Save using base helper
+        self._save_finalized_artifact(
+            finalized_data=finalized_data,
+            save_dir=save_dir,
+            filename=finalize_config.filename
+        )

ml_tools/ML_trainer/_dragon_sequence_trainer.py CHANGED Viewed

@@ -99,23 +99,11 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
     def _create_dataloaders(self, batch_size: int, shuffle: bool):
         """Initializes the DataLoaders."""
         # Ensure stability on MPS devices by setting num_workers to 0
-        loader_workers = 0 if self.device.type == 'mps' else self.dataloader_workers
-        self.train_loader = DataLoader(
-            dataset=self.train_dataset,
-            batch_size=batch_size,
-            shuffle=shuffle,
-            num_workers=loader_workers,
-            pin_memory=("cuda" in self.device.type),
-            drop_last=True  # Drops the last batch if incomplete, selecting a good batch size is key.
-        )
-        self.validation_loader = DataLoader(
-            dataset=self.validation_dataset,
-            batch_size=batch_size,
-            shuffle=False,
-            num_workers=loader_workers,
-            pin_memory=("cuda" in self.device.type)
+        self._make_dataloaders(
+            train_dataset=self.train_dataset,
+            validation_dataset=self.validation_dataset,
+            batch_size=batch_size,
+            shuffle=shuffle
         )
     def _train_step(self):
@@ -279,14 +267,9 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
             val_format_configuration: Optional configuration for validation metrics.
             test_format_configuration: Optional configuration for test metrics.
         """
-        # Validate model checkpoint
-        if isinstance(model_checkpoint, Path):
-            checkpoint_validated = make_fullpath(model_checkpoint, enforce="file")
-        elif model_checkpoint in [MagicWords.BEST, MagicWords.CURRENT]:
-            checkpoint_validated = model_checkpoint
-        else:
-            _LOGGER.error(f"'model_checkpoint' must be a Path object, or '{MagicWords.BEST}', or '{MagicWords.CURRENT}'.")
-            raise ValueError()
+        # Validate inputs using base helpers
+        checkpoint_validated = self._validate_checkpoint_arg(model_checkpoint)
+        save_path = self._validate_save_dir(save_dir)
         # Validate val configuration
         if val_format_configuration is not None:
@@ -294,9 +277,6 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
                 _LOGGER.error(f"Invalid 'val_format_configuration': '{type(val_format_configuration)}'.")
                 raise ValueError()
-        # Validate directory
-        save_path = make_fullpath(save_dir, make=True, enforce="directory")
         # Validate test data and dispatch
         if test_data is not None:
             if not isinstance(test_data, (DataLoader, Dataset)):
@@ -308,9 +288,9 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
             test_metrics_path = save_path / DragonTrainerKeys.TEST_METRICS_DIR
             # Dispatch validation set
-            _LOGGER.info(f"Evaluating on validation dataset. Metrics will be saved to '{DragonTrainerKeys.VALIDATION_METRICS_DIR}'")
+            _LOGGER.info(f"🔎 Evaluating on validation dataset. Metrics will be saved to '{DragonTrainerKeys.VALIDATION_METRICS_DIR}'")
             self._evaluate(save_dir=validation_metrics_path,
-                           model_checkpoint=checkpoint_validated,
+                           model_checkpoint=checkpoint_validated, # type: ignore
                            data=None,
                            format_configuration=val_format_configuration)
@@ -329,16 +309,16 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
                     test_configuration_validated = test_format_configuration
             # Dispatch test set
-            _LOGGER.info(f"Evaluating on test dataset. Metrics will be saved to '{DragonTrainerKeys.TEST_METRICS_DIR}'")
+            _LOGGER.info(f"🔎 Evaluating on test dataset. Metrics will be saved to '{DragonTrainerKeys.TEST_METRICS_DIR}'")
             self._evaluate(save_dir=test_metrics_path,
                            model_checkpoint="current",
                            data=test_data_validated,
                            format_configuration=test_configuration_validated)
         else:
             # Dispatch validation set
-            _LOGGER.info(f"Evaluating on validation dataset. Metrics will be saved to '{save_path.name}'")
+            _LOGGER.info(f"🔎 Evaluating on validation dataset. Metrics will be saved to '{save_path.name}'")
             self._evaluate(save_dir=save_path,
-                           model_checkpoint=checkpoint_validated,
+                           model_checkpoint=checkpoint_validated, # type: ignore
                            data=None,
                            format_configuration=val_format_configuration)
@@ -350,42 +330,13 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         """
         Private evaluation helper.
         """
-        eval_loader = None
         # load model checkpoint
-        if isinstance(model_checkpoint, Path):
-            self._load_checkpoint(path=model_checkpoint)
-        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback:
-            path_to_latest = self._checkpoint_callback.best_checkpoint_path
-            self._load_checkpoint(path_to_latest)
-        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback is None:
-            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.BEST}' but no checkpoint callback was found.")
-            raise ValueError()
+        self._load_model_state_wrapper(model_checkpoint)
-        # Dataloader
-        if isinstance(data, DataLoader):
-            eval_loader = data
-        elif isinstance(data, Dataset):
-            # Create a new loader from the provided dataset
-            eval_loader = DataLoader(data,
-                                     batch_size=self._batch_size,
-                                     shuffle=False,
-                                     num_workers=0 if self.device.type == 'mps' else self.dataloader_workers,
-                                     pin_memory=(self.device.type == "cuda"))
-        else: # data is None, use the trainer's default validation dataset
-            if self.validation_dataset is None:
-                _LOGGER.error("Cannot evaluate. No data provided and no validation_dataset available in the trainer.")
-                raise ValueError()
-            eval_loader = DataLoader(self.validation_dataset,
-                                     batch_size=self._batch_size,
-                                     shuffle=False,
-                                     num_workers=0 if self.device.type == 'mps' else self.dataloader_workers,
-                                     pin_memory=(self.device.type == "cuda"))
-        if eval_loader is None:
-            _LOGGER.error("Cannot evaluate. No valid data was provided or found.")
-            raise ValueError()
+        # Prepare Data using Base Helper
+        eval_loader, _ = self._prepare_eval_data(data, self.validation_dataset)
+        # Gather Predictions
         all_preds, _, all_true = [], [], []
         for y_pred_b, y_prob_b, y_true_b in self._predict_for_eval(eval_loader):
             if y_pred_b is not None: all_preds.append(y_pred_b)
@@ -514,13 +465,9 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         elif self.kind == MLTaskKeys.SEQUENCE_VALUE and not isinstance(finalize_config, FinalizeSequenceValuePrediction):
             _LOGGER.error(f"Received a wrong finalize configuration for task {self.kind}: {type(finalize_config).__name__}.")
             raise TypeError()
-        # handle save path
-        dir_path = make_fullpath(save_dir, make=True, enforce="directory")
-        full_path = dir_path / finalize_config.filename
         # handle checkpoint
-        self._load_model_state_for_finalizing(model_checkpoint)
+        self._load_model_state_wrapper(model_checkpoint)
         # Create finalized data
         finalized_data = {
@@ -534,7 +481,10 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         if finalize_config.initial_sequence is not None:
             finalized_data[PyTorchCheckpointKeys.INITIAL_SEQUENCE] = finalize_config.initial_sequence
-        torch.save(finalized_data, full_path)
-        _LOGGER.info(f"Finalized model file saved to '{full_path}'")
+        # Save using base helper
+        self._save_finalized_artifact(
+            finalized_data=finalized_data,
+            save_dir=save_dir,
+            filename=finalize_config.filename
+        )

ml_tools/ML_trainer/_dragon_trainer.py CHANGED Viewed

@@ -142,23 +142,11 @@ class DragonTrainer(_BaseDragonTrainer):
     def _create_dataloaders(self, batch_size: int, shuffle: bool):
         """Initializes the DataLoaders."""
         # Ensure stability on MPS devices by setting num_workers to 0
-        loader_workers = 0 if self.device.type == 'mps' else self.dataloader_workers
-        self.train_loader = DataLoader(
-            dataset=self.train_dataset,
-            batch_size=batch_size,
-            shuffle=shuffle,
-            num_workers=loader_workers,
-            pin_memory=("cuda" in self.device.type),
-            drop_last=True  # Drops the last batch if incomplete, selecting a good batch size is key.
-        )
-        self.validation_loader = DataLoader(
-            dataset=self.validation_dataset,
-            batch_size=batch_size,
-            shuffle=False,
-            num_workers=loader_workers,
-            pin_memory=("cuda" in self.device.type)
+        self._make_dataloaders(
+            train_dataset=self.train_dataset,
+            validation_dataset=self.validation_dataset,
+            batch_size=batch_size,
+            shuffle=shuffle
         )
     def _train_step(self):
@@ -403,14 +391,9 @@ class DragonTrainer(_BaseDragonTrainer):
             val_format_configuration (object): Optional configuration for metric format output for the validation set.
             test_format_configuration (object): Optional configuration for metric format output for the test set.
         """
-        # Validate model checkpoint
-        if isinstance(model_checkpoint, Path):
-            checkpoint_validated = make_fullpath(model_checkpoint, enforce="file")
-        elif model_checkpoint in [MagicWords.BEST, MagicWords.CURRENT]:
-            checkpoint_validated = model_checkpoint
-        else:
-            _LOGGER.error(f"'model_checkpoint' must be a Path object, or the string '{MagicWords.BEST}', or the string '{MagicWords.CURRENT}'.")
-            raise ValueError()
+        # Validate inputs using base helpers
+        checkpoint_validated = self._validate_checkpoint_arg(model_checkpoint)
+        save_path = self._validate_save_dir(save_dir)
         # Validate classification threshold
         if self.kind not in MLTaskKeys.ALL_BINARY_TASKS:
@@ -445,9 +428,6 @@ class DragonTrainer(_BaseDragonTrainer):
         else: # config is None
             val_configuration_validated = None
-        # Validate directory
-        save_path = make_fullpath(save_dir, make=True, enforce="directory")
         # Validate test data and dispatch
         if test_data is not None:
             if not isinstance(test_data, (DataLoader, Dataset)):
@@ -461,7 +441,7 @@ class DragonTrainer(_BaseDragonTrainer):
             # Dispatch validation set
             _LOGGER.info(f"🔎 Evaluating on validation dataset. Metrics will be saved to '{DragonTrainerKeys.VALIDATION_METRICS_DIR}'")
             self._evaluate(save_dir=validation_metrics_path,
-                           model_checkpoint=checkpoint_validated,
+                           model_checkpoint=checkpoint_validated, # type: ignore
                            classification_threshold=threshold_validated,
                            data=None,
                            format_configuration=val_configuration_validated)
@@ -499,9 +479,9 @@ class DragonTrainer(_BaseDragonTrainer):
                            format_configuration=test_configuration_validated)
         else:
             # Dispatch validation set
-            _LOGGER.info(f"Evaluating on validation dataset. Metrics will be saved to '{save_path.name}'")
+            _LOGGER.info(f"🔎 Evaluating on validation dataset. Metrics will be saved to '{save_path.name}'")
             self._evaluate(save_dir=save_path,
-                           model_checkpoint=checkpoint_validated,
+                           model_checkpoint=checkpoint_validated, # type: ignore
                            classification_threshold=threshold_validated,
                            data=None,
                            format_configuration=val_configuration_validated)
@@ -525,55 +505,16 @@ class DragonTrainer(_BaseDragonTrainer):
         """
         Changed to a private helper function.
         """
-        dataset_for_artifacts = None
-        eval_loader = None
         # set threshold
         self._classification_threshold = classification_threshold
         # load model checkpoint
-        if isinstance(model_checkpoint, Path):
-            self._load_checkpoint(path=model_checkpoint)
-        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback:
-            path_to_latest = self._checkpoint_callback.best_checkpoint_path
-            self._load_checkpoint(path_to_latest)
-        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback is None:
-            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.BEST}' but no checkpoint callback was found.")
-            raise ValueError()
+        self._load_model_state_wrapper(model_checkpoint)
-        # Dataloader
-        if isinstance(data, DataLoader):
-            eval_loader = data
-            # Try to get the dataset from the loader for fetching target names
-            if hasattr(data, 'dataset'):
-                dataset_for_artifacts = data.dataset # type: ignore
-        elif isinstance(data, Dataset):
-            # Create a new loader from the provided dataset
-            eval_loader = DataLoader(data,
-                                     batch_size=self._batch_size,
-                                     shuffle=False,
-                                     num_workers=0 if self.device.type == 'mps' else self.dataloader_workers,
-                                     pin_memory=(self.device.type == "cuda"))
-            dataset_for_artifacts = data
-        else: # data is None, use the trainer's default test dataset
-            if self.validation_dataset is None:
-                _LOGGER.error("Cannot evaluate. No data provided and no validation dataset available in the trainer.")
-                raise ValueError()
-            # Create a fresh DataLoader from the test_dataset
-            eval_loader = DataLoader(self.validation_dataset,
-                                     batch_size=self._batch_size,
-                                     shuffle=False,
-                                     num_workers=0 if self.device.type == 'mps' else self.dataloader_workers,
-                                     pin_memory=(self.device.type == "cuda"))
-            dataset_for_artifacts = self.validation_dataset
-        if eval_loader is None:
-            _LOGGER.error("Cannot evaluate. No valid data was provided or found.")
-            raise ValueError()
-        # print("\n--- Model Evaluation ---")
+        # Prepare Data using Base Helper
+        eval_loader, dataset_for_artifacts = self._prepare_eval_data(data, self.validation_dataset)
+        # Gather Predictions
         all_preds, all_probs, all_true = [], [], []
         for y_pred_b, y_prob_b, y_true_b in self._predict_for_eval(eval_loader):
             if y_pred_b is not None: all_preds.append(y_pred_b)
@@ -1128,13 +1069,9 @@ class DragonTrainer(_BaseDragonTrainer):
         elif self.kind == MLTaskKeys.MULTILABEL_BINARY_CLASSIFICATION and not isinstance(finalize_config, FinalizeMultiLabelBinaryClassification):
             _LOGGER.error(f"For task {self.kind}, expected finalize_config of type 'FinalizeMultiLabelBinaryClassification', but got {type(finalize_config).__name__}.")
             raise TypeError()
-        # handle save path
-        dir_path = make_fullpath(save_dir, make=True, enforce="directory")
-        full_path = dir_path / finalize_config.filename
         # handle checkpoint
-        self._load_model_state_for_finalizing(model_checkpoint)
+        self._load_model_state_wrapper(model_checkpoint)
         # Create finalized data
         finalized_data = {
@@ -1153,8 +1090,10 @@ class DragonTrainer(_BaseDragonTrainer):
         if finalize_config.class_map is not None:
             finalized_data[PyTorchCheckpointKeys.CLASS_MAP] = finalize_config.class_map
-        # Save model file
-        torch.save(finalized_data, full_path)
-        _LOGGER.info(f"Finalized model file saved to '{full_path}'")
+        # Save model file using base helper
+        self._save_finalized_artifact(
+            finalized_data=finalized_data,
+            save_dir=save_dir,
+            filename=finalize_config.filename
+        )

ml_tools/ML_utilities/__init__.py CHANGED Viewed

@@ -16,7 +16,7 @@ from ._train_tools import (
     save_pretrained_transforms,
 )
-from ._imprimir import info
+from .._core import _imprimir_disponibles
 __all__ = [
@@ -30,3 +30,7 @@ __all__ = [
     "save_pretrained_transforms",
     "select_features_by_shap"
 ]
+def info():
+    _imprimir_disponibles(__all__)

dragon-ml-toolbox 20.2.0__py3-none-any.whl → 20.3.0__py3-none-any.whl

dragon-ml-toolbox 20.2.0__py3-none-any.whl → 20.3.0__py3-none-any.whl