PyPI - dragon-ml-toolbox - Versions diffs - 19.11.0__py3-none-any.whl → 19.12.1__py3-none-any.whl - Mend

dragon-ml-toolbox 19.11.0__py3-none-any.whl → 19.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

ml_tools/_core/_ML_configuration.py CHANGED Viewed

@@ -660,18 +660,27 @@ class DragonTrainingConfig(_BaseModelParams):
                  initial_learning_rate: float,
                  batch_size: int,
                  random_state: int = 101,
-                 early_stop_patience: Optional[int] = None,
-                 scheduler_patience: Optional[int] = None,
-                 scheduler_lr_factor: Optional[float] = None,
+                #  early_stop_patience: Optional[int] = None,
+                #  scheduler_patience: Optional[int] = None,
+                #  scheduler_lr_factor: Optional[float] = None,
                  **kwargs: Any) -> None:
+        """
+        Args:
+            validation_size (float): Proportion of data for validation set.
+            test_size (float): Proportion of data for test set.
+            initial_learning_rate (float): Starting learning rate.
+            batch_size (int): Number of samples per training batch.
+            random_state (int): Seed for reproducibility.
+            **kwargs: Additional training parameters as key-value pairs.
+        """
         self.validation_size = validation_size
         self.test_size = test_size
         self.initial_learning_rate = initial_learning_rate
         self.batch_size = batch_size
         self.random_state = random_state
-        self.early_stop_patience = early_stop_patience
-        self.scheduler_patience = scheduler_patience
-        self.scheduler_lr_factor = scheduler_lr_factor
+        # self.early_stop_patience = early_stop_patience
+        # self.scheduler_patience = scheduler_patience
+        # self.scheduler_lr_factor = scheduler_lr_factor
         # Process kwargs with validation
         for key, value in kwargs.items():

ml_tools/_core/_ML_finalize_handler.py CHANGED Viewed

@@ -51,7 +51,7 @@ class FinalizedFileHandler:
         self._initial_sequence: Optional[np.ndarray] = None
         self._target_name: Optional[str] = None
         self._target_names: Optional[list[str]] = None
-        self._model_state_dict: Optional[Any] = None
+        self._model_state_dict: Optional[dict[str, Any]] = None
         # Set warning outputs
         self._verbose: bool=True
@@ -90,7 +90,7 @@ class FinalizedFileHandler:
         else:
             # It is a dict, but missing the keys, assume it is the raw state dict
-            _LOGGER.info(f"File '{pth_path.name}' does not have the required keys for a finalized-file. Treating it as raw PyTorch state dictionary.")
+            _LOGGER.warning(f"File '{pth_path.name}' does not have the required keys for a Dragon-ML finalized-file. Keys found:\n    {list(pth_file_content.keys())}")
             self._model_state_dict = pth_file_content
@@ -113,9 +113,10 @@ class FinalizedFileHandler:
         return self._task
     @property
-    def model_state_dict(self):
+    def model_state_dict(self) -> dict[str, Any]:
         """Returns the model state dictionary."""
-        return self._model_state_dict
+        # No need to check for None, as it is guaranteed to be set in __init__
+        return self._model_state_dict # type: ignore
     @property
     def epoch(self) -> Optional[int]:

ml_tools/_core/_ML_trainer.py CHANGED Viewed

@@ -7,7 +7,7 @@ import numpy as np
 from abc import ABC, abstractmethod
 from ._path_manager import make_fullpath
-from ._ML_callbacks import _Callback, History, TqdmProgressBar, DragonModelCheckpoint, DragonEarlyStopping, DragonLRScheduler
+from ._ML_callbacks import _Callback, History, TqdmProgressBar, DragonModelCheckpoint, _DragonEarlyStopping, _DragonLRScheduler
 from ._ML_evaluation import classification_metrics, regression_metrics, plot_losses, shap_summary_plot, plot_attention_importance
 from ._ML_evaluation_multi import multi_target_regression_metrics, multi_label_classification_metrics, multi_target_shap_summary_plot
 from ._ML_vision_evaluation import segmentation_metrics, object_detection_metrics
@@ -66,8 +66,8 @@ class _BaseDragonTrainer(ABC):
                  device: Union[Literal['cuda', 'mps', 'cpu'],str],
                  dataloader_workers: int = 2,
                  checkpoint_callback: Optional[DragonModelCheckpoint] = None,
-                 early_stopping_callback: Optional[DragonEarlyStopping] = None,
-                 lr_scheduler_callback: Optional[DragonLRScheduler] = None,
+                 early_stopping_callback: Optional[_DragonEarlyStopping] = None,
+                 lr_scheduler_callback: Optional[_DragonLRScheduler] = None,
                  extra_callbacks: Optional[List[_Callback]] = None):
         self.model = model
@@ -271,18 +271,18 @@ class _BaseDragonTrainer(ABC):
         self.model.to(self.device)
         _LOGGER.info(f"Trainer and model moved to {self.device}.")
-    def _load_model_state_for_finalizing(self, model_checkpoint: Union[Path, Literal['latest', 'current']]):
+    def _load_model_state_for_finalizing(self, model_checkpoint: Union[Path, Literal['best', 'current']]):
         """
         Private helper to load the correct model state_dict based on user's choice.
         This is called by finalize_model_training() in subclasses.
         """
         if isinstance(model_checkpoint, Path):
             self._load_checkpoint(path=model_checkpoint)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback:
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback:
             path_to_latest = self._checkpoint_callback.best_checkpoint_path
             self._load_checkpoint(path_to_latest)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback is None:
-            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.LATEST}' but no checkpoint callback was found.")
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback is None:
+            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.BEST}' but no checkpoint callback was found.")
             raise ValueError()
         elif model_checkpoint == MagicWords.CURRENT:
             pass
@@ -336,8 +336,8 @@ class DragonTrainer(_BaseDragonTrainer):
                  optimizer: torch.optim.Optimizer,
                  device: Union[Literal['cuda', 'mps', 'cpu'],str],
                  checkpoint_callback: Optional[DragonModelCheckpoint],
-                 early_stopping_callback: Optional[DragonEarlyStopping],
-                 lr_scheduler_callback: Optional[DragonLRScheduler],
+                 early_stopping_callback: Optional[_DragonEarlyStopping],
+                 lr_scheduler_callback: Optional[_DragonLRScheduler],
                  extra_callbacks: Optional[List[_Callback]] = None,
                  criterion: Union[nn.Module,Literal["auto"]] = "auto",
                  dataloader_workers: int = 2):
@@ -634,7 +634,7 @@ class DragonTrainer(_BaseDragonTrainer):
     def evaluate(self,
                  save_dir: Union[str, Path],
-                 model_checkpoint: Union[Path, Literal["latest", "current"]],
+                 model_checkpoint: Union[Path, Literal["best", "current"]],
                  classification_threshold: Optional[float] = None,
                  test_data: Optional[Union[DataLoader, Dataset]] = None,
                  val_format_configuration: Optional[Union[
@@ -665,7 +665,7 @@ class DragonTrainer(_BaseDragonTrainer):
         Args:
             model_checkpoint ('auto' | Path | None):
                 - Path to a valid checkpoint for the model. The state of the trained model will be overwritten in place.
-                - If 'latest', the latest checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
+                - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
                 - If 'current', use the current state of the trained model up the latest trained epoch.
             save_dir (str | Path): Directory to save all reports and plots.
             classification_threshold (float | None): Used for tasks using a binary approach (binary classification, binary segmentation, multilabel binary classification)
@@ -676,10 +676,10 @@ class DragonTrainer(_BaseDragonTrainer):
         # Validate model checkpoint
         if isinstance(model_checkpoint, Path):
             checkpoint_validated = make_fullpath(model_checkpoint, enforce="file")
-        elif model_checkpoint in [MagicWords.LATEST, MagicWords.CURRENT]:
+        elif model_checkpoint in [MagicWords.BEST, MagicWords.CURRENT]:
             checkpoint_validated = model_checkpoint
         else:
-            _LOGGER.error(f"'model_checkpoint' must be a Path object, or the string '{MagicWords.LATEST}', or the string '{MagicWords.CURRENT}'.")
+            _LOGGER.error(f"'model_checkpoint' must be a Path object, or the string '{MagicWords.BEST}', or the string '{MagicWords.CURRENT}'.")
             raise ValueError()
         # Validate classification threshold
@@ -778,7 +778,7 @@ class DragonTrainer(_BaseDragonTrainer):
     def _evaluate(self,
                  save_dir: Union[str, Path],
-                 model_checkpoint: Union[Path, Literal["latest", "current"]],
+                 model_checkpoint: Union[Path, Literal["best", "current"]],
                  classification_threshold: float,
                  data: Optional[Union[DataLoader, Dataset]],
                  format_configuration: Optional[Union[
@@ -804,11 +804,11 @@ class DragonTrainer(_BaseDragonTrainer):
         # load model checkpoint
         if isinstance(model_checkpoint, Path):
             self._load_checkpoint(path=model_checkpoint)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback:
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback:
             path_to_latest = self._checkpoint_callback.best_checkpoint_path
             self._load_checkpoint(path_to_latest)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback is None:
-            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.LATEST}' but no checkpoint callback was found.")
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback is None:
+            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.BEST}' but no checkpoint callback was found.")
             raise ValueError()
         # Dataloader
@@ -1352,7 +1352,7 @@ class DragonTrainer(_BaseDragonTrainer):
             _LOGGER.error("No attention weights were collected from the model.")
     def finalize_model_training(self,
-                                model_checkpoint: Union[Path, Literal['latest', 'current']],
+                                model_checkpoint: Union[Path, Literal['best', 'current']],
                                 save_dir: Union[str, Path],
                                 finalize_config: Union[FinalizeRegression,
                                                        FinalizeMultiTargetRegression,
@@ -1369,10 +1369,10 @@ class DragonTrainer(_BaseDragonTrainer):
         This method saves the model's `state_dict`, the final epoch number, and optional configuration for the task at hand.
         Args:
-            model_checkpoint (Path | "latest" | "current"):
+            model_checkpoint (Path | "best" | "current"):
                 - Path: Loads the model state from a specific checkpoint file.
-                - "latest": Loads the best model state saved by the `DragonModelCheckpoint` callback.
-                - "current": Uses the model's state as it is at the end of the `fit()` call.
+                - "best": Loads the best model state saved by the `DragonModelCheckpoint` callback.
+                - "current": Uses the model's state as it is.
             save_dir (str | Path): The directory to save the finalized model.
             finalize_config (object): A data class instance specific to the ML task containing task-specific metadata required for inference.
         """
@@ -1442,8 +1442,8 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
                  collate_fn: Callable, optimizer: torch.optim.Optimizer,
                  device: Union[Literal['cuda', 'mps', 'cpu'],str],
                  checkpoint_callback: Optional[DragonModelCheckpoint],
-                 early_stopping_callback: Optional[DragonEarlyStopping],
-                 lr_scheduler_callback: Optional[DragonLRScheduler],
+                 early_stopping_callback: Optional[_DragonEarlyStopping],
+                 lr_scheduler_callback: Optional[_DragonLRScheduler],
                  extra_callbacks: Optional[List[_Callback]] = None,
                  dataloader_workers: int = 2):
         """
@@ -1601,7 +1601,7 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
     def evaluate(self,
                  save_dir: Union[str, Path],
-                 model_checkpoint: Union[Path, Literal["latest", "current"]],
+                 model_checkpoint: Union[Path, Literal["best", "current"]],
                  test_data: Optional[Union[DataLoader, Dataset]] = None):
         """
         Evaluates the model using object detection mAP metrics.
@@ -1610,17 +1610,17 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
             save_dir (str | Path): Directory to save all reports and plots.
             model_checkpoint ('auto' | Path | None):
                 - Path to a valid checkpoint for the model. The state of the trained model will be overwritten in place.
-                - If 'latest', the latest checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
+                - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
                 - If 'current', use the current state of the trained model up the latest trained epoch.
             test_data (DataLoader | Dataset | None): Optional Test data to evaluate the model performance. Validation and Test metrics will be saved to subdirectories.
         """
         # Validate model checkpoint
         if isinstance(model_checkpoint, Path):
             checkpoint_validated = make_fullpath(model_checkpoint, enforce="file")
-        elif model_checkpoint in [MagicWords.LATEST, MagicWords.CURRENT]:
+        elif model_checkpoint in [MagicWords.BEST, MagicWords.CURRENT]:
             checkpoint_validated = model_checkpoint
         else:
-            _LOGGER.error(f"'model_checkpoint' must be a Path object, or the string '{MagicWords.LATEST}', or the string '{MagicWords.CURRENT}'.")
+            _LOGGER.error(f"'model_checkpoint' must be a Path object, or the string '{MagicWords.BEST}', or the string '{MagicWords.CURRENT}'.")
             raise ValueError()
         # Validate directory
@@ -1656,7 +1656,7 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
     def _evaluate(self,
                  save_dir: Union[str, Path],
-                 model_checkpoint: Union[Path, Literal["latest", "current"]],
+                 model_checkpoint: Union[Path, Literal["best", "current"]],
                  data: Optional[Union[DataLoader, Dataset]]):
         """
         Changed to a private helper method
@@ -1667,7 +1667,7 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
             data (DataLoader | Dataset | None): The data to evaluate on. If None, defaults to the trainer's internal test_dataset.
             model_checkpoint ('auto' | Path | None):
                 - Path to a valid checkpoint for the model. The state of the trained model will be overwritten in place.
-                - If 'latest', the latest checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
+                - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
                 - If 'current', use the current state of the trained model up the latest trained epoch.
         """
         dataset_for_artifacts = None
@@ -1676,11 +1676,11 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
         # load model checkpoint
         if isinstance(model_checkpoint, Path):
             self._load_checkpoint(path=model_checkpoint)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback:
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback:
             path_to_latest = self._checkpoint_callback.best_checkpoint_path
             self._load_checkpoint(path_to_latest)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback is None:
-            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.LATEST}' but no checkpoint callback was found.")
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback is None:
+            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.BEST}' but no checkpoint callback was found.")
             raise ValueError()
         # Dataloader
@@ -1767,7 +1767,7 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
     def finalize_model_training(self,
                                 save_dir: Union[str, Path],
-                                model_checkpoint: Union[Path, Literal['latest', 'current']],
+                                model_checkpoint: Union[Path, Literal['best', 'current']],
                                 finalize_config: FinalizeObjectDetection
                                 ):
         """
@@ -1777,10 +1777,10 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
         Args:
             save_dir (Union[str, Path]): The directory to save the finalized model.
-            model_checkpoint (Union[Path, Literal["latest", "current"]]):
+            model_checkpoint (Union[Path, Literal["best", "current"]]):
                 - Path: Loads the model state from a specific checkpoint file.
-                - "latest": Loads the best model state saved by the `DragonModelCheckpoint` callback.
-                - "current": Uses the model's state as it is at the end of the `fit()` call.
+                - "best": Loads the best model state saved by the `DragonModelCheckpoint` callback.
+                - "current": Uses the model's state as it is.
             finalize_config (FinalizeObjectDetection): A data class instance specific to the ML task containing task-specific metadata required for inference.
         """
         if not isinstance(finalize_config, FinalizeObjectDetection):
@@ -1818,8 +1818,8 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
                  optimizer: torch.optim.Optimizer,
                  device: Union[Literal['cuda', 'mps', 'cpu'],str],
                  checkpoint_callback: Optional[DragonModelCheckpoint],
-                 early_stopping_callback: Optional[DragonEarlyStopping],
-                 lr_scheduler_callback: Optional[DragonLRScheduler],
+                 early_stopping_callback: Optional[_DragonEarlyStopping],
+                 lr_scheduler_callback: Optional[_DragonLRScheduler],
                  extra_callbacks: Optional[List[_Callback]] = None,
                  criterion: Union[nn.Module,Literal["auto"]] = "auto",
                  dataloader_workers: int = 2):
@@ -2036,7 +2036,7 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
     def evaluate(self,
                  save_dir: Union[str, Path],
-                 model_checkpoint: Union[Path, Literal["latest", "current"]],
+                 model_checkpoint: Union[Path, Literal["best", "current"]],
                  test_data: Optional[Union[DataLoader, Dataset]] = None,
                  val_format_configuration: Optional[Union[SequenceValueMetricsFormat,
                                                           SequenceSequenceMetricsFormat]]=None,
@@ -2048,7 +2048,7 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         Args:
             model_checkpoint ('auto' | Path | None):
                 - Path to a valid checkpoint for the model.
-                - If 'latest', the latest checkpoint will be loaded.
+                - If 'best', the best checkpoint will be loaded.
                 - If 'current', use the current state of the trained model.
             save_dir (str | Path): Directory to save all reports and plots.
             test_data (DataLoader | Dataset | None): Optional Test data.
@@ -2058,10 +2058,10 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         # Validate model checkpoint
         if isinstance(model_checkpoint, Path):
             checkpoint_validated = make_fullpath(model_checkpoint, enforce="file")
-        elif model_checkpoint in [MagicWords.LATEST, MagicWords.CURRENT]:
+        elif model_checkpoint in [MagicWords.BEST, MagicWords.CURRENT]:
             checkpoint_validated = model_checkpoint
         else:
-            _LOGGER.error(f"'model_checkpoint' must be a Path object, or '{MagicWords.LATEST}', or '{MagicWords.CURRENT}'.")
+            _LOGGER.error(f"'model_checkpoint' must be a Path object, or '{MagicWords.BEST}', or '{MagicWords.CURRENT}'.")
             raise ValueError()
         # Validate val configuration
@@ -2120,7 +2120,7 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
     def _evaluate(self,
                  save_dir: Union[str, Path],
-                 model_checkpoint: Union[Path, Literal["latest", "current"]],
+                 model_checkpoint: Union[Path, Literal["best", "current"]],
                  data: Optional[Union[DataLoader, Dataset]],
                  format_configuration: object):
         """
@@ -2131,11 +2131,11 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         # load model checkpoint
         if isinstance(model_checkpoint, Path):
             self._load_checkpoint(path=model_checkpoint)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback:
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback:
             path_to_latest = self._checkpoint_callback.best_checkpoint_path
             self._load_checkpoint(path_to_latest)
-        elif model_checkpoint == MagicWords.LATEST and self._checkpoint_callback is None:
-            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.LATEST}' but no checkpoint callback was found.")
+        elif model_checkpoint == MagicWords.BEST and self._checkpoint_callback is None:
+            _LOGGER.error(f"'model_checkpoint' set to '{MagicWords.BEST}' but no checkpoint callback was found.")
             raise ValueError()
         # Dataloader
@@ -2273,7 +2273,7 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
     def finalize_model_training(self,
                                 save_dir: Union[str, Path],
-                                model_checkpoint: Union[Path, Literal['latest', 'current']],
+                                model_checkpoint: Union[Path, Literal['best', 'current']],
                                 finalize_config: Union[FinalizeSequenceSequencePrediction, FinalizeSequenceValuePrediction]):
         """
         Saves a finalized, "inference-ready" model state to a .pth file.
@@ -2282,10 +2282,10 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         Args:
             save_dir (Union[str, Path]): The directory to save the finalized model.
-            model_checkpoint (Union[Path, Literal["latest", "current"]]):
+            model_checkpoint (Union[Path, Literal["best", "current"]]):
                 - Path: Loads the model state from a specific checkpoint file.
-                - "latest": Loads the best model state saved by the `DragonModelCheckpoint` callback.
-                - "current": Uses the model's state as it is at the end of the `fit()` call.
+                - "best": Loads the best model state saved by the `DragonModelCheckpoint` callback.
+                - "current": Uses the model's state as it is.
             finalize_config (FinalizeSequencePrediction): A data class instance specific to the ML task containing task-specific metadata required for inference.
         """
         if self.kind == MLTaskKeys.SEQUENCE_SEQUENCE and not isinstance(finalize_config, FinalizeSequenceSequencePrediction):

ml_tools/_core/_keys.py CHANGED Viewed

@@ -1,6 +1,6 @@
 class MagicWords:
     """General purpose keys"""
-    LATEST = "latest"
+    BEST = "best"
     CURRENT = "current"
     RENAME = "rename"
     UNKNOWN = "unknown"
@@ -200,6 +200,37 @@ class MLTaskKeys:
     ALL_BINARY_TASKS = [BINARY_CLASSIFICATION, MULTILABEL_BINARY_CLASSIFICATION, BINARY_IMAGE_CLASSIFICATION, BINARY_SEGMENTATION]
+class _PublicTaskKeys:
+    """
+    Task keys used in the Dragon ML pipeline:
+    1. REGRESSION
+    2. MULTITARGET_REGRESSION
+    3. BINARY_CLASSIFICATION
+    4. MULTICLASS_CLASSIFICATION
+    5. MULTILABEL_BINARY_CLASSIFICATION
+    6. BINARY_IMAGE_CLASSIFICATION
+    7. MULTICLASS_IMAGE_CLASSIFICATION
+    8. BINARY_SEGMENTATION
+    9. MULTICLASS_SEGMENTATION
+    10. OBJECT_DETECTION
+    11. SEQUENCE_SEQUENCE
+    12. SEQUENCE_VALUE
+    Every attribute is bound to the same-named attribute of `MLTaskKeys`, so the values are identical to the internal keys.
+    Only the individual task keys are listed; grouping constants such as `MLTaskKeys.ALL_BINARY_TASKS` are not re-exposed here.
+    """
+    # Aliases: each name below re-exposes the same-named constant from MLTaskKeys.
+    REGRESSION = MLTaskKeys.REGRESSION
+    MULTITARGET_REGRESSION = MLTaskKeys.MULTITARGET_REGRESSION
+    BINARY_CLASSIFICATION = MLTaskKeys.BINARY_CLASSIFICATION
+    MULTICLASS_CLASSIFICATION = MLTaskKeys.MULTICLASS_CLASSIFICATION
+    MULTILABEL_BINARY_CLASSIFICATION = MLTaskKeys.MULTILABEL_BINARY_CLASSIFICATION
+    BINARY_IMAGE_CLASSIFICATION = MLTaskKeys.BINARY_IMAGE_CLASSIFICATION
+    MULTICLASS_IMAGE_CLASSIFICATION = MLTaskKeys.MULTICLASS_IMAGE_CLASSIFICATION
+    BINARY_SEGMENTATION = MLTaskKeys.BINARY_SEGMENTATION
+    MULTICLASS_SEGMENTATION = MLTaskKeys.MULTICLASS_SEGMENTATION
+    OBJECT_DETECTION = MLTaskKeys.OBJECT_DETECTION
+    SEQUENCE_SEQUENCE = MLTaskKeys.SEQUENCE_SEQUENCE
+    SEQUENCE_VALUE = MLTaskKeys.SEQUENCE_VALUE
 class DragonTrainerKeys:
     VALIDATION_METRICS_DIR = "Validation_Metrics"
     TEST_METRICS_DIR = "Test_Metrics"

ml_tools/_core/_path_manager.py CHANGED Viewed

@@ -1,8 +1,8 @@
-from pprint import pprint
 from typing import Optional, List, Dict, Union, Literal
 from pathlib import Path
 import re
 import sys
+import shutil
 from ._script_info import _script_info
 from ._logger import get_logger
@@ -17,7 +17,9 @@ __all__ = [
     "sanitize_filename",
     "list_csv_paths",
     "list_files_by_extension",
-    "list_subdirectories"
+    "list_subdirectories",
+    "clean_directory",
+    "safe_move",
 ]
@@ -542,5 +544,112 @@ def list_subdirectories(
     return dir_map
+def clean_directory(directory: Union[str, Path], verbose: bool = False) -> None:
+    """
+    ⚠️  DANGER: DESTRUCTIVE OPERATION ⚠️
+    Deletes all files and subdirectories inside the specified directory. It is designed to empty a folder, not delete the folder itself.
+    Safety: It skips hidden files and directories (those starting with a period '.'). This works for macOS/Linux hidden files and dot-config folders on Windows.
+    Only top-level names are checked: hidden items nested inside a non-hidden subdirectory are removed together with that subdirectory.
+    Deletion is best-effort: a failure on one item is logged as a warning and the remaining items are still processed.
+    Args:
+        directory (str | Path): The directory path to clean.
+        verbose (bool): If True, logs a warning when the cleanup starts and prints the name of each top-level item deleted.
+    """
+    # NOTE(review): enforce="directory" presumably makes make_fullpath reject non-directory paths; the helper is not shown here, confirm.
+    target_dir = make_fullpath(directory, enforce="directory")
+    if verbose:
+        _LOGGER.warning(f"Starting cleanup of directory: {target_dir}")
+    for item in target_dir.iterdir():
+        # Safety Check: Skip hidden files/dirs
+        if item.name.startswith("."):
+            continue
+        try:
+            # Symlinks are unlinked rather than followed: is_symlink() also matches links that
+            # point at directories or are broken, so rmtree is never called on a link.
+            if item.is_file() or item.is_symlink():
+                item.unlink()
+                if verbose:
+                    print(f"    🗑️  Deleted file: {item.name}")
+            # Items matching neither branch (anything that is not a file, symlink or directory)
+            # are left in place without a message.
+            elif item.is_dir():
+                shutil.rmtree(item)
+                if verbose:
+                    print(f"    🗑️  Deleted directory: {item.name}")
+        except Exception as e:
+            # Best-effort cleanup: report the failure and move on to the next item.
+            _LOGGER.warning(f"Failed to delete item '{item.name}': {e}")
+            continue
+def safe_move(
+    source: Union[str, Path],
+    final_destination: Union[str, Path],
+    rename: Optional[str] = None,
+    overwrite: bool = False
+) -> Path:
+    """
+    Moves a file or directory to a destination directory with safety checks.
+    Features:
+    - Supports optional renaming (sanitized automatically).
+    - PRESERVES file extensions during renaming (cannot be modified).
+    - Prevents accidental overwrites unless explicit.
+    - Never deletes the source: if the computed target is the source itself, nothing is removed or moved.
+    Args:
+        source (str | Path): The file or directory to move.
+        final_destination (str | Path): The destination DIRECTORY where the item will be moved. It will be created if it does not exist.
+        rename (Optional[str]): If provided, the moved item will be renamed to this. Note: For files, the extension is strictly preserved.
+        overwrite (bool): If True, overwrites the destination path if it exists.
+    Returns:
+        Path: The new absolute path of the moved item.
+    Raises:
+        FileExistsError: If the target already exists and `overwrite` is False, or if a directory would overwrite an existing file.
+        IsADirectoryError: If a file would overwrite an existing directory.
+    """
+    # 1. Validation and Setup
+    src_path = make_fullpath(source, make=False)
+    # Ensure destination directory exists
+    dest_dir_path = make_fullpath(final_destination, make=True, enforce="directory")
+    # 2. Determine Target Name
+    if rename:
+        sanitized_name = sanitize_filename(rename)
+        if src_path.is_file():
+            # Strict Extension Preservation
+            final_name = f"{sanitized_name}{src_path.suffix}"
+        else:
+            final_name = sanitized_name
+    else:
+        final_name = src_path.name
+    final_path = dest_dir_path / final_name
+    # 3. Safety Checks (Collision Detection)
+    if final_path.exists():
+        if not overwrite:
+            _LOGGER.error(f"Destination already exists: '{final_path}'. Use overwrite=True to force.")
+            raise FileExistsError()
+        # The target IS the source (e.g. moving an item into its own parent directory without
+        # renaming). Removing the "old" target below would destroy the source before the move,
+        # so treat this as a no-op. A symlink at the target is excluded from this check: it is a
+        # distinct directory entry and can be replaced safely.
+        if not final_path.is_symlink() and final_path.samefile(src_path):
+            _LOGGER.warning(f"Source and destination are the same path: '{final_path}'. Nothing to move.")
+            return final_path
+        # Smart Overwrite Handling
+        if final_path.is_dir():
+            if src_path.is_file():
+                _LOGGER.error(f"Cannot overwrite directory '{final_path}' with file '{src_path}'")
+                raise IsADirectoryError()
+            # If overwriting a directory, we must remove the old one first to avoid nesting/errors
+            shutil.rmtree(final_path)
+        else:
+            # Destination is a file
+            if src_path.is_dir():
+                _LOGGER.error(f"Cannot overwrite file '{final_path}' with directory '{src_path}'")
+                raise FileExistsError()
+            final_path.unlink()
+    # 4. Perform Move
+    try:
+        shutil.move(str(src_path), str(final_path))
+    except Exception:
+        _LOGGER.exception(f"Failed to move '{src_path}' to '{final_path}'")
+        # Bare raise re-raises the active exception without adding this frame to the traceback
+        raise
+    return final_path
 def info():
     _script_info(__all__)

ml_tools/keys.py CHANGED Viewed

@@ -2,10 +2,12 @@ from ._core._keys import (
     PyTorchInferenceKeys as InferenceKeys,
     _CheckpointCallbackKeys as CheckpointCallbackKeys,
     _FinalizedFileKeys as FinalizedFileKeys,
+    _PublicTaskKeys as TaskKeys,
 )
 __all__ = [
     "InferenceKeys",
     "CheckpointCallbackKeys",
     "FinalizedFileKeys",
+    "TaskKeys",
 ]

ml_tools/path_manager.py CHANGED Viewed

@@ -5,6 +5,8 @@ from ._core._path_manager import (
     list_csv_paths,
     list_files_by_extension,
     list_subdirectories,
+    clean_directory,
+    safe_move,
     info
 )
@@ -14,5 +16,7 @@ __all__ = [
     "sanitize_filename",
     "list_csv_paths",
     "list_files_by_extension",
-    "list_subdirectories"
+    "list_subdirectories",
+    "clean_directory",
+    "safe_move",
 ]

{dragon_ml_toolbox-19.11.0.dist-info → dragon_ml_toolbox-19.12.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-19.11.0.dist-info → dragon_ml_toolbox-19.12.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-19.11.0.dist-info → dragon_ml_toolbox-19.12.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-19.11.0.dist-info → dragon_ml_toolbox-19.12.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 19.11.0__py3-none-any.whl → 19.12.1__py3-none-any.whl

dragon-ml-toolbox 19.11.0__py3-none-any.whl → 19.12.1__py3-none-any.whl