kostyl-toolkit 0.1.30__tar.gz → 0.1.32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/PKG-INFO +1 -1
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/clearml/pulling_utils.py +1 -1
- kostyl_toolkit-0.1.32/kostyl/ml/data_processing_utils.py +102 -0
- kostyl_toolkit-0.1.32/kostyl/ml/lightning/__init__.py +5 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/lightning/callbacks/__init__.py +0 -2
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/lightning/callbacks/checkpoint.py +1 -2
- {kostyl_toolkit-0.1.30/kostyl/ml/lightning/extenstions → kostyl_toolkit-0.1.32/kostyl/ml/lightning/extensions}/custom_module.py +21 -8
- kostyl_toolkit-0.1.32/kostyl/ml/lightning/training_utils.py +241 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/metrics_formatting.py +2 -3
- {kostyl_toolkit-0.1.30/kostyl/ml/lightning/callbacks → kostyl_toolkit-0.1.32/kostyl/ml}/registry_uploader.py +20 -43
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/pyproject.toml +1 -1
- kostyl_toolkit-0.1.30/kostyl/ml/lightning/__init__.py +0 -5
- kostyl_toolkit-0.1.30/kostyl/ml/lightning/steps_estimation.py +0 -44
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/README.md +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/__init__.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/__init__.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/clearml/__init__.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/clearml/dataset_utils.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/clearml/logging_utils.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/configs/__init__.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/configs/base_model.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/configs/hyperparams.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/configs/training_settings.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/dist_utils.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/lightning/callbacks/early_stopping.py +0 -0
- {kostyl_toolkit-0.1.30/kostyl/ml/lightning/extenstions → kostyl_toolkit-0.1.32/kostyl/ml/lightning/extensions}/__init__.py +0 -0
- {kostyl_toolkit-0.1.30/kostyl/ml/lightning/extenstions → kostyl_toolkit-0.1.32/kostyl/ml/lightning/extensions}/pretrained_model.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/lightning/loggers/__init__.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/lightning/loggers/tb_logger.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/params_groups.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/schedulers/__init__.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/schedulers/base.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/schedulers/composite.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/schedulers/cosine.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/schedulers/linear.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/utils/__init__.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/utils/dict_manipulations.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/utils/fs.py +0 -0
- {kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/utils/logging.py +0 -0
{kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/clearml/pulling_utils.py

@@ -9,7 +9,7 @@ from transformers import AutoTokenizer
 from transformers import PreTrainedModel
 from transformers import PreTrainedTokenizerBase
 
-from kostyl.ml.lightning.extenstions.pretrained_model import (
+from kostyl.ml.lightning.extensions.pretrained_model import (
     LightningCheckpointLoaderMixin,
 )
 
kostyl_toolkit-0.1.32/kostyl/ml/data_processing_utils.py (new file)

@@ -0,0 +1,102 @@
+from copy import deepcopy
+from typing import Any
+
+import torch
+from transformers import DataCollatorWithPadding
+from transformers.data.data_collator import DataCollatorMixin
+
+
+class BatchCollatorWithKeyAlignment:
+    """
+    Maps dataset keys to HuggingFace DataCollator expected keys and collates the batch.
+
+    HuggingFace collators expect specific keys depending on the collator type:
+    - `DataCollatorWithPadding`: "input_ids", "attention_mask", "token_type_ids" (optional).
+    - `DataCollatorForLanguageModeling`: "input_ids", "attention_mask", "special_tokens_mask" (optional).
+    - `DataCollatorForSeq2Seq`: "input_ids", "attention_mask", "labels".
+    - `DataCollatorForTokenClassification`: "input_ids", "attention_mask", "labels".
+
+    This wrapper allows you to map arbitrary dataset keys to these expected names before collation,
+    optionally truncating sequences to a maximum length.
+    """
+
+    def __init__(
+        self,
+        collator: DataCollatorWithPadding | DataCollatorMixin,
+        keys_mapping: dict[str, str] | None = None,
+        keys_to_keep: set[str] | None = None,
+        max_length: int | None = None,
+    ) -> None:
+        """
+        Initialize the BatchCollatorWithKeyAlignment.
+
+        Args:
+            collator: A callable (usually a Hugging Face DataCollator) that takes a list
+                of dictionaries and returns a collated batch (e.g., padded tensors).
+            keys_mapping: A dictionary mapping original keys to new keys.
+            keys_to_keep: A set of keys to retain as-is from the original items.
+            max_length: If provided, truncates "input_ids" and "attention_mask" to this length.
+
+        Raises:
+            ValueError: If both `keys_mapping` and `keys_to_keep` are None.
+
+        """
+        if (keys_mapping is None) and (keys_to_keep is None):
+            raise ValueError("Either keys_mapping or keys_to_keep must be provided.")
+
+        if keys_mapping is None:
+            keys_mapping = {}
+        if keys_to_keep is None:
+            keys_to_keep = set()
+
+        self.collator = collator
+        self.keys_mapping = deepcopy(keys_mapping)
+        self.max_length = max_length
+
+        keys_to_keep_mapping = {v: v for v in keys_to_keep}
+        self.keys_mapping.update(keys_to_keep_mapping)
+
+    def _truncate_data(self, key: str, value: Any) -> Any:
+        match value:
+            case torch.Tensor():
+                if value.dim() > 2:
+                    raise ValueError(
+                        f"Expected value with dim <= 2 for key {key}, got {value.dim()}"
+                    )
+            case list():
+                if isinstance(value[0], list):
+                    raise ValueError(
+                        f"Expected value with dim <= 2 for key {key}, got nested lists"
+                    )
+        value = value[: self.max_length]
+        return value
+
+    def __call__(self, batch: list[dict[str, Any]]) -> dict[str, Any]:
+        """
+        Align keys and collate the batch.
+
+        Args:
+            batch: A list of dictionaries representing the data batch.
+
+        Returns:
+            The collated batch returned by the underlying collator.
+
+        """
+        aligned_batch = []
+        for item in batch:
+            new_item = {}
+            for k in item.keys():
+                new_key = self.keys_mapping.get(k, None)
+                if new_key is None:
+                    continue
+                value = item[k]
+                if self.max_length is not None and new_key in (
+                    "input_ids",
+                    "attention_mask",
+                ):
+                    value = self._truncate_data(new_key, value)
+                new_item[new_key] = value
+            aligned_batch.append(new_item)
+
+        collated_batch = self.collator(aligned_batch)
+        return collated_batch
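The new `BatchCollatorWithKeyAlignment` only remaps and optionally truncates keys before delegating to the wrapped Hugging Face collator. A minimal usage sketch follows; the dataset keys, tokenizer checkpoint, and token IDs are illustrative assumptions, not taken from the package:

```python
# Hypothetical usage of BatchCollatorWithKeyAlignment; the dataset keys and the
# tokenizer checkpoint are assumptions made for this example.
from transformers import AutoTokenizer, DataCollatorWithPadding

from kostyl.ml.data_processing_utils import BatchCollatorWithKeyAlignment

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
collate_fn = BatchCollatorWithKeyAlignment(
    collator=DataCollatorWithPadding(tokenizer, return_tensors="pt"),
    keys_mapping={"tokens": "input_ids", "mask": "attention_mask"},  # dataset key -> HF key
    keys_to_keep={"labels"},  # passed through under the same name
    max_length=8,  # "input_ids" / "attention_mask" are truncated to this length
)

batch = [
    {"tokens": [101, 7592, 102], "mask": [1, 1, 1], "labels": 0, "extra": "dropped"},
    {"tokens": [101, 7592, 2088, 999, 102], "mask": [1, 1, 1, 1, 1], "labels": 1},
]
collated = collate_fn(batch)
# Keys are remapped and truncated first, then padded into tensors by the HF
# collator; keys absent from keys_mapping/keys_to_keep (e.g. "extra") are dropped.
```

Such a wrapper would typically be passed as `collate_fn` to a `torch.utils.data.DataLoader`.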
{kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/lightning/callbacks/__init__.py

@@ -1,10 +1,8 @@
 from .checkpoint import setup_checkpoint_callback
 from .early_stopping import setup_early_stopping_callback
-from .registry_uploader import ClearMLRegistryUploaderCallback
 
 
 __all__ = [
-    "ClearMLRegistryUploaderCallback",
     "setup_checkpoint_callback",
     "setup_early_stopping_callback",
 ]
{kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/lightning/callbacks/checkpoint.py

@@ -12,10 +12,9 @@ from lightning.pytorch.callbacks import ModelCheckpoint
 from kostyl.ml.configs import CheckpointConfig
 from kostyl.ml.dist_utils import is_main_process
 from kostyl.ml.lightning import KostylLightningModule
+from kostyl.ml.registry_uploader import RegistryUploaderCallback
 from kostyl.utils import setup_logger
 
-from .registry_uploader import RegistryUploaderCallback
-
 
 logger = setup_logger("callbacks/checkpoint.py")
 
{kostyl_toolkit-0.1.30/kostyl/ml/lightning/extenstions → kostyl_toolkit-0.1.32/kostyl/ml/lightning/extensions}/custom_module.py

@@ -20,12 +20,17 @@ from kostyl.ml.schedulers.base import BaseScheduler
 from kostyl.utils import setup_logger
 
 
-
+module_logger = setup_logger(fmt="only_message")
 
 
 class KostylLightningModule(L.LightningModule):
     """Custom PyTorch Lightning Module with logging, checkpointing, and distributed training utilities."""
 
+    @property
+    def process_group(self) -> ProcessGroup | None:
+        """Returns the data parallel process group for distributed training."""
+        return self.get_process_group()
+
     def get_process_group(self) -> ProcessGroup | None:
         """
         Retrieves the data parallel process group for distributed training.
@@ -45,7 +50,7 @@ class KostylLightningModule(L.LightningModule):
         if self.device_mesh is not None:
             dp_mesh = self.device_mesh["data_parallel"]
             if dp_mesh.size() == 1:
-
+                module_logger.warning("Data parallel mesh size is 1, returning None")
                 return None
             dp_pg = dp_mesh.get_group()
         else:
@@ -129,11 +134,16 @@ class KostylLightningModule(L.LightningModule):
         stage: str | None = None,
     ) -> None:
         if stage is not None:
-
-
-
-
-
+            if not isinstance(dictionary, MetricCollection):
+                dictionary = apply_suffix(
+                    metrics=dictionary,
+                    suffix=stage,
+                    add_dist_rank=False,
+                )
+            else:
+                module_logger.warning_once(
+                    "Stage suffixing for MetricCollection is not implemented. Skipping suffixing."
+                )
         super().log_dict(
             dictionary,
             prog_bar,
@@ -161,9 +171,12 @@ class KostylLightningModule(L.LightningModule):
         """
         scheduler: BaseScheduler = self.lr_schedulers()  # type: ignore
         if not isinstance(scheduler, BaseScheduler):
+            module_logger.warning_once(
+                "Scheduler is not an instance of BaseScheduler. Skipping scheduled values logging."
+            )
             return
         scheduler_state_dict = scheduler.current_value()
-        scheduler_state_dict = apply_suffix(scheduler_state_dict, "
+        scheduler_state_dict = apply_suffix(scheduler_state_dict, "scheduled")
         self.log_dict(
             scheduler_state_dict,
             prog_bar=False,
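For orientation, a hedged sketch of how a subclass might use the additions above (the `process_group` property and the stage-aware `log_dict`); `MyModule`, `compute_loss`, and the metric name are hypothetical placeholders, not part of the diff:

```python
# Sketch only: MyModule and compute_loss() are illustrative, not from the package.
from kostyl.ml.lightning import KostylLightningModule


class MyModule(KostylLightningModule):
    def training_step(self, batch, batch_idx):
        loss = self.compute_loss(batch)  # hypothetical helper
        # Plain dicts are routed through apply_suffix(..., suffix=stage);
        # MetricCollection inputs are logged unchanged with a one-time warning.
        self.log_dict({"loss": loss}, prog_bar=True, stage="train")
        # self.process_group is now a read-only alias for get_process_group().
        return loss
```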
kostyl_toolkit-0.1.32/kostyl/ml/lightning/training_utils.py (new file)

@@ -0,0 +1,241 @@
+from dataclasses import dataclass
+from dataclasses import fields
+from pathlib import Path
+from typing import Literal
+from typing import cast
+
+import lightning as L
+import torch
+import torch.distributed as dist
+from clearml import OutputModel
+from clearml import Task
+from lightning.pytorch.callbacks import Callback
+from lightning.pytorch.callbacks import EarlyStopping
+from lightning.pytorch.callbacks import LearningRateMonitor
+from lightning.pytorch.callbacks import ModelCheckpoint
+from lightning.pytorch.loggers import TensorBoardLogger
+from lightning.pytorch.strategies import DDPStrategy
+from lightning.pytorch.strategies import FSDPStrategy
+from torch.distributed import ProcessGroup
+from torch.distributed.fsdp import MixedPrecision
+from torch.nn import Module
+
+from kostyl.ml.configs import CheckpointConfig
+from kostyl.ml.configs import DDPStrategyConfig
+from kostyl.ml.configs import EarlyStoppingConfig
+from kostyl.ml.configs import FSDP1StrategyConfig
+from kostyl.ml.configs import SingleDeviceStrategyConfig
+from kostyl.ml.lightning.callbacks import setup_checkpoint_callback
+from kostyl.ml.lightning.callbacks import setup_early_stopping_callback
+from kostyl.ml.lightning.loggers import setup_tb_logger
+from kostyl.ml.registry_uploader import ClearMLRegistryUploaderCallback
+from kostyl.utils.logging import setup_logger
+
+
+TRAINING_STRATEGIES = (
+    FSDP1StrategyConfig | DDPStrategyConfig | SingleDeviceStrategyConfig
+)
+
+logger = setup_logger(add_rank=True)
+
+
+def estimate_total_steps(
+    trainer: L.Trainer, process_group: ProcessGroup | None = None
+) -> int:
+    """
+    Estimates the total number of training steps based on the
+    dataloader length, accumulation steps, and distributed world size.
+    """  # noqa: D205
+    if dist.is_initialized():
+        world_size = dist.get_world_size(process_group)
+    else:
+        world_size = 1
+
+    datamodule = trainer.datamodule  # type: ignore
+    if datamodule is None:
+        raise ValueError("Trainer must have a datamodule to estimate total steps.")
+    datamodule = cast(L.LightningDataModule, datamodule)
+
+    logger.info("Loading `train_dataloader` to estimate number of stepping batches.")
+    datamodule.setup("fit")
+
+    dataloader_len = len(datamodule.train_dataloader())
+    steps_per_epoch = dataloader_len // trainer.accumulate_grad_batches // world_size
+
+    if trainer.max_epochs is None:
+        raise ValueError("Trainer must have `max_epochs` set to estimate total steps.")
+    total_steps = steps_per_epoch * trainer.max_epochs
+
+    logger.info(
+        f"Total steps: {total_steps} (per-epoch: {steps_per_epoch})\n"
+        f"-> Dataloader len: {dataloader_len}\n"
+        f"-> Accumulate grad batches: {trainer.accumulate_grad_batches}\n"
+        f"-> Epochs: {trainer.max_epochs}\n "
+        f"-> World size: {world_size}"
+    )
+    return total_steps
+
+
+@dataclass
+class Callbacks:
+    """Dataclass to hold PyTorch Lightning callbacks."""
+
+    checkpoint: ModelCheckpoint
+    lr_monitor: LearningRateMonitor
+    early_stopping: EarlyStopping | None = None
+
+    def to_list(self) -> list[Callback]:
+        """Convert dataclass fields to a list of Callbacks. None values are omitted."""
+        callbacks: list[Callback] = [
+            getattr(self, field.name)
+            for field in fields(self)
+            if getattr(self, field.name) is not None
+        ]
+        return callbacks
+
+
+def setup_callbacks(
+    task: Task,
+    root_path: Path,
+    checkpoint_cfg: CheckpointConfig,
+    uploading_mode: Literal["only-best", "every-checkpoint"],
+    output_model: OutputModel,
+    early_stopping_cfg: EarlyStoppingConfig | None = None,
+    config_dict: dict[str, str] | None = None,
+    enable_tag_versioning: bool = False,
+) -> Callbacks:
+    """
+    Set up PyTorch Lightning callbacks for training.
+
+    Creates and configures a set of callbacks including checkpoint saving,
+    learning rate monitoring, model registry uploading, and optional early stopping.
+
+    Args:
+        task: ClearML task for organizing checkpoints by task name and ID.
+        root_path: Root directory for saving checkpoints.
+        checkpoint_cfg: Configuration for checkpoint saving behavior.
+        uploading_mode: Model upload strategy:
+            - `"only-best"`: Upload only the best checkpoint based on monitored metric.
+            - `"every-checkpoint"`: Upload every saved checkpoint.
+        output_model: ClearML OutputModel instance for model registry integration.
+        early_stopping_cfg: Configuration for early stopping. If None, early stopping
+            is disabled.
+        config_dict: Optional configuration dictionary to store with the model
+            in the registry.
+        enable_tag_versioning: Whether to auto-increment version tags (e.g., "v1.0")
+            on the uploaded model.
+
+    Returns:
+        Callbacks dataclass containing configured checkpoint, lr_monitor,
+        and optionally early_stopping callbacks.
+
+    """
+    lr_monitor = LearningRateMonitor(
+        logging_interval="step", log_weight_decay=True, log_momentum=False
+    )
+    model_uploader = ClearMLRegistryUploaderCallback(
+        output_model=output_model,
+        config_dict=config_dict,
+        verbose=True,
+        enable_tag_versioning=enable_tag_versioning,
+    )
+    checkpoint_callback = setup_checkpoint_callback(
+        root_path / "checkpoints" / task.name / task.id,
+        checkpoint_cfg,
+        registry_uploader_callback=model_uploader,
+        uploading_mode=uploading_mode,
+    )
+    if early_stopping_cfg is not None:
+        early_stopping_callback = setup_early_stopping_callback(early_stopping_cfg)
+    else:
+        early_stopping_callback = None
+
+    callbacks = Callbacks(
+        checkpoint=checkpoint_callback,
+        lr_monitor=lr_monitor,
+        early_stopping=early_stopping_callback,
+    )
+    return callbacks
+
+
+def setup_loggers(task: Task, root_path: Path) -> list[TensorBoardLogger]:
+    """
+    Set up PyTorch Lightning loggers for training.
+
+    Args:
+        task: ClearML task used to organize log directories by task name and ID.
+        root_path: Root directory for storing TensorBoard logs.
+
+    Returns:
+        List of configured TensorBoard loggers.
+
+    """
+    loggers = [
+        setup_tb_logger(root_path / "runs" / task.name / task.id),
+    ]
+    return loggers
+
+
+def setup_strategy(
+    strategy_settings: TRAINING_STRATEGIES,
+    devices: list[int] | int,
+    auto_wrap_policy: set[type[Module]] | None = None,
+) -> Literal["auto"] | FSDPStrategy | DDPStrategy:
+    """
+    Configure and return a PyTorch Lightning training strategy.
+
+    Args:
+        strategy_settings: Strategy configuration object. Must be one of:
+            - `FSDP1StrategyConfig`: Fully Sharded Data Parallel strategy (requires 2+ devices).
+            - `DDPStrategyConfig`: Distributed Data Parallel strategy (requires 2+ devices).
+            - `SingleDeviceStrategyConfig`: Single device training (requires exactly 1 device).
+        devices: Device(s) to use for training. Either a list of device IDs or
+            a single integer representing the number of devices.
+        auto_wrap_policy: Set of module types that should be wrapped for FSDP.
+            Required when using `FSDP1StrategyConfig`, ignored otherwise.
+
+    Returns:
+        Configured strategy: `FSDPStrategy`, `DDPStrategy`, or `"auto"` for single device.
+
+    Raises:
+        ValueError: If device count doesn't match strategy requirements or
+            if `auto_wrap_policy` is missing for FSDP.
+
+    """
+    if isinstance(devices, list):
+        num_devices = len(devices)
+    else:
+        num_devices = devices
+
+    match strategy_settings:
+        case FSDP1StrategyConfig():
+            if num_devices < 2:
+                raise ValueError("FSDP strategy requires multiple devices.")
+
+            if auto_wrap_policy is None:
+                raise ValueError("auto_wrap_policy must be provided for FSDP strategy.")
+
+            mixed_precision_config = MixedPrecision(
+                param_dtype=getattr(torch, strategy_settings.param_dtype),
+                reduce_dtype=getattr(torch, strategy_settings.reduce_dtype),
+                buffer_dtype=getattr(torch, strategy_settings.buffer_dtype),
+            )
+            strategy = FSDPStrategy(
+                auto_wrap_policy=auto_wrap_policy,
+                mixed_precision=mixed_precision_config,
+            )
+        case DDPStrategyConfig():
+            if num_devices < 2:
+                raise ValueError("DDP strategy requires at least two devices.")
+            strategy = DDPStrategy(
+                find_unused_parameters=strategy_settings.find_unused_parameters
+            )
+        case SingleDeviceStrategyConfig():
+            if num_devices != 1:
+                raise ValueError("SingleDevice strategy requires exactly one device.")
+            strategy = "auto"
+        case _:
+            raise ValueError(
+                f"Unsupported strategy type: {type(strategy_settings.trainer.strategy)}"
+            )
+    return strategy
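A hedged wiring sketch for the new helpers; the config constructor arguments below are assumptions inferred from the fields `setup_strategy()` reads, not a documented API:

```python
# Sketch only: DDPStrategyConfig(find_unused_parameters=...) is assumed from the
# attribute accessed in setup_strategy(); adjust to the real config definition.
import lightning as L

from kostyl.ml.configs import DDPStrategyConfig
from kostyl.ml.lightning.training_utils import estimate_total_steps, setup_strategy

strategy = setup_strategy(
    strategy_settings=DDPStrategyConfig(find_unused_parameters=False),
    devices=2,
)
trainer = L.Trainer(
    strategy=strategy,
    devices=2,
    max_epochs=10,
    accumulate_grad_batches=4,
)

# Once a datamodule is attached (e.g., during trainer.fit), the total number of
# optimizer steps can be estimated for LR scheduling:
#   total_steps = estimate_total_steps(trainer, process_group=None)
```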
{kostyl_toolkit-0.1.30 → kostyl_toolkit-0.1.32}/kostyl/ml/metrics_formatting.py

@@ -3,14 +3,13 @@ from collections.abc import Mapping
 import torch.distributed as dist
 from torch import Tensor
 from torchmetrics import Metric
-from torchmetrics import MetricCollection
 
 
 def apply_suffix(
-    metrics: Mapping[str, Metric | Tensor | int | float]
+    metrics: Mapping[str, Metric | Tensor | int | float],
     suffix: str,
     add_dist_rank: bool = False,
-) -> Mapping[str, Metric | Tensor | int | float]
+) -> Mapping[str, Metric | Tensor | int | float]:
     """Add stage prefix to metric names."""
     new_metrics_dict = {}
     for key, value in metrics.items():
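The hunk above only repairs the `apply_suffix` signature (a missing comma and return-type colon). For reference, a call in the style used elsewhere in this diff; the exact key format the suffix produces is not visible here and is left unstated:

```python
# Illustrative call; the resulting key naming (separator, rank placement) is an
# implementation detail not shown in this diff.
from kostyl.ml.metrics_formatting import apply_suffix

metrics = {"loss": 0.42, "accuracy": 0.91}
train_metrics = apply_suffix(metrics, suffix="train", add_dist_rank=False)
# With add_dist_rank=True the current distributed rank is presumably appended
# as well, enabling per-rank logging.
```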
{kostyl_toolkit-0.1.30/kostyl/ml/lightning/callbacks → kostyl_toolkit-0.1.32/kostyl/ml}/registry_uploader.py

@@ -5,7 +5,6 @@ from pathlib import Path
 from typing import override
 
 from clearml import OutputModel
-from clearml import Task
 
 from kostyl.ml.clearml.logging_utils import find_version_in_tags
 from kostyl.ml.clearml.logging_utils import increment_version
@@ -29,69 +28,50 @@ class ClearMLRegistryUploaderCallback(RegistryUploaderCallback):
 
     def __init__(
         self,
-
-        output_model_name: str,
-        output_model_tags: list[str] | None = None,
-        verbose: bool = True,
-        enable_tag_versioning: bool = True,
-        label_enumeration: dict[str, int] | None = None,
+        output_model: OutputModel,
         config_dict: dict[str, str] | None = None,
+        verbose: bool = True,
+        enable_tag_versioning: bool = False,
     ) -> None:
         """
         Initializes the ClearMLRegistryUploaderCallback.
 
         Args:
-
-
-            output_model_name: Name for the ClearML output model.
-            output_model_tags: Tags for the output model.
-            verbose: Whether to log messages.
-            label_enumeration: Optional mapping of label names to integer IDs.
+            output_model: ClearML OutputModel instance representing the model to upload.
+            verbose: Whether to log messages during upload.
             config_dict: Optional configuration dictionary to associate with the model.
             enable_tag_versioning: Whether to enable versioning in tags. If True,
                 the version tag (e.g., "v1.0") will be automatically incremented or if not present, added as "v1.0".
 
         """
         super().__init__()
-
-        output_model_tags = []
-
-        self.task = task
-        self.output_model_name = output_model_name
-        self.output_model_tags = output_model_tags
+        self.output_model = output_model
         self.config_dict = config_dict
-        self.label_enumeration = label_enumeration
         self.verbose = verbose
         self.enable_tag_versioning = enable_tag_versioning
 
         self.best_model_path: str = ""
 
-        self._output_model: OutputModel | None = None
         self._last_uploaded_model_path: str = ""
         self._upload_callback: Callable | None = None
+
+        self._validate_tags()
         return
 
-    def
+    def _validate_tags(self) -> None:
+        output_model_tags = self.output_model.tags or []
         if self.enable_tag_versioning:
-            version = find_version_in_tags(
+            version = find_version_in_tags(output_model_tags)
             if version is None:
-
+                output_model_tags.append("v1.0")
             else:
                 new_version = increment_version(version)
-
-
-
-
-
-
-            return OutputModel(
-                task=self.task,
-                name=self.output_model_name,
-                framework="PyTorch",
-                tags=self.output_model_tags,
-                config_dict=None,
-                label_enumeration=self.label_enumeration,
-            )
+                output_model_tags.remove(version)
+                output_model_tags.append(new_version)
+        if "LightningCheckpoint" not in output_model_tags:
+            output_model_tags.append("LightningCheckpoint")
+        self.output_model.tags = output_model_tags
+        return None
 
     @override
     def upload_checkpoint(
@@ -105,18 +85,15 @@ class ClearMLRegistryUploaderCallback(RegistryUploaderCallback):
             logger.info("Model unchanged since last upload")
             return
 
-        if self._output_model is None:
-            self._output_model = self._create_output_model()
-
         if self.verbose:
             logger.info(f"Uploading model from {path}")
 
-        self.
+        self.output_model.update_weights(
            path,
            auto_delete_file=False,
            async_enable=False,
        )
-        self.
+        self.output_model.update_design(config_dict=self.config_dict)
 
         self._last_uploaded_model_path = path
         return
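The refactor moves `OutputModel` construction out of the callback: callers now create the ClearML `OutputModel` themselves and pass it in, and `_validate_tags()` normalizes the tags at init time. A hedged construction sketch follows; the project, model name, tags, and config values are illustrative:

```python
# Illustrative construction under the new signature; names, tags, and the config
# dict are example values.
from clearml import OutputModel, Task

from kostyl.ml.registry_uploader import ClearMLRegistryUploaderCallback

task = Task.init(project_name="demo", task_name="train-run")
output_model = OutputModel(
    task=task,
    name="my-model",
    framework="PyTorch",
    tags=["v1.0"],
)
uploader = ClearMLRegistryUploaderCallback(
    output_model=output_model,
    config_dict={"hidden_size": "768"},  # optional design/config stored with the model
    verbose=True,
    enable_tag_versioning=True,  # bumps the existing "vX.Y" tag (or adds "v1.0")
)
# update_weights()/update_design() are then called on the provided OutputModel
# whenever a new checkpoint is uploaded.
```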
kostyl_toolkit-0.1.30/kostyl/ml/lightning/steps_estimation.py (deleted)

@@ -1,44 +0,0 @@
-from typing import cast
-
-import lightning as L
-import torch.distributed as dist
-from torch.distributed import ProcessGroup
-
-from kostyl.utils.logging import setup_logger
-
-
-logger = setup_logger(add_rank=True)
-
-
-def estimate_total_steps(
-    trainer: L.Trainer, process_group: ProcessGroup | None = None
-) -> int:
-    """Estimates the total number of training steps for a given PyTorch Lightning Trainer."""
-    if dist.is_initialized():
-        world_size = dist.get_world_size(process_group)
-    else:
-        world_size = 1
-
-    datamodule = trainer.datamodule  # type: ignore
-    if datamodule is None:
-        raise ValueError("Trainer must have a datamodule to estimate total steps.")
-    datamodule = cast(L.LightningDataModule, datamodule)
-
-    logger.info("Loading `train_dataloader` to estimate number of stepping batches.")
-    datamodule.setup("fit")
-
-    dataloader_len = len(datamodule.train_dataloader())
-    steps_per_epoch = dataloader_len // trainer.accumulate_grad_batches // world_size
-
-    if trainer.max_epochs is None:
-        raise ValueError("Trainer must have `max_epochs` set to estimate total steps.")
-    total_steps = steps_per_epoch * trainer.max_epochs
-
-    logger.info(
-        f"Total steps: {total_steps} (per-epoch: {steps_per_epoch})\n"
-        f"-> Dataloader len: {dataloader_len}\n"
-        f"-> Accumulate grad batches: {trainer.accumulate_grad_batches}\n"
-        f"-> Epochs: {trainer.max_epochs}\n "
-        f"-> World size: {world_size}"
-    )
-    return total_steps