kostyl-toolkit 0.1.37__py3-none-any.whl → 0.1.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kostyl/ml/base_uploader.py +17 -0
- kostyl/ml/configs/__init__.py +2 -2
- kostyl/ml/configs/mixins.py +50 -0
- kostyl/ml/{data_processing_utils.py → data_collator.py} +6 -3
- kostyl/ml/dist_utils.py +2 -4
- kostyl/ml/integrations/clearml/__init__.py +7 -0
- kostyl/ml/{registry_uploader.py → integrations/clearml/checkpoint_uploader.py} +3 -13
- kostyl/ml/{configs/base_model.py → integrations/clearml/config_mixin.py} +7 -63
- kostyl/ml/{clearml/pulling_utils.py → integrations/clearml/loading_utils.py} +32 -5
- kostyl/ml/integrations/lightning/__init__.py +14 -0
- kostyl/ml/{lightning → integrations/lightning}/callbacks/checkpoint.py +25 -40
- kostyl/ml/{lightning/extensions/custom_module.py → integrations/lightning/module.py} +2 -33
- kostyl/ml/schedulers/__init__.py +4 -4
- kostyl/ml/schedulers/{cosine_with_plateu.py → plateau.py} +59 -36
- {kostyl_toolkit-0.1.37.dist-info → kostyl_toolkit-0.1.38.dist-info}/METADATA +1 -1
- kostyl_toolkit-0.1.38.dist-info/RECORD +40 -0
- {kostyl_toolkit-0.1.37.dist-info → kostyl_toolkit-0.1.38.dist-info}/WHEEL +2 -2
- kostyl/ml/lightning/__init__.py +0 -5
- kostyl/ml/lightning/extensions/__init__.py +0 -5
- kostyl_toolkit-0.1.37.dist-info/RECORD +0 -38
- /kostyl/ml/{clearml → integrations}/__init__.py +0 -0
- /kostyl/ml/{clearml → integrations/clearml}/dataset_utils.py +0 -0
- /kostyl/ml/{clearml/logging_utils.py → integrations/clearml/version_utils.py} +0 -0
- /kostyl/ml/{lightning → integrations/lightning}/callbacks/__init__.py +0 -0
- /kostyl/ml/{lightning → integrations/lightning}/callbacks/early_stopping.py +0 -0
- /kostyl/ml/{lightning → integrations/lightning}/loggers/__init__.py +0 -0
- /kostyl/ml/{lightning → integrations/lightning}/loggers/tb_logger.py +0 -0
- /kostyl/ml/{metrics_formatting.py → integrations/lightning/metrics_formatting.py} +0 -0
- /kostyl/ml/{lightning/extensions/pretrained_model.py → integrations/lightning/mixins.py} +0 -0
- /kostyl/ml/{lightning → integrations/lightning}/utils.py +0 -0
kostyl/ml/base_uploader.py
ADDED
@@ -0,0 +1,17 @@
+from abc import ABC
+from abc import abstractmethod
+from pathlib import Path
+
+from kostyl.utils.logging import setup_logger
+
+
+logger = setup_logger()
+
+
+class ModelCheckpointUploader(ABC):
+    """Abstract base class for uploading model checkpoints to a registry backend."""
+
+    @abstractmethod
+    def upload_checkpoint(self, path: str | Path) -> None:
+        """Upload the checkpoint located at the given path to the configured registry backend."""
+        raise NotImplementedError
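Illustrative sketch (not part of the diff): a minimal concrete implementation of the new base class. The LocalDirUploader name and its target directory are hypothetical; the only confirmed API is the abstract upload_checkpoint(path) method shown above.

from pathlib import Path
from shutil import copy2

from kostyl.ml.base_uploader import ModelCheckpointUploader


class LocalDirUploader(ModelCheckpointUploader):
    """Hypothetical uploader that 'publishes' checkpoints by copying them into a local directory."""

    def __init__(self, target_dir: str | Path) -> None:
        self.target_dir = Path(target_dir)
        self.target_dir.mkdir(parents=True, exist_ok=True)

    def upload_checkpoint(self, path: str | Path) -> None:
        # Copy the checkpoint file instead of pushing it to a real registry backend.
        copy2(Path(path), self.target_dir / Path(path).name)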
kostyl/ml/configs/__init__.py
CHANGED
@@ -1,8 +1,8 @@
-from .base_model import KostylBaseModel
 from .hyperparams import HyperparamsConfig
 from .hyperparams import Lr
 from .hyperparams import Optimizer
 from .hyperparams import WeightDecay
+from .mixins import ConfigLoadingMixin
 from .training_settings import CheckpointConfig
 from .training_settings import DataConfig
 from .training_settings import DDPStrategyConfig
@@ -15,12 +15,12 @@ from .training_settings import TrainingSettings
 
 __all__ = [
     "CheckpointConfig",
+    "ConfigLoadingMixin",
     "DDPStrategyConfig",
     "DataConfig",
     "EarlyStoppingConfig",
     "FSDP1StrategyConfig",
     "HyperparamsConfig",
-    "KostylBaseModel",
     "LightningTrainerParameters",
     "Lr",
     "Optimizer",
kostyl/ml/configs/mixins.py
ADDED
@@ -0,0 +1,50 @@
+from pathlib import Path
+
+from pydantic import BaseModel as PydanticBaseModel
+
+from kostyl.utils.fs import load_config
+
+
+class ConfigLoadingMixin[TConfig: PydanticBaseModel]:
+    """Mixin providing configuration loading functionality for Pydantic models."""
+
+    @classmethod
+    def from_file(
+        cls: type[TConfig],  # pyright: ignore
+        path: str | Path,
+    ) -> TConfig:
+        """
+        Create an instance of the class from a configuration file.
+
+        Args:
+            cls_: The class type to instantiate.
+            path (str | Path): Path to the configuration file.
+
+        Returns:
+            An instance of the class created from the configuration file.
+
+        """
+        config = load_config(path)
+        instance = cls.model_validate(config)
+        return instance
+
+    @classmethod
+    def from_dict(
+        cls: type[TConfig],  # pyright: ignore
+        state_dict: dict,
+    ) -> TConfig:
+        """
+        Creates an instance from a dictionary.
+
+        Args:
+            cls_: The class type to instantiate.
+            state_dict (dict): A dictionary representing the state of the
+                class that must be validated and used for initialization.
+
+        Returns:
+            An initialized instance of the class based on the
+                provided state dictionary.
+
+        """
+        instance = cls.model_validate(state_dict)
+        return instance
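Illustrative sketch (not part of the diff): how the new mixin might be combined with a Pydantic model. The TrainingConfig class and its fields are hypothetical; from_file and from_dict are the methods added above.

from pydantic import BaseModel

from kostyl.ml.configs import ConfigLoadingMixin


class TrainingConfig(ConfigLoadingMixin, BaseModel):
    lr: float = 3e-4
    batch_size: int = 32


# Validate a plain dict into a typed config; from_file(path) works the same way for config files.
cfg = TrainingConfig.from_dict({"lr": 1e-3, "batch_size": 16})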
kostyl/ml/{data_processing_utils.py → data_collator.py}
RENAMED
@@ -36,6 +36,7 @@ class BatchCollatorWithKeyAlignment:
         keys_mapping: A dictionary mapping original keys to new keys.
         keys_to_keep: A set of keys to retain as-is from the original items.
         max_length: If provided, truncates "input_ids" and "attention_mask" to this length.
+            Only 1D tensors/lists are supported.
 
     Raises:
         ValueError: If both `keys_mapping` and `keys_to_keep` are None.
@@ -59,14 +60,16 @@ class BatchCollatorWithKeyAlignment:
     def _truncate_data(self, key: str, value: Any) -> Any:
         match value:
             case torch.Tensor():
-                if value.dim()
+                if value.dim() >= 2:
                     raise ValueError(
-                        f"Expected
+                        f"Expected tensor with dim < 2 for key {key}, got {value.dim()}. "
+                        "Check your data or disable truncation with `max_length=None`."
                     )
             case list():
                 if isinstance(value[0], list):
                     raise ValueError(
-                        f"Expected value with dim <= 2 for key {key}, got nested lists"
+                        f"Expected value with dim <= 2 for key {key}, got nested lists. "
+                        "Check your data or disable truncation with `max_length=None`."
                     )
                 value = value[: self.max_length]
         return value
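Illustrative construction sketch (not part of the diff), based only on the arguments documented above; it assumes keys_mapping, keys_to_keep and max_length are constructor parameters, and the key names and length below are hypothetical. The collator's call convention is not shown in this diff.

from kostyl.ml.data_collator import BatchCollatorWithKeyAlignment

# Rename "text_ids" to "input_ids", keep "labels" as-is, truncate 1D sequences to 512 tokens.
collator = BatchCollatorWithKeyAlignment(
    keys_mapping={"text_ids": "input_ids"},
    keys_to_keep={"labels"},
    max_length=512,
)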
kostyl/ml/dist_utils.py
CHANGED
@@ -40,7 +40,7 @@ def log_dist(
 
     if not dist.is_initialized():
         module_logger.warning_once(
-            "Distributed process group is not initialized
+            "Distributed process group is not initialized. Logging from the current process only."
         )
         log_attr(msg)
         return
@@ -65,7 +65,6 @@
 def scale_lrs_by_world_size(
     lrs: dict[str, float],
     group: dist.ProcessGroup | None = None,
-    config_name: str = "",
    inv_scale: bool = False,
    verbose_level: Literal["only-zero-rank", "world"] | None = None,
 ) -> dict[str, float]:
@@ -79,7 +78,6 @@ def scale_lrs_by_world_size(
         lrs (dict[str, float]): A dictionary of learning rate names and their corresponding values to be scaled.
         group (dist.ProcessGroup | None): Optional process group used to determine
             the target world size. Defaults to the global process group.
-        config_name (str): Human-readable identifier included in log messages.
         inv_scale (bool): If True, use the inverse square-root scale factor.
         verbose_level (Literal["only-zero-rank", "world"] | None): Verbosity level for logging scaled values.
             - "only-zero-rank": Log only from the main process (rank 0).
@@ -102,7 +100,7 @@ def scale_lrs_by_world_size(
         new_value = value * scale
         if verbose_level is not None:
             log_dist(
-                f"
+                f"lr {name.upper()}: {new_value}; OLD: {old_value}",
                 log_scope=verbose_level,
                 group=group,
             )
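Illustrative call (not part of the diff), based on the signature above with the config_name argument now removed. The learning-rate names and values are hypothetical, and an initialized torch.distributed process group is assumed.

from kostyl.ml.dist_utils import scale_lrs_by_world_size

# Scale per-group learning rates by the world-size-dependent factor; values are illustrative.
scaled = scale_lrs_by_world_size(
    {"backbone": 1e-4, "head": 1e-3},
    verbose_level="only-zero-rank",
)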
kostyl/ml/{registry_uploader.py → integrations/clearml/checkpoint_uploader.py}
RENAMED
@@ -1,5 +1,3 @@
-from abc import ABC
-from abc import abstractmethod
 from collections.abc import Callable
 from functools import partial
 from pathlib import Path
@@ -7,22 +5,14 @@ from typing import override
 
 from clearml import OutputModel
 
+from kostyl.ml.base_uploader import ModelCheckpointUploader
 from kostyl.utils.logging import setup_logger
 
 
 logger = setup_logger()
 
 
-class 
-    """Abstract Lightning callback responsible for tracking and uploading the best-performing model checkpoint."""
-
-    @abstractmethod
-    def upload_checkpoint(self, path: str | Path) -> None:
-        """Upload the checkpoint located at the given path to the configured registry backend."""
-        raise NotImplementedError
-
-
-class ClearMLRegistryUploaderCallback(RegistryUploaderCallback):
+class ClearMLCheckpointUploader(ModelCheckpointUploader):
     """PyTorch Lightning callback to upload the best model checkpoint to ClearML."""
 
     def __init__(
@@ -38,7 +28,7 @@ class ClearMLRegistryUploaderCallback(RegistryUploaderCallback):
         verbose: bool = True,
     ) -> None:
         """
-        Initializes the
+        Initializes the ClearMLRegistryUploader.
 
         Args:
             model_name: The name for the newly created model.
kostyl/ml/{configs/base_model.py → integrations/clearml/config_mixin.py}
RENAMED
@@ -1,75 +1,25 @@
 from pathlib import Path
-from typing import Self
-from typing import TypeVar
 
 from caseconverter import pascalcase
 from caseconverter import snakecase
 from clearml import Task
-from pydantic import BaseModel as PydanticBaseModel
 
+from kostyl.ml.configs import ConfigLoadingMixin
 from kostyl.utils.dict_manipulations import convert_to_flat_dict
 from kostyl.utils.dict_manipulations import flattened_dict_to_nested
 from kostyl.utils.fs import load_config
 
 
-
-
-
-class BaseModelWithConfigLoading(PydanticBaseModel):
-    """Pydantic class providing basic configuration loading functionality."""
-
-    @classmethod
-    def from_file(
-        cls: type[Self],  # pyright: ignore
-        path: str | Path,
-    ) -> Self:
-        """
-        Create an instance of the class from a configuration file.
-
-        Args:
-            cls_: The class type to instantiate.
-            path (str | Path): Path to the configuration file.
-
-        Returns:
-            An instance of the class created from the configuration file.
-
-        """
-        config = load_config(path)
-        instance = cls.model_validate(config)
-        return instance
-
-    @classmethod
-    def from_dict(
-        cls: type[Self],  # pyright: ignore
-        state_dict: dict,
-    ) -> Self:
-        """
-        Creates an instance from a dictionary.
-
-        Args:
-            cls_: The class type to instantiate.
-            state_dict (dict): A dictionary representing the state of the
-                class that must be validated and used for initialization.
-
-        Returns:
-            An initialized instance of the class based on the
-                provided state dictionary.
-
-        """
-        instance = cls.model_validate(state_dict)
-        return instance
-
-
-class BaseModelWithClearmlSyncing(BaseModelWithConfigLoading):
-    """Pydantic class providing ClearML configuration loading and syncing functionality."""
+class BaseModelWithClearmlSyncing[TConfig: ConfigLoadingMixin]:
+    """Mixin providing ClearML task configuration syncing functionality for Pydantic models."""
 
     @classmethod
     def connect_as_file(
-        cls: type[
+        cls: type[TConfig],  # pyright: ignore
         task: Task,
         path: str | Path,
         alias: str | None = None,
-    ) ->
+    ) -> TConfig:
         """
         Connects the configuration file to a ClearML task and creates an instance of the class from it.
 
@@ -104,11 +54,11 @@ class BaseModelWithClearmlSyncing(BaseModelWithConfigLoading):
 
     @classmethod
     def connect_as_dict(
-        cls: type[
+        cls: type[TConfig],  # pyright: ignore
         task: Task,
         path: str | Path,
         alias: str | None = None,
-    ) ->
+    ) -> TConfig:
         """
         Connects configuration from a file as a dictionary to a ClearML task and creates an instance of the class.
 
@@ -135,9 +85,3 @@ class BaseModelWithClearmlSyncing(BaseModelWithConfigLoading):
 
         model = cls.from_dict(state_dict=config)
         return model
-
-
-class KostylBaseModel(BaseModelWithClearmlSyncing):
-    """A Pydantic model class with basic configuration loading functionality."""
-
-    pass
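Illustrative sketch (not part of the diff): combining the two new mixins now that KostylBaseModel is gone. The ExperimentConfig class, its fields, the project/task names, the config path and the mixin ordering are all hypothetical; connect_as_dict(task, path, alias=None) is the signature shown above, and a reachable ClearML server is assumed.

from clearml import Task
from pydantic import BaseModel

from kostyl.ml.configs import ConfigLoadingMixin
from kostyl.ml.integrations.clearml.config_mixin import BaseModelWithClearmlSyncing


class ExperimentConfig(BaseModelWithClearmlSyncing, ConfigLoadingMixin, BaseModel):
    lr: float = 3e-4
    epochs: int = 10


task = Task.init(project_name="demo", task_name="train")  # hypothetical names
# Attach the config file to the task and build a validated config instance from it.
cfg = ExperimentConfig.connect_as_dict(task, "configs/train.yaml")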
kostyl/ml/{clearml/pulling_utils.py → integrations/clearml/loading_utils.py}
RENAMED
@@ -9,9 +9,26 @@ from transformers import AutoTokenizer
 from transformers import PreTrainedModel
 from transformers import PreTrainedTokenizerBase
 
-
-
-
+
+try:
+    from kostyl.ml.integrations.lightning import (
+        LightningCheckpointLoaderMixin,  # pyright: ignore[reportAssignmentType]
+    )
+
+    LIGHTING_MIXIN_AVAILABLE = True
+except ImportError:
+
+    class LightningCheckpointLoaderMixin(PreTrainedModel):  # noqa: D101
+        pass  # type: ignore
+
+        @classmethod
+        def from_lightning_checkpoint(cls, *args: Any, **kwargs: Any) -> Any:  # noqa: D103
+            raise ImportError(
+                "Loading from Lightning checkpoints requires lightning integration. "
+                "Please package install via 'pip install lightning' to enable this functionality."
+            )
+
+    LIGHTING_MIXIN_AVAILABLE = False
 
 
 def get_tokenizer_from_clearml(
@@ -89,13 +106,23 @@ def get_model_from_clearml[
     local_path = Path(input_model.get_local_copy(raise_on_error=True))
 
     if local_path.is_dir() and input_model._is_package():
+        if not issubclass(model, (PreTrainedModel, AutoModel)):
+            raise ValueError(
+                f"Model class {model.__name__} must be a subclass of PreTrainedModel or AutoModel for directory loads."
+            )
         model_instance = model.from_pretrained(local_path, **kwargs)
     elif local_path.suffix == ".ckpt":
+        if not LIGHTING_MIXIN_AVAILABLE:
+            raise ImportError(
+                "Loading from Lightning checkpoints requires lightning integration. "
+                "Please package install via 'pip install lightning' to enable this functionality."
+            )
         if not issubclass(model, LightningCheckpointLoaderMixin):
             raise ValueError(
-                f"Model class {model.__name__} is not compatible with Lightning checkpoints
+                f"Model class {model.__name__} is not compatible with Lightning checkpoints "
+                "(must inherit from LightningCheckpointLoaderMixin)."
             )
-        model_instance = model.from_lightning_checkpoint(local_path, **kwargs)
+        model_instance = model.from_lightning_checkpoint(local_path, **kwargs)  # type: ignore
     else:
         raise ValueError(
             f"Unsupported model format for path: {local_path}. "
kostyl/ml/integrations/lightning/__init__.py
ADDED
@@ -0,0 +1,14 @@
+try:
+    import lightning  # noqa: F401
+except ImportError as e:
+    raise ImportError(
+        "Lightning integration requires the 'lightning' package. "
+        "Please install it via 'pip install lightning'."
+    ) from e
+
+
+from .mixins import LightningCheckpointLoaderMixin
+from .module import KostylLightningModule
+
+
+__all__ = ["KostylLightningModule", "LightningCheckpointLoaderMixin"]
kostyl/ml/{lightning → integrations/lightning}/callbacks/checkpoint.py
RENAMED
@@ -9,17 +9,16 @@ import torch.distributed as dist
 from lightning.fabric.utilities.types import _PATH
 from lightning.pytorch.callbacks import ModelCheckpoint
 
+from kostyl.ml.base_uploader import ModelCheckpointUploader
 from kostyl.ml.configs import CheckpointConfig
 from kostyl.ml.dist_utils import is_local_zero_rank
-from kostyl.ml.lightning import KostylLightningModule
-from kostyl.ml.registry_uploader import RegistryUploaderCallback
 from kostyl.utils import setup_logger
 
 
 logger = setup_logger("callbacks/checkpoint.py")
 
 
-class 
+class ModelCheckpointWithCheckpointUploader(ModelCheckpoint):
     r"""
     Save the model after every epoch by monitoring a quantity. Every logged metrics are passed to the
     :class:`~lightning.pytorch.loggers.logger.Logger` for the version it gets saved in the same directory as the
@@ -229,8 +228,8 @@ class ModelCheckpointWithRegistryUploader(ModelCheckpoint):
 
     def __init__(  # noqa: D107
         self,
-
-
+        checkpoint_uploader: ModelCheckpointUploader,
+        upload_strategy: Literal["only-best", "every-checkpoint"] = "only-best",
         dirpath: _PATH | None = None,
         filename: str | None = None,
         monitor: str | None = None,
@@ -247,9 +246,9 @@ class ModelCheckpointWithRegistryUploader(ModelCheckpoint):
         save_on_train_epoch_end: bool | None = None,
         enable_version_counter: bool = True,
     ) -> None:
-        self.
+        self.registry_uploader = checkpoint_uploader
         self.process_group: dist.ProcessGroup | None = None
-        self.
+        self.upload_strategy = upload_strategy
         super().__init__(
             dirpath=dirpath,
             filename=filename,
@@ -269,40 +268,26 @@ class ModelCheckpointWithRegistryUploader(ModelCheckpoint):
         )
         return
 
-    @override
-    def setup(
-        self,
-        trainer: pl.Trainer,
-        pl_module: pl.LightningModule | KostylLightningModule,
-        stage: str,
-    ) -> None:
-        super().setup(trainer, pl_module, stage)
-        if isinstance(pl_module, KostylLightningModule):
-            self.process_group = pl_module.get_process_group()
-        return
-
     @override
     def _save_checkpoint(self, trainer: "pl.Trainer", filepath: str) -> None:
         super()._save_checkpoint(trainer, filepath)
-        if
-
-        if trainer.is_global_zero and self.registry_uploader_callback is not None:
-            match self.uploading_mode:
+        if trainer.is_global_zero and self.registry_uploader is not None:
+            match self.upload_strategy:
                 case "every-checkpoint":
-                    self.
+                    self.registry_uploader.upload_checkpoint(filepath)
                 case "only-best":
                     if filepath == self.best_model_path:
-                        self.
+                        self.registry_uploader.upload_checkpoint(filepath)
         return
 
 
 def setup_checkpoint_callback(
     dirpath: Path,
     ckpt_cfg: CheckpointConfig,
-
-
+    checkpoint_uploader: ModelCheckpointUploader | None = None,
+    upload_strategy: Literal["only-best", "every-checkpoint"] | None = None,
     remove_folder_if_exists: bool = True,
-) ->
+) -> ModelCheckpointWithCheckpointUploader | ModelCheckpoint:
     """
     Create and configure a checkpoint callback for model saving.
 
@@ -313,29 +298,29 @@ def setup_checkpoint_callback(
     Args:
         dirpath: Path to the directory for saving checkpoints.
         ckpt_cfg: Checkpoint configuration (filename, monitor, mode, save_top_k).
-
-
-
+        checkpoint_uploader: Optional checkpoint uploader instance. If provided, enables
+            uploading of checkpoints to a remote registry.
+        upload_strategy: Checkpoint upload mode:
            - "only-best": only the best checkpoint is uploaded
            - "every-checkpoint": every saved checkpoint is uploaded
-            Must be specified together with
+            Must be specified together with checkpoint_uploader.
         remove_folder_if_exists: If True, removes existing checkpoint directory before creating a new one.
 
     Returns:
-
+        ModelCheckpointWithCheckpointUploader if checkpoint_uploader is provided,
         otherwise standard ModelCheckpoint.
 
     Raises:
-        ValueError: If only one of
+        ValueError: If only one of checkpoint_uploader or uploading_mode is None.
 
     Note:
        If the dirpath directory already exists, it will be removed and recreated
        (only on the main process in distributed training) if remove_folder_if_exists is True.
 
    """
-    if (
+    if (checkpoint_uploader is None) != (upload_strategy is None):
        raise ValueError(
-            "Both
+            "Both checkpoint_uploader and upload_strategy must be provided or neither."
        )
 
    if dirpath.exists():
@@ -348,8 +333,8 @@ def setup_checkpoint_callback(
    logger.info(f"Creating checkpoint directory {dirpath}.")
    dirpath.mkdir(parents=True, exist_ok=True)
 
-    if (
-        checkpoint_callback =
+    if (checkpoint_uploader is not None) and (upload_strategy is not None):
+        checkpoint_callback = ModelCheckpointWithCheckpointUploader(
            dirpath=dirpath,
            filename=ckpt_cfg.filename,
            save_top_k=ckpt_cfg.save_top_k,
@@ -357,8 +342,8 @@ def setup_checkpoint_callback(
            mode=ckpt_cfg.mode,
            verbose=True,
            save_weights_only=ckpt_cfg.save_weights_only,
-
-
+            checkpoint_uploader=checkpoint_uploader,
+            upload_strategy=upload_strategy,
        )
    else:
        checkpoint_callback = ModelCheckpoint(
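Illustrative wiring of the refactored callback factory (not part of the diff). The CheckpointConfig field values and the checkpoint directory are hypothetical, and constructing CheckpointConfig by keyword is an assumption based on the fields referenced above.

from pathlib import Path

from kostyl.ml.configs import CheckpointConfig
from kostyl.ml.integrations.lightning.callbacks.checkpoint import setup_checkpoint_callback

ckpt_cfg = CheckpointConfig(  # field values are illustrative
    filename="epoch{epoch:02d}",
    monitor="val/loss",
    mode="min",
    save_top_k=1,
    save_weights_only=False,
)

# Without an uploader this returns a plain ModelCheckpoint; passing checkpoint_uploader
# together with upload_strategy returns ModelCheckpointWithCheckpointUploader instead.
callback = setup_checkpoint_callback(dirpath=Path("checkpoints"), ckpt_cfg=ckpt_cfg)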
kostyl/ml/{lightning/extensions/custom_module.py → integrations/lightning/module.py}
RENAMED
@@ -5,17 +5,15 @@ from typing import override
 
 import lightning as L
 import torch
-import torch.distributed as dist
 from lightning.pytorch.strategies import FSDPStrategy
 from torch import nn
-from torch.distributed import ProcessGroup
 from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
 from torchmetrics import Metric
 from torchmetrics import MetricCollection
 from transformers import PretrainedConfig
 from transformers import PreTrainedModel
 
-from kostyl.ml.metrics_formatting import apply_suffix
+from kostyl.ml.integrations.lightning.metrics_formatting import apply_suffix
 from kostyl.ml.schedulers.base import BaseScheduler
 from kostyl.utils import setup_logger
 
@@ -26,32 +24,6 @@ module_logger = setup_logger(fmt="only_message")
 class KostylLightningModule(L.LightningModule):
     """Custom PyTorch Lightning Module with logging, checkpointing, and distributed training utilities."""
 
-    def get_process_group(self) -> ProcessGroup | None:
-        """
-        Retrieves the data parallel process group for distributed training.
-
-        This method checks if distributed processing is initialized. If a device mesh is provided,
-        it extracts the data parallel mesh and returns its process group, unless the mesh size is 1,
-        in which case it logs a warning and returns None. If no device mesh is provided, it returns
-        the world process group.
-
-        Returns:
-            ProcessGroup | None: The data parallel process group if available and valid, otherwise None.
-
-        """
-        if not dist.is_initialized():
-            return None
-
-        if self.device_mesh is not None:
-            dp_mesh = self.device_mesh["data_parallel"]
-            if dp_mesh.size() == 1:
-                module_logger.warning("Data parallel mesh size is 1, returning None")
-                return None
-            dp_pg = dp_mesh.get_group()
-        else:
-            dp_pg = dist.group.WORLD
-        return dp_pg
-
     @property
     def model_instance(self) -> PreTrainedModel | nn.Module:
         """Returns the underlying model."""
@@ -60,10 +32,7 @@ class KostylLightningModule(L.LightningModule):
     @property
     def model_config(self) -> PretrainedConfig | None:
         """Returns the model configuration if available."""
-
-        if hasattr(model, "config"):
-            return model.config  # type: ignore
-        return None
+        raise NotImplementedError
 
     @property
     def grad_clip_val(self) -> float | None:
kostyl/ml/schedulers/__init__.py
CHANGED
@@ -1,18 +1,18 @@
 from .composite import CompositeScheduler
 from .cosine import CosineParamScheduler
 from .cosine import CosineScheduler
-from .cosine_with_plateu import CosineWithPlateauParamScheduler
-from .cosine_with_plateu import CosineWithPlateuScheduler
 from .linear import LinearParamScheduler
 from .linear import LinearScheduler
+from .plateau import PlateauWithAnnealingParamScheduler
+from .plateau import PlateauWithAnnealingScheduler
 
 
 __all__ = [
     "CompositeScheduler",
     "CosineParamScheduler",
     "CosineScheduler",
-    "CosineWithPlateauParamScheduler",
-    "CosineWithPlateuScheduler",
    "LinearParamScheduler",
    "LinearScheduler",
+    "PlateauWithAnnealingParamScheduler",
+    "PlateauWithAnnealingScheduler",
 ]
kostyl/ml/schedulers/{cosine_with_plateu.py → plateau.py}
RENAMED
@@ -1,4 +1,5 @@
 from typing import Any
+from typing import Literal
 from typing import override
 
 import numpy as np
@@ -7,20 +8,25 @@
 from .base import BaseScheduler
 
 
-class 
-    """Core
+class _PlateauWithAnnealingCore(BaseScheduler):
+    """Core annealing with plateau scheduler logic."""
 
     def __init__(
         self,
         param_name: str,
         num_iters: int,
-
+        plateau_value: float,
         final_value: float,
         plateau_ratio: float,
         warmup_value: float | None = None,
         warmup_ratio: float | None = None,
         freeze_ratio: float | None = None,
+        annealing_type: Literal["cosine", "linear"] = "cosine",
     ) -> None:
+        if annealing_type not in ("cosine", "linear"):
+            raise ValueError(
+                f"Annealing type must be 'cosine' or 'linear', got {annealing_type}."
+            )
         if warmup_ratio is not None:
             if not (0 < warmup_ratio < 1):
                 raise ValueError(f"Warmup ratio must be in (0, 1), got {warmup_ratio}.")
@@ -47,16 +53,17 @@ class _CosineWithPlateauSchedulerCore(BaseScheduler):
 
         self.param_name = param_name
         self.num_iters = num_iters
-        self.
+        self.plateau_value = plateau_value
         self.final_value = final_value
-        self.
+        self.annealing_ratio = 1 - pre_annealing_ratio
         self.plateau_ratio = plateau_ratio
         self.warmup_ratio = warmup_ratio
         self.warmup_value = warmup_value
         self.freeze_ratio = freeze_ratio
+        self.annealing_type = annealing_type
 
         self.scheduled_values: np.ndarray = np.array([], dtype=np.float64)
-        self.current_value_ = self.
+        self.current_value_ = self.plateau_value
         return
 
     def _create_scheduler(self) -> None:
@@ -72,28 +79,41 @@ class _CosineWithPlateauSchedulerCore(BaseScheduler):
         if self.warmup_ratio is not None and self.warmup_value is not None:
             warmup_iters = int(self.num_iters * self.warmup_ratio)
             warmup_schedule = np.linspace(
-                self.warmup_value, self.
+                self.warmup_value, self.plateau_value, warmup_iters, dtype=np.float64
             )
         else:
             warmup_iters = 0
             warmup_schedule = np.array([], dtype=np.float64)
 
-        # Create
-        if self.
-
-
-
-
+        # Create annealing schedule
+        if self.annealing_ratio > 0:
+            annealing_iters = int(self.num_iters * self.annealing_ratio)
+            match self.annealing_type:
+                case "cosine":
+                    iters = np.arange(annealing_iters)
+                    annealing_schedule = self.final_value + 0.5 * (
+                        self.plateau_value - self.final_value
+                    ) * (1 + np.cos(np.pi * iters / len(iters)))
+                case "linear":
+                    annealing_schedule = np.linspace(
+                        self.plateau_value,
+                        self.final_value,
+                        annealing_iters,
+                        dtype=np.float64,
+                    )
+                case _:
+                    raise ValueError(
+                        f"Unsupported annealing type: {self.annealing_type}"
+                    )
         else:
-
-
+            annealing_iters = 0
+            annealing_schedule = np.array([], dtype=np.float64)
 
-        plateau_iters =
-            self.num_iters - warmup_iters - freeze_iters - cosine_annealing_iters
-        )
+        plateau_iters = self.num_iters - warmup_iters - freeze_iters - annealing_iters
         if plateau_iters > 0:
-            plateau_schedule = np.full(
+            plateau_schedule = np.full(
+                plateau_iters, self.plateau_value, dtype=np.float64
+            )
         else:
             plateau_schedule = np.array([], dtype=np.float64)
 
@@ -103,7 +123,7 @@ class _CosineWithPlateauSchedulerCore(BaseScheduler):
                 freeze_schedule,
                 warmup_schedule,
                 plateau_schedule,
-
+                annealing_schedule,
             )
         )
         self._verify()
@@ -137,12 +157,12 @@ class _CosineWithPlateauSchedulerCore(BaseScheduler):
         return {self.param_name: self.current_value_}
 
 
-class 
+class PlateauWithAnnealingScheduler(_PlateauWithAnnealingCore):
     """
-    Applies
+    Applies an annealing schedule with plateau to an optimizer param-group field.
 
-    Schedule phases: freeze (0) → warmup → plateau (
-    The plateau phase maintains the
+    Schedule phases: freeze (0) → warmup → plateau (plateau_value) → annealing (cosine/linear) to final_value.
+    The plateau phase maintains the plateau_value before annealing begins.
     """
 
     def __init__(
@@ -150,30 +170,32 @@ class CosineWithPlateuScheduler(_CosineWithPlateauSchedulerCore):
         optimizer: torch.optim.Optimizer,
         param_group_field: str,
         num_iters: int,
-
+        plateau_value: float,
         final_value: float,
         plateau_ratio: float,
         warmup_value: float | None = None,
         warmup_ratio: float | None = None,
         freeze_ratio: float | None = None,
+        annealing_type: Literal["cosine", "linear"] = "cosine",
         multiplier_field: str | None = None,
         skip_if_zero: bool = False,
         apply_if_field: str | None = None,
         ignore_if_field: str | None = None,
     ) -> None:
         """
-        Configure
+        Configure annealing scheduling for matching optimizer groups.
 
         Args:
             optimizer: Optimizer whose param groups are updated in-place.
             param_group_field: Name of the field that receives the scheduled value.
             num_iters: Number of scheduler iterations before clamping at ``final_value``.
-
-            final_value: Value approached as iterations progress during
-            plateau_ratio: Fraction of iterations to maintain ``
-            warmup_ratio: Optional fraction of iterations to linearly ramp from ``warmup_value`` to ``
+            plateau_value: Value maintained during plateau phase and used as annealing start.
+            final_value: Value approached as iterations progress during annealing.
+            plateau_ratio: Fraction of iterations to maintain ``plateau_value`` before annealing.
+            warmup_ratio: Optional fraction of iterations to linearly ramp from ``warmup_value`` to ``plateau_value``.
             warmup_value: Starting value for the warmup ramp.
             freeze_ratio: Optional fraction of iterations to keep the value frozen at zero at the beginning.
+            annealing_type: Type of annealing from plateau to final value ("cosine" or "linear").
            multiplier_field: Optional per-group multiplier applied to the scheduled value.
            skip_if_zero: Leave groups untouched when their target field equals zero.
            apply_if_field: Require this flag to be present in a param group before updating.
@@ -188,12 +210,13 @@ class CosineWithPlateuScheduler(_CosineWithPlateauSchedulerCore):
         super().__init__(
             param_name=param_group_field,
             num_iters=num_iters,
-
+            plateau_value=plateau_value,
             final_value=final_value,
             plateau_ratio=plateau_ratio,
             warmup_ratio=warmup_ratio,
             warmup_value=warmup_value,
             freeze_ratio=freeze_ratio,
+            annealing_type=annealing_type,
         )
         self.param_group_field = param_group_field
         return
@@ -242,12 +265,12 @@ class CosineWithPlateuScheduler(_CosineWithPlateauSchedulerCore):
         return
 
 
-class 
+class PlateauWithAnnealingParamScheduler(_PlateauWithAnnealingCore):
     """
-    Standalone
+    Standalone annealing scheduler with plateau for non-optimizer parameters.
 
-    Schedule phases: freeze (0) → warmup → plateau (
-    The plateau phase maintains the
+    Schedule phases: freeze (0) → warmup → plateau (plateau_value) → annealing (cosine/linear) to final_value.
+    The plateau phase maintains the plateau_value before annealing begins.
     """
 
     @override
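Illustrative construction sketch for the renamed scheduler (not part of the diff). The model, optimizer and values below are hypothetical; only the constructor arguments documented above are used, and the per-iteration update call is not shown in this diff.

import torch

from kostyl.ml.schedulers import PlateauWithAnnealingScheduler

model = torch.nn.Linear(8, 2)  # hypothetical model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

scheduler = PlateauWithAnnealingScheduler(
    optimizer=optimizer,
    param_group_field="lr",
    num_iters=10_000,
    plateau_value=1e-3,
    final_value=1e-5,
    plateau_ratio=0.4,
    warmup_value=1e-6,
    warmup_ratio=0.1,
    annealing_type="cosine",
)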
kostyl_toolkit-0.1.38.dist-info/RECORD
ADDED
@@ -0,0 +1,40 @@
+kostyl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kostyl/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kostyl/ml/base_uploader.py,sha256=KxHuohCcNK18kTVFBBqDu_IOQefluhSXOzwC56O66wc,484
+kostyl/ml/configs/__init__.py,sha256=djYjLxA7riFcSibAKfWHns-BCESEPrqSz_ZY2rJO-cc,913
+kostyl/ml/configs/hyperparams.py,sha256=lvtbvOFEoTBAJug7FR35xMQdPLgDQjRoP2fyDP-jD7E,3305
+kostyl/ml/configs/mixins.py,sha256=xHHAoRoPbzP9ECFP9duzg6SzegHcoLI8Pr9NrLoWNHs,1411
+kostyl/ml/configs/training_settings.py,sha256=wT9CHuLaKrLwonsc87Ee421EyFis_c9fqOgn9bSClm8,2747
+kostyl/ml/data_collator.py,sha256=kxiaMDKwSKXGBtrF8yXxHcypf7t_6syU-NwO1LcX50k,4062
+kostyl/ml/dist_utils.py,sha256=UFNMLEHc0A5F6KvTRG8GQPpRDwG4m5dvM__UvXNc2aQ,4526
+kostyl/ml/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kostyl/ml/integrations/clearml/__init__.py,sha256=3TBVI-3fE9ZzuvOLEohW9TOK0BZTLD5JiYalAVDkocc,217
+kostyl/ml/integrations/clearml/checkpoint_uploader.py,sha256=PupFi7jKROsIddOz7X5DhV7nUNdDZg5kKaaLvzdCHlY,4012
+kostyl/ml/integrations/clearml/config_mixin.py,sha256=70QRicU7etiDzLX-MplqVX8uFm5siuPrM8KbTOriZnQ,3308
+kostyl/ml/integrations/clearml/dataset_utils.py,sha256=eij_sr2KDhm8GxEbVbK8aBjPsuVvLl9-PIGGaKVgXLA,1729
+kostyl/ml/integrations/clearml/loading_utils.py,sha256=NAMmB9NTGCXCHh-bR_nrQZyqImUVZqicNjExDyPM2mU,5224
+kostyl/ml/integrations/clearml/version_utils.py,sha256=GBjIIZbH_itd5sj7XpvxjkyZwxxGOpEcQ3BiWaJTyq8,1210
+kostyl/ml/integrations/lightning/__init__.py,sha256=r96os8kTuKIAymx3k9Td1JBrO2PH7nQAWUC54NsY5yY,392
+kostyl/ml/integrations/lightning/callbacks/__init__.py,sha256=EnKkNwwNDZnEqKRlpY4FVrqP88ECPF6nlT2bSLUIKRk,194
+kostyl/ml/integrations/lightning/callbacks/checkpoint.py,sha256=SfcaQRkXviMUej0UgrfXcqMDlRKYaAN3rgYCMKI97Os,18433
+kostyl/ml/integrations/lightning/callbacks/early_stopping.py,sha256=D5nyjktCJ9XYAf28-kgXG8jORvXLl1N3nbDQnvValPM,615
+kostyl/ml/integrations/lightning/loggers/__init__.py,sha256=e51dszaoJbuzwBkbdugmuDsPldoSO4yaRgmZUg1Bdy0,71
+kostyl/ml/integrations/lightning/loggers/tb_logger.py,sha256=CpjlcEIT187cJXJgRYafqfzvcnwPgPaVZ0vLUflIr7k,899
+kostyl/ml/integrations/lightning/metrics_formatting.py,sha256=U6vdNENZLvp2dT1L3HqFKtXrHwGKoDXN93hvamPGHjM,1341
+kostyl/ml/integrations/lightning/mixins.py,sha256=hVIsIUu6Iryrz6S7GQTqog9vNq8LQyjJd2aoJ5Ws6KU,5253
+kostyl/ml/integrations/lightning/module.py,sha256=39hcVNZSGyj5tLpXyX8IoqMGWt5vf6-Bx5JnNJ2-Wag,5218
+kostyl/ml/integrations/lightning/utils.py,sha256=DhLy_3JA5VyMQkB1v6xxRxDNHfisjXFYVjuIKPpO81M,1967
+kostyl/ml/params_groups.py,sha256=nUyw5d06Pvy9QPiYtZzLYR87xwXqJLxbHthgQH8oSCM,3583
+kostyl/ml/schedulers/__init__.py,sha256=VIo8MOP4w5Ll24XqFb3QGi2rKvys6c0dEFYPIdDoPlw,526
+kostyl/ml/schedulers/base.py,sha256=bjmwgdZpnSqpCnHPnKC6MEiRO79cwxMJpZq-eQVNs2M,1353
+kostyl/ml/schedulers/composite.py,sha256=ee4xlMDMMtjKPkbTF2ue9GTr9DuGCGjZWf11mHbi6aE,2387
+kostyl/ml/schedulers/cosine.py,sha256=y8ylrgVOkVcr2-ExoqqNW--tdDX88TBYPQCOppIf2_M,8685
+kostyl/ml/schedulers/linear.py,sha256=RnnnblRuRXP3LT03QVIHUaK2kNsiMP1AedrMoeyh3qk,5843
+kostyl/ml/schedulers/plateau.py,sha256=N-hiostPtTR0W4xnEJYB_1dv0DRx39iufLkGUrSIoWE,11235
+kostyl/utils/__init__.py,sha256=hkpmB6c5pr4Ti5BshOROebb7cvjDZfNCw83qZ_FFKMM,240
+kostyl/utils/dict_manipulations.py,sha256=e3vBicID74nYP8lHkVTQc4-IQwoJimrbFELy5uSF6Gk,1073
+kostyl/utils/fs.py,sha256=gAQNIU4R_2DhwjgzOS8BOMe0gZymtY1eZwmdgOdDgqo,510
+kostyl/utils/logging.py,sha256=CgNFNogcK0hoZmygvBWlTcq5A3m2Pfv9eOAP_gwx0pM,6633
+kostyl_toolkit-0.1.38.dist-info/WHEEL,sha256=e_m4S054HL0hyR3CpOk-b7Q7fDX6BuFkgL5OjAExXas,80
+kostyl_toolkit-0.1.38.dist-info/METADATA,sha256=nz5AzlWjKBqh7OZCklk-efWZ1jVDihw3YrrpLyoII3k,4269
+kostyl_toolkit-0.1.38.dist-info/RECORD,,
kostyl_toolkit-0.1.37.dist-info/RECORD
DELETED
@@ -1,38 +0,0 @@
-kostyl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kostyl/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kostyl/ml/clearml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kostyl/ml/clearml/dataset_utils.py,sha256=eij_sr2KDhm8GxEbVbK8aBjPsuVvLl9-PIGGaKVgXLA,1729
-kostyl/ml/clearml/logging_utils.py,sha256=GBjIIZbH_itd5sj7XpvxjkyZwxxGOpEcQ3BiWaJTyq8,1210
-kostyl/ml/clearml/pulling_utils.py,sha256=jMlVXcYRumwWnPlELRlgEdfq5L6Wir_EcfTmOoWBLTA,4077
-kostyl/ml/configs/__init__.py,sha256=IetcivbqYGutowLqxdKp7QR4tkXKBr4m8t4Zkk9jHZU,911
-kostyl/ml/configs/base_model.py,sha256=Eofn14J9RsjpVx_J4rp6C19pDDCANU4hr3JtX-d0FpQ,4820
-kostyl/ml/configs/hyperparams.py,sha256=lvtbvOFEoTBAJug7FR35xMQdPLgDQjRoP2fyDP-jD7E,3305
-kostyl/ml/configs/training_settings.py,sha256=wT9CHuLaKrLwonsc87Ee421EyFis_c9fqOgn9bSClm8,2747
-kostyl/ml/data_processing_utils.py,sha256=jjEjV0S0wREgZkzg27ip0LpI8cQqkwe2QwATmAqm9-g,3832
-kostyl/ml/dist_utils.py,sha256=lK9_aAh9L1SvvXWzcWiBoFjczfDiKzEpcno5csImAYQ,4635
-kostyl/ml/lightning/__init__.py,sha256=R36PImjVvzBF9t_z9u6RYVnUFJJ-sNDUOdboWUojHmM,173
-kostyl/ml/lightning/callbacks/__init__.py,sha256=EnKkNwwNDZnEqKRlpY4FVrqP88ECPF6nlT2bSLUIKRk,194
-kostyl/ml/lightning/callbacks/checkpoint.py,sha256=HI17gu-GxnfXUchflWBTwly7cCYnlpKcshuR-TgD6s4,19066
-kostyl/ml/lightning/callbacks/early_stopping.py,sha256=D5nyjktCJ9XYAf28-kgXG8jORvXLl1N3nbDQnvValPM,615
-kostyl/ml/lightning/extensions/__init__.py,sha256=OY6QGv1agYgqqKf1xJBrxgp_i8FunVfPzYezfaRrGXU,182
-kostyl/ml/lightning/extensions/custom_module.py,sha256=qYffgPwIB_ePwK_MIaRruuDxPKJZb42kg2yy996eGwY,6415
-kostyl/ml/lightning/extensions/pretrained_model.py,sha256=hVIsIUu6Iryrz6S7GQTqog9vNq8LQyjJd2aoJ5Ws6KU,5253
-kostyl/ml/lightning/loggers/__init__.py,sha256=e51dszaoJbuzwBkbdugmuDsPldoSO4yaRgmZUg1Bdy0,71
-kostyl/ml/lightning/loggers/tb_logger.py,sha256=CpjlcEIT187cJXJgRYafqfzvcnwPgPaVZ0vLUflIr7k,899
-kostyl/ml/lightning/utils.py,sha256=DhLy_3JA5VyMQkB1v6xxRxDNHfisjXFYVjuIKPpO81M,1967
-kostyl/ml/metrics_formatting.py,sha256=U6vdNENZLvp2dT1L3HqFKtXrHwGKoDXN93hvamPGHjM,1341
-kostyl/ml/params_groups.py,sha256=nUyw5d06Pvy9QPiYtZzLYR87xwXqJLxbHthgQH8oSCM,3583
-kostyl/ml/registry_uploader.py,sha256=BbyLXvF8AL145k7g6MRkJ7gf_3Um53p3Pn5280vVD9U,4384
-kostyl/ml/schedulers/__init__.py,sha256=_EtZu8DwTCSv4-eR84kRstEZblHylVqda7WQUOXIKfw,534
-kostyl/ml/schedulers/base.py,sha256=bjmwgdZpnSqpCnHPnKC6MEiRO79cwxMJpZq-eQVNs2M,1353
-kostyl/ml/schedulers/composite.py,sha256=ee4xlMDMMtjKPkbTF2ue9GTr9DuGCGjZWf11mHbi6aE,2387
-kostyl/ml/schedulers/cosine.py,sha256=y8ylrgVOkVcr2-ExoqqNW--tdDX88TBYPQCOppIf2_M,8685
-kostyl/ml/schedulers/cosine_with_plateu.py,sha256=0-X6wl3HgsTiLIbISb9lOxIVWXHDEND7rILitMWtIiM,10195
-kostyl/ml/schedulers/linear.py,sha256=RnnnblRuRXP3LT03QVIHUaK2kNsiMP1AedrMoeyh3qk,5843
-kostyl/utils/__init__.py,sha256=hkpmB6c5pr4Ti5BshOROebb7cvjDZfNCw83qZ_FFKMM,240
-kostyl/utils/dict_manipulations.py,sha256=e3vBicID74nYP8lHkVTQc4-IQwoJimrbFELy5uSF6Gk,1073
-kostyl/utils/fs.py,sha256=gAQNIU4R_2DhwjgzOS8BOMe0gZymtY1eZwmdgOdDgqo,510
-kostyl/utils/logging.py,sha256=CgNFNogcK0hoZmygvBWlTcq5A3m2Pfv9eOAP_gwx0pM,6633
-kostyl_toolkit-0.1.37.dist-info/WHEEL,sha256=eycQt0QpYmJMLKpE3X9iDk8R04v2ZF0x82ogq-zP6bQ,79
-kostyl_toolkit-0.1.37.dist-info/METADATA,sha256=yHPgSAhPnm5tDQjvDIfs213-bsVX6vMfVsUbX9GboGU,4269
-kostyl_toolkit-0.1.37.dist-info/RECORD,,

The remaining renamed files listed in the summary above have no content changes.