kostyl-toolkit 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kostyl/ml_core/configs/config_base.py +4 -4
- kostyl/ml_core/configs/training_params.py +1 -1
- kostyl/ml_core/dist_utils.py +53 -26
- kostyl/ml_core/params_groups.py +55 -12
- kostyl/utils/logging.py +37 -7
- {kostyl_toolkit-0.1.1.dist-info → kostyl_toolkit-0.1.2.dist-info}/METADATA +1 -1
- {kostyl_toolkit-0.1.1.dist-info → kostyl_toolkit-0.1.2.dist-info}/RECORD +8 -8
- {kostyl_toolkit-0.1.1.dist-info → kostyl_toolkit-0.1.2.dist-info}/WHEEL +0 -0
kostyl/ml_core/configs/config_base.py
CHANGED

@@ -35,7 +35,7 @@ class ConfigLoadingMixin:
 
     @classmethod
     def from_file(
-        cls: type[TConfig], # pyright: ignore
+        cls: type[TConfig], # pyright: ignore
         path: str | Path,
     ) -> TConfig:
         """

@@ -55,7 +55,7 @@ class ConfigLoadingMixin:
 
     @classmethod
     def from_dict(
-        cls: type[TConfig], # pyright: ignore
+        cls: type[TConfig], # pyright: ignore
         state_dict: dict,
     ) -> TConfig:
         """

@@ -83,7 +83,7 @@ class ClearMLConfigMixin(ConfigLoadingMixin):
 
     @classmethod
     def connect_as_file(
-        cls: type[TModel], # pyright: ignore
+        cls: type[TModel], # pyright: ignore
        task: clearml.Task,
        path: str | Path,
        alias: str | None = None,

@@ -122,7 +122,7 @@ class ClearMLConfigMixin(ConfigLoadingMixin):
 
     @classmethod
     def connect_as_dict(
-        cls: type[TModel], # pyright: ignore
+        cls: type[TModel], # pyright: ignore
        task: clearml.Task,
        path: str | Path,
        alias: str | None = None,
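The four hunks above only touch the `cls` annotations of the mixin classmethods, but the signatures give a picture of the intended API. A minimal usage sketch, assuming a config class that subclasses the mixin shown; `TrainConfig`, its fields, the ClearML project/task names, and the exact loading semantics are inferred from the signatures only and are not confirmed by this diff:

```python
from pathlib import Path

import clearml

from kostyl.ml_core.configs.config_base import ClearMLConfigMixin


class TrainConfig(ClearMLConfigMixin):  # hypothetical config class
    lr: float = 3e-4
    batch_size: int = 32


# from_file(path: str | Path) -> TConfig: load the config from a file on disk
cfg = TrainConfig.from_file("configs/train.yaml")

# from_dict(state_dict: dict) -> TConfig: build the config from an in-memory dict
cfg = TrainConfig.from_dict({"lr": 1e-3, "batch_size": 64})

# connect_as_file(task, path, alias=None): register the config file with a ClearML task
# (return value assumed to be the loaded config, based on the TModel type variable)
task = clearml.Task.init(project_name="demo", task_name="train")
cfg = TrainConfig.connect_as_file(task, Path("configs/train.yaml"), alias="train_config")
```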
kostyl/ml_core/dist_utils.py
CHANGED

@@ -1,33 +1,70 @@
 import math
 import os
+from typing import Literal
 
 import torch.distributed as dist
 
-from kostyl.ml_core.configs import Lr
 from kostyl.utils.logging import setup_logger
 
 
 logger = setup_logger(add_rank=True)
 
 
-def
-
+def log_dist(msg: str, how: Literal["only-zero-rank", "world"]) -> None:
+    """
+    Log a message in a distributed environment based on the specified verbosity level.
+
+    Args:
+        msg (str): The message to log.
+        how (Literal["only-zero-rank", "world"]): The verbosity level for logging.
+            - "only-zero-rank": Log only from the main process (rank 0).
+            - "world": Log from all processes in the distributed environment.
+
+    """
+    match how:
+        case _ if not dist.is_initialized():
+            logger.warning_once(
+                "Distributed logging requested but torch.distributed is not initialized."
+            )
+            logger.info(msg)
+        case "only-zero-rank":
+            if is_main_process():
+                logger.info(msg)
+        case "world":
+            logger.info(msg)
+        case _:
+            logger.warning_once(
+                f"Invalid logging verbosity level requested: {how}. Message not logged."
+            )
+    return
+
+
+def scale_lrs_by_world_size(
+    lrs: dict[str, float],
     group: dist.ProcessGroup | None = None,
     config_name: str = "",
     inv_scale: bool = False,
-
+    verbose: Literal["only-zero-rank", "world"] | None = None,
+) -> dict[str, float]:
     """
     Scale learning-rate configuration values to match the active distributed world size.
 
+    Note:
+        The value in the `lrs` will be modified in place.
+
     Args:
-
+        lrs (dict[str, float]): A dictionary of learning rate names and their corresponding values to be scaled.
         group (dist.ProcessGroup | None): Optional process group used to determine
             the target world size. Defaults to the global process group.
         config_name (str): Human-readable identifier included in log messages.
         inv_scale (bool): If True, use the inverse square-root scale factor.
+        verbose (Literal["only-zero-rank", "world"] | None): Verbosity level for logging scaled values.
+            - "only-zero-rank": Log only from the main process (rank 0).
+            - "world": Log from all processes in the distributed environment.
+            - None: No logging.
 
     Returns:
-
+        dict[str, float]: The learning-rate configuration with scaled values.
 
     """
     world_size = dist.get_world_size(group=group)

@@ -37,26 +74,16 @@ def scale_lrs_by_world_size[Tlr: Lr](
     else:
         scale = math.sqrt(world_size)
 
-
-
-
-
-
-
-
-
-
-
-            f"New {config_name} lr FINAL: {lr_config.final_value}; OLD: {old_final_value}"
-        )
-
-    if lr_config.warmup_value is not None:
-        old_warmup_value = lr_config.warmup_value
-        lr_config.warmup_value *= scale
-        logger.info(
-            f"New {config_name} lr WARMUP: {lr_config.warmup_value}; OLD: {old_warmup_value}"
-        )
-    return lr_config
+    for name, value in lrs.items():
+        old_value = value
+        new_value = value * scale
+        if verbose is not None:
+            log_dist(
+                f"New {config_name} lr {name.upper()}: {new_value}; OLD: {old_value}",
+                verbose,
+            )
+        lrs[name] = new_value
+    return lrs
 
 
 def _get_rank() -> int:
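Together, `log_dist` and the reworked `scale_lrs_by_world_size` replace the old Lr-config-specific scaling with a plain dict API. A short usage sketch based on the docstrings above; the learning-rate names are illustrative, and the process group is assumed to be initialized already (e.g. by torchrun), since the scaling queries `dist.get_world_size()`:

```python
from kostyl.ml_core.dist_utils import log_dist, scale_lrs_by_world_size

# Illustrative per-group learning rates; keys are only used for log messages.
lrs = {"base": 1e-3, "head": 5e-4}

# Multiplies every value by sqrt(world_size) in place and returns the same dict;
# inv_scale=True would use the inverse square-root factor instead.
lrs = scale_lrs_by_world_size(
    lrs,
    config_name="optimizer",
    verbose="only-zero-rank",  # log old/new values from rank 0 only
)

# "world" makes every rank log the final values.
log_dist(f"Scaled learning rates: {lrs}", how="world")
```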
kostyl/ml_core/params_groups.py
CHANGED

@@ -7,23 +7,66 @@ def create_params_groups(
     model: nn.Module,
     weight_decay: float,
     lr: float,
+    no_lr_keywords: set[str] | None = None,
+    no_decay_keywords: set[str] | None = None,
 ) -> list[dict]:
-    """
+    """
+    Create optimizer parameter groups for a PyTorch model with fine-grained weight decay control.
+
+    This function iterates through the model's named parameters and assigns them to specific
+    parameter groups based on whether they should be subject to weight decay. Certain parameter
+    types (like normalization layers, biases, embeddings) are typically excluded from weight decay
+    to improve training stability.
+
+    Args:
+        model (nn.Module): The PyTorch model containing the parameters to optimize.
+        weight_decay (float): The default weight decay value to apply to parameters that are
+            not excluded.
+        lr (float): The learning rate to assign to all parameter groups.
+        no_lr_keywords (set[str] | None, optional): A set of string keywords. If a parameter's
+            name contains any of these keywords, its learning rate is set to 0.0.
+            Defaults to None, which uses an empty set.
+        no_decay_keywords (set[str] | None, optional): A set of string keywords. If a parameter's
+            name contains any of these keywords, its weight decay is set to 0.0.
+            If additional keywords are provided, they will be added to the default set.
+            Defaults to None, which uses a standard set of exclusion keywords:
+            {"norm", "bias", "embedding", "tokenizer", "ln", "scale"}.
+
+    Returns:
+        list[dict]: A list of dictionaries, where each dictionary represents a parameter group
+            compatible with PyTorch optimizers (e.g., `torch.optim.AdamW`). Each group contains:
+            - "params": The parameter tensor.
+            - "lr": The learning rate.
+            - "weight_decay": The specific weight decay value (0.0 or the provided default).
+
+    """
+    no_decay_keywords_ = {
+        "norm",
+        "bias",
+        "embedding",
+        "tokenizer",
+        "ln",
+        "scale",
+    }
+    if no_decay_keywords is not None:
+        no_decay_keywords_ = no_decay_keywords_.union(no_decay_keywords)
+
+    no_lr_keywords_ = set()
+    if no_lr_keywords is not None:
+        no_lr_keywords_ = no_lr_keywords_.union(no_lr_keywords)
+
     param_groups = []
     for name, param in model.named_parameters():
         if param.requires_grad is False:
             continue
-
-
-
-
-
-
-
-
-            or ("ln" in name)
-            or ("scale" in name)
-        ):
+
+        if any(keyword in name for keyword in no_lr_keywords_):
+            lr_ = 0.0
+        else:
+            lr_ = lr
+        param_group = {"params": param, "lr": lr_}
+
+        if any(keyword in name for keyword in no_decay_keywords_):
             param_group["weight_decay"] = 0.0
         else:
             param_group["weight_decay"] = weight_decay
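A usage sketch for the extended signature, following the docstring above; the tiny model and the extra keyword sets are illustrative:

```python
import torch
from torch import nn

from kostyl.ml_core.params_groups import create_params_groups

model = nn.Sequential(
    nn.Linear(16, 32),
    nn.LayerNorm(32),
    nn.Linear(32, 4),
)

param_groups = create_params_groups(
    model,
    weight_decay=0.01,
    lr=3e-4,
    no_lr_keywords={"frozen_head"},    # illustrative: zero lr for params whose name contains this
    no_decay_keywords={"pos_embed"},   # merged with the default {"norm", "bias", "embedding", ...}
)

# Each group carries its own "lr" and "weight_decay", so the optimizer defaults are only fallbacks.
optimizer = torch.optim.AdamW(param_groups)
```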
kostyl/utils/logging.py
CHANGED

@@ -5,9 +5,12 @@ import os
 import sys
 import uuid
 from copy import deepcopy
+from functools import partialmethod
 from pathlib import Path
+from threading import Lock
 from typing import TYPE_CHECKING
 from typing import Literal
+from typing import cast
 
 from loguru import logger as _base_logger
 from torch.nn.modules.module import _IncompatibleKeys

@@ -16,6 +19,12 @@ from torch.nn.modules.module import _IncompatibleKeys
 if TYPE_CHECKING:
     from loguru import Logger
 
+    class CustomLogger(Logger):  # noqa: D101
+        def log_once(self, level: str, message: str, *args, **kwargs) -> None: ...  # noqa: ANN003, D102
+        def warning_once(self, message: str, *args, **kwargs) -> None: ...  # noqa: ANN003, D102
+else:
+    CustomLogger = type(_base_logger)
+
 try:
     import torch.distributed as dist
 except Exception:

@@ -31,10 +40,25 @@ except Exception:
 
     dist = _Dummy()
 
-
-
-
-
+_once_lock = Lock()
+_once_keys: set[tuple[str, str]] = set()
+
+
+def _log_once(self: CustomLogger, level: str, message: str, *args, **kwargs) -> None:  # noqa: ANN003
+    key = (message, level)
+
+    with _once_lock:
+        if key in _once_keys:
+            return
+        _once_keys.add(key)
+
+    self.log(level, message, *args, **kwargs)
+    return
+
+
+_base_logger = cast(CustomLogger, _base_logger)
+_base_logger.log_once = _log_once  # pyright: ignore[reportAttributeAccessIssue]
+_base_logger.warning_once = partialmethod(_log_once, "WARNING")  # pyright: ignore[reportAttributeAccessIssue]
 
 
 def _caller_filename() -> str:

@@ -43,6 +67,12 @@ def _caller_filename() -> str:
     return name
 
 
+_DEFAULT_SINK_REMOVED = False
+_DEFAULT_FMT = "<level>{level: <8}</level> {time:HH:mm:ss.SSS} [{extra[channel]}] <level>{message}</level>"
+_ONLY_MESSAGE_FMT = "<level>{message}</level>"
+_PRESETS = {"default": _DEFAULT_FMT, "only_message": _ONLY_MESSAGE_FMT}
+
+
 def setup_logger(
     name: str | None = None,
     fmt: Literal["default", "only_message"] | str = "default",

@@ -51,7 +81,7 @@ def setup_logger(
     sink=sys.stdout,
     colorize: bool = True,
     serialize: bool = False,
-) ->
+) -> CustomLogger:
     """
     Returns a bound logger with its own sink and formatting.
 

@@ -96,8 +126,8 @@ def setup_logger(
         serialize=serialize,
         filter=lambda r: r["extra"].get("logger_id") == logger_id,
     )
-
-    return
+    logger = _base_logger.bind(logger_id=logger_id, channel=channel)
+    return cast(CustomLogger, logger)
 
 
 def log_incompatible_keys(
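The new once-only helpers and the `CustomLogger` return type of `setup_logger` are exercised elsewhere in the toolkit (e.g. `logger.warning_once` in dist_utils.py above). A minimal sketch mirroring that pattern; the logger name is illustrative:

```python
from kostyl.utils.logging import setup_logger

logger = setup_logger(name="train", fmt="default")

logger.info("regular message, rendered with the 'default' format preset")

# A (message, level) pair is emitted only the first time it is seen.
for _ in range(3):
    logger.warning_once("deduplicated warning")

logger.log_once("INFO", "deduplicated info message")
```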
{kostyl_toolkit-0.1.1.dist-info → kostyl_toolkit-0.1.2.dist-info}/RECORD
CHANGED

@@ -4,10 +4,10 @@ kostyl/ml_core/clearml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 kostyl/ml_core/clearml/logging_utils.py,sha256=GBjIIZbH_itd5sj7XpvxjkyZwxxGOpEcQ3BiWaJTyq8,1210
 kostyl/ml_core/clearml/pulling_utils.py,sha256=Yf70ux8dS0_ENdvfbNQkXOrDxwd4ed2GnRCmOR2ppEk,3252
 kostyl/ml_core/configs/__init__.py,sha256=RKSHp5J8eksqMxFu5xkpSxyswSpgKhrHLjltLS3yZXc,896
-kostyl/ml_core/configs/config_base.py,sha256=
+kostyl/ml_core/configs/config_base.py,sha256=ctjedEKZbwByUr5HA-Ic0dVCPWPAIPL9kK8T0S-BOvk,5276
 kostyl/ml_core/configs/hyperparams.py,sha256=iKzuFOAL3xSVGjXlvRX_mbSBt0pqh6RQAxyHPmN-Bik,2974
-kostyl/ml_core/configs/training_params.py,sha256=
-kostyl/ml_core/dist_utils.py,sha256=
+kostyl/ml_core/configs/training_params.py,sha256=a8ewftu_xDatlbJ6qk_87WkuRpdThBGYQA2fHbjb9RU,2598
+kostyl/ml_core/dist_utils.py,sha256=G8atjzkRbXZZiZh9rdEYBmeXqX26rJdDDovft2n6xiU,3201
 kostyl/ml_core/lightning/__init__.py,sha256=-F3JAyq8KU1d-nACWryGu8d1CbvWbQ1rXFdeRwfE2X8,175
 kostyl/ml_core/lightning/callbacks/__init__.py,sha256=Vd-rozY4T9Prr3IMqbliXxj6sC6y9XsovHQqRwzc2HI,297
 kostyl/ml_core/lightning/callbacks/checkpoint.py,sha256=RgkNNmsbAz9fdMYGlEgn9Qs_DF8LiuY7Bp1Hu4ZW98s,1946

@@ -20,14 +20,14 @@ kostyl/ml_core/lightning/loggers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 kostyl/ml_core/lightning/loggers/tb_logger.py,sha256=Zh9n-lLu-bXMld-FIUO3lJfCyDf0IQFhS3JVShDJmvg,937
 kostyl/ml_core/lightning/steps_estimation.py,sha256=fTZ0IrUEZV3H6VYlx4GYn56oco56mMiB7FO9F0Z7qc4,1511
 kostyl/ml_core/metrics_formatting.py,sha256=w0rTz61z0Um_d2pomYLvcQFcZX_C-KolZcIPRsa1efE,1421
-kostyl/ml_core/params_groups.py,sha256=
+kostyl/ml_core/params_groups.py,sha256=nUyw5d06Pvy9QPiYtZzLYR87xwXqJLxbHthgQH8oSCM,3583
 kostyl/ml_core/schedulers/__init__.py,sha256=bxXbsU_WYnVbhvNNnuI7cOAh2Axz7D25TaleBTZhYfc,197
 kostyl/ml_core/schedulers/base.py,sha256=9M2iOoOVSRojR_liPX1qo3Nn4iMXSM5ZJuAFWZTulUk,1327
 kostyl/ml_core/schedulers/composite.py,sha256=ee4xlMDMMtjKPkbTF2ue9GTr9DuGCGjZWf11mHbi6aE,2387
 kostyl/ml_core/schedulers/cosine.py,sha256=jufULVHn_L_ZZEc3ZTG3QCY_pc0jlAMH5Aw496T31jo,8203
 kostyl/utils/__init__.py,sha256=hkpmB6c5pr4Ti5BshOROebb7cvjDZfNCw83qZ_FFKMM,240
 kostyl/utils/dict_manipulations.py,sha256=e3vBicID74nYP8lHkVTQc4-IQwoJimrbFELy5uSF6Gk,1073
-kostyl/utils/logging.py,sha256=
-kostyl_toolkit-0.1.
-kostyl_toolkit-0.1.
-kostyl_toolkit-0.1.
+kostyl/utils/logging.py,sha256=3MvfDPArZhwakHu5nMlp_LpOsWg0E0SP26y41clsBtA,5232
+kostyl_toolkit-0.1.2.dist-info/WHEEL,sha256=YUH1mBqsx8Dh2cQG2rlcuRYUhJddG9iClegy4IgnHik,79
+kostyl_toolkit-0.1.2.dist-info/METADATA,sha256=4aZUWVa-k5qqIZJFlOqyCLSwT3S-V_znIRMR1d3_tJ0,4053
+kostyl_toolkit-0.1.2.dist-info/RECORD,,

{kostyl_toolkit-0.1.1.dist-info → kostyl_toolkit-0.1.2.dist-info}/WHEEL
File without changes