kostyl-toolkit 0.1.38__tar.gz → 0.1.39__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/PKG-INFO +6 -8
  2. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/configs/__init__.py +2 -2
  3. kostyl_toolkit-0.1.39/kostyl/ml/configs/hyperparams.py +120 -0
  4. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/callbacks/checkpoint.py +1 -1
  5. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/loggers/tb_logger.py +6 -3
  6. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/utils.py +0 -7
  7. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/schedulers/linear.py +7 -7
  8. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/pyproject.toml +3 -5
  9. kostyl_toolkit-0.1.38/kostyl/ml/configs/hyperparams.py +0 -94
  10. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/README.md +0 -0
  11. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/__init__.py +0 -0
  12. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/__init__.py +0 -0
  13. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/base_uploader.py +0 -0
  14. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/configs/mixins.py +0 -0
  15. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/configs/training_settings.py +0 -0
  16. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/data_collator.py +0 -0
  17. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/dist_utils.py +0 -0
  18. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/__init__.py +0 -0
  19. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/clearml/__init__.py +0 -0
  20. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/clearml/checkpoint_uploader.py +0 -0
  21. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/clearml/config_mixin.py +0 -0
  22. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/clearml/dataset_utils.py +0 -0
  23. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/clearml/loading_utils.py +0 -0
  24. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/clearml/version_utils.py +0 -0
  25. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/__init__.py +0 -0
  26. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/callbacks/__init__.py +0 -0
  27. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/callbacks/early_stopping.py +0 -0
  28. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/loggers/__init__.py +0 -0
  29. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/metrics_formatting.py +0 -0
  30. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/mixins.py +0 -0
  31. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/module.py +0 -0
  32. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/params_groups.py +0 -0
  33. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/schedulers/__init__.py +0 -0
  34. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/schedulers/base.py +0 -0
  35. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/schedulers/composite.py +0 -0
  36. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/schedulers/cosine.py +0 -0
  37. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/schedulers/plateau.py +0 -0
  38. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/utils/__init__.py +0 -0
  39. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/utils/dict_manipulations.py +0 -0
  40. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/utils/fs.py +0 -0
  41. {kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/utils/logging.py +0 -0
{kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/PKG-INFO
@@ -1,17 +1,15 @@
  Metadata-Version: 2.3
  Name: kostyl-toolkit
- Version: 0.1.38
+ Version: 0.1.39
  Summary: Kickass Orchestration System for Training, Yielding & Logging
  Requires-Dist: case-converter>=1.2.0
  Requires-Dist: loguru>=0.7.3
- Requires-Dist: case-converter>=1.2.0 ; extra == 'ml-core'
- Requires-Dist: clearml[s3]>=2.0.2 ; extra == 'ml-core'
- Requires-Dist: lightning>=2.5.6 ; extra == 'ml-core'
- Requires-Dist: pydantic>=2.12.4 ; extra == 'ml-core'
- Requires-Dist: torch>=2.9.1 ; extra == 'ml-core'
- Requires-Dist: transformers>=4.57.1 ; extra == 'ml-core'
+ Requires-Dist: case-converter>=1.2.0 ; extra == 'ml'
+ Requires-Dist: pydantic>=2.12.4 ; extra == 'ml'
+ Requires-Dist: torch>=2.9.1 ; extra == 'ml'
+ Requires-Dist: transformers>=4.57.1 ; extra == 'ml'
  Requires-Python: >=3.12
- Provides-Extra: ml-core
+ Provides-Extra: ml
  Description-Content-Type: text/markdown

  # Kostyl Toolkit
{kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/configs/__init__.py
@@ -1,6 +1,6 @@
+ from .hyperparams import OPTIMIZER
  from .hyperparams import HyperparamsConfig
  from .hyperparams import Lr
- from .hyperparams import Optimizer
  from .hyperparams import WeightDecay
  from .mixins import ConfigLoadingMixin
  from .training_settings import CheckpointConfig
@@ -14,6 +14,7 @@ from .training_settings import TrainingSettings


  __all__ = [
+     "OPTIMIZER",
      "CheckpointConfig",
      "ConfigLoadingMixin",
      "DDPStrategyConfig",
@@ -23,7 +24,6 @@ __all__ = [
      "HyperparamsConfig",
      "LightningTrainerParameters",
      "Lr",
-     "Optimizer",
      "SingleDeviceStrategyConfig",
      "TrainingSettings",
      "WeightDecay",
kostyl_toolkit-0.1.39/kostyl/ml/configs/hyperparams.py (new file)
@@ -0,0 +1,120 @@
+ from typing import Literal
+
+ from pydantic import BaseModel
+ from pydantic import Field
+ from pydantic import model_validator
+
+ from kostyl.utils.logging import setup_logger
+
+
+ logger = setup_logger(fmt="only_message")
+
+
+ class AdamConfig(BaseModel):
+     """Adam optimizer hyperparameters configuration."""
+
+     type: Literal["AdamW", "Adam"] = "AdamW"
+     betas: tuple[float, float] = (0.9, 0.999)
+
+
+ class MuonConfig(BaseModel):
+     """Muon optimizer hyperparameters configuration."""
+
+     type: Literal["Muon"]
+     nesterov: bool = True
+     ns_coefficients: tuple[float, float, float] = (3.4445, -4.7750, 2.0315)
+     ns_steps: int = 5
+
+
+ class AdamWithPrecisionConfig(BaseModel):
+     """Adam optimizer with low-precision hyperparameters configuration."""
+
+     type: Literal["Adam8bit", "Adam4bit", "AdamFp8"]
+     betas: tuple[float, float] = (0.9, 0.999)
+     block_size: int
+     bf16_stochastic_round: bool = False
+     is_adamw: bool = True
+
+
+ OPTIMIZER = AdamConfig | AdamWithPrecisionConfig | MuonConfig
+ SCHEDULER = Literal[
+     "linear",
+     "cosine",
+     "plateau-with-cosine-annealing",
+     "plateau-with-linear-annealing",
+ ]
+
+
+ class Lr(BaseModel):
+     """Learning rate hyperparameters configuration."""
+
+     scheduler_type: SCHEDULER | None = None
+
+     freeze_ratio: float | None = Field(default=None, ge=0, le=1)
+     warmup_ratio: float | None = Field(default=None, gt=0, lt=1, validate_default=False)
+     warmup_value: float | None = Field(default=None, gt=0, validate_default=False)
+     base_value: float
+     final_value: float | None = Field(default=None, gt=0, validate_default=False)
+     plateau_ratio: float | None = Field(
+         default=None, gt=0, lt=1, validate_default=False
+     )
+
+     @model_validator(mode="after")
+     def _validate_freeze_ratio(self) -> "Lr":
+         if self.scheduler_type is None and self.freeze_ratio is not None:
+             logger.warning("use_scheduler is False, freeze_ratio will be ignored.")
+             self.freeze_ratio = None
+         return self
+
+     @model_validator(mode="after")
+     def _validate_warmup(self) -> "Lr":
+         if ((self.warmup_value is not None) or (self.warmup_ratio is not None)) and self.scheduler_type is None:  # fmt: skip
+             logger.warning(
+                 "scheduler_type is None, warmup_value and warmup_ratio will be ignored."
+             )
+             self.warmup_value = None
+             self.warmup_ratio = None
+         if (self.warmup_value is None) != (self.warmup_ratio is None):  # fmt: skip
+             raise ValueError(
+                 "Both warmup_value and warmup_ratio must be provided or neither"
+             )
+         return self
+
+     @model_validator(mode="after")
+     def _validate_final_value(self) -> "Lr":
+         if (self.scheduler_type in {"linear"}) and (self.final_value is not None):
+             raise ValueError("If scheduler_type is 'linear', final_value must be None.")
+         if (self.scheduler_type is None) and (self.final_value is not None):
+             logger.warning("use_scheduler is False, final_value will be ignored.")
+             self.final_value = None
+         return self
+
+     @model_validator(mode="after")
+     def _validate_plateau_ratio(self) -> "Lr":
+         if self.scheduler_type is not None:
+             if self.scheduler_type.startswith("plateau") and self.plateau_ratio is None:
+                 raise ValueError(
+                     "If scheduler_type is 'plateau-with-*', plateau_ratio must be provided."
+                 )
+             if (
+                 not self.scheduler_type.startswith("plateau")
+                 and self.plateau_ratio is not None
+             ):
+                 logger.warning(
+                     "scheduler_type is not 'plateau-with-*', plateau_ratio will be ignored."
+                 )
+                 self.plateau_ratio = None
+         return self
+
+
+ class WeightDecay(Lr):
+     """Weight decay hyperparameters configuration."""
+
+
+ class HyperparamsConfig(BaseModel):
+     """Model training hyperparameters configuration."""
+
+     grad_clip_val: float | None = Field(default=None, gt=0, validate_default=False)
+     optimizer: OPTIMIZER
+     lr: Lr
+     weight_decay: WeightDecay
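
A minimal construction sketch for the new models (values are illustrative; only names defined in the file above are used, and the module path is inferred from the file location):

from kostyl.ml.configs.hyperparams import HyperparamsConfig, Lr, MuonConfig, WeightDecay

hparams = HyperparamsConfig(
    grad_clip_val=1.0,
    optimizer=MuonConfig(type="Muon"),  # Muon support is new in 0.1.39
    lr=Lr(
        scheduler_type="plateau-with-cosine-annealing",  # replaces the old use_scheduler flag
        base_value=3e-4,
        final_value=3e-5,
        plateau_ratio=0.2,  # required for plateau-with-* schedulers
    ),
    weight_decay=WeightDecay(base_value=0.1),  # WeightDecay now reuses the full Lr schema
)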
{kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/callbacks/checkpoint.py
@@ -286,7 +286,7 @@ def setup_checkpoint_callback(
      ckpt_cfg: CheckpointConfig,
      checkpoint_uploader: ModelCheckpointUploader | None = None,
      upload_strategy: Literal["only-best", "every-checkpoint"] | None = None,
-     remove_folder_if_exists: bool = True,
+     remove_folder_if_exists: bool = False,
  ) -> ModelCheckpointWithCheckpointUploader | ModelCheckpoint:
      """
      Create and configure a checkpoint callback for model saving.
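
The only change here flips a default: an existing checkpoint folder is no longer removed unless the caller asks for it. A hedged call sketch (the module path is inferred from the file location; the `CheckpointConfig` fields are not shown in this diff, so the instance is left as a placeholder):

from kostyl.ml.configs import CheckpointConfig
from kostyl.ml.integrations.lightning.callbacks.checkpoint import setup_checkpoint_callback

ckpt_cfg: CheckpointConfig = ...  # placeholder; CheckpointConfig fields are not shown in this diff

# To keep the pre-0.1.39 behaviour of wiping an existing folder, opt in explicitly:
callback = setup_checkpoint_callback(ckpt_cfg=ckpt_cfg, remove_folder_if_exists=True)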
{kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/loggers/tb_logger.py
@@ -11,14 +11,17 @@ logger = setup_logger()


  def setup_tb_logger(
-     runs_dir: Path,
+     runs_dir: Path, remove_folder_if_exists: bool = False
  ) -> TensorBoardLogger:
      """Sets up a TensorBoardLogger for PyTorch Lightning."""
      if runs_dir.exists():
          if is_local_zero_rank():
              logger.warning(f"TensorBoard log directory {runs_dir} already exists.")
-             rmtree(runs_dir)
-             logger.warning(f"Removed existing TensorBoard log directory {runs_dir}.")
+             if remove_folder_if_exists:
+                 rmtree(runs_dir)
+                 logger.warning(
+                     f"Removed existing TensorBoard log directory {runs_dir}."
+                 )
      else:
          logger.info(f"Creating TensorBoard log directory {runs_dir}.")
          runs_dir.mkdir(parents=True, exist_ok=True)
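
Same pattern for the TensorBoard logger: deleting an existing runs directory is now opt-in. A small usage sketch, assuming the module path matches the file location above:

from pathlib import Path

from kostyl.ml.integrations.lightning.loggers.tb_logger import setup_tb_logger

# Default in 0.1.39: an existing directory is kept and only a warning is logged.
tb_logger = setup_tb_logger(Path("runs/exp-1"))

# Opt back in to the old destructive behaviour:
tb_logger = setup_tb_logger(Path("runs/exp-1"), remove_folder_if_exists=True)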
{kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/integrations/lightning/utils.py
@@ -4,16 +4,9 @@ import lightning as L
  import torch.distributed as dist
  from torch.distributed import ProcessGroup

- from kostyl.ml.configs import DDPStrategyConfig
- from kostyl.ml.configs import FSDP1StrategyConfig
- from kostyl.ml.configs import SingleDeviceStrategyConfig
  from kostyl.utils.logging import setup_logger


- TRAINING_STRATEGIES = (
-     FSDP1StrategyConfig | DDPStrategyConfig | SingleDeviceStrategyConfig
- )
-
  logger = setup_logger()

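
The module-level `TRAINING_STRATEGIES` alias is gone from this module. Callers that relied on it would have to rebuild it locally; a hedged sketch, assuming the three strategy configs are still importable from `kostyl.ml.configs` (DDP and single-device appear in the `__all__` shown earlier, FSDP1 only in the import removed here):

from kostyl.ml.configs import DDPStrategyConfig
from kostyl.ml.configs import FSDP1StrategyConfig
from kostyl.ml.configs import SingleDeviceStrategyConfig

# Local replacement for the alias removed from kostyl.ml.integrations.lightning.utils
TRAINING_STRATEGIES = (
    FSDP1StrategyConfig | DDPStrategyConfig | SingleDeviceStrategyConfig
)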
{kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/kostyl/ml/schedulers/linear.py
@@ -13,21 +13,21 @@ class _LinearScheduleBase(BaseScheduler):
          self,
          param_name: str,
          num_iters: int,
-         start_value: float,
+         base_value: float,
          final_value: float,
      ) -> None:
          self.param_name = param_name
          self.num_iters = num_iters
-         self.start_value = start_value
+         self.base_value = base_value
          self.final_value = final_value

          self.scheduled_values: npt.NDArray[np.float64] = np.array([], dtype=np.float64)
-         self.current_value_ = self.start_value
+         self.current_value_ = self.base_value
          return

      def _create_scheduler(self) -> None:
          self.scheduled_values = np.linspace(
-             self.start_value, self.final_value, num=self.num_iters, dtype=np.float64
+             self.base_value, self.final_value, num=self.num_iters, dtype=np.float64
          )
          self._verify()
          return
@@ -68,7 +68,7 @@ class LinearScheduler(_LinearScheduleBase):
          optimizer: torch.optim.Optimizer,
          param_group_field: str,
          num_iters: int,
-         start_value: float,
+         base_value: float,
          final_value: float,
          multiplier_field: str | None = None,
          skip_if_zero: bool = False,
@@ -82,7 +82,7 @@ class LinearScheduler(_LinearScheduleBase):
              optimizer: Optimizer whose param groups are updated in-place.
              param_group_field: Name of the field that receives the scheduled value.
              num_iters: Number of scheduler iterations before clamping at ``final_value``.
-             start_value: Value used on the first iteration.
+             base_value: Value used on the first iteration.
              final_value: Value used once ``num_iters`` iterations are consumed.
              multiplier_field: Optional per-group multiplier applied to the scheduled value.
              skip_if_zero: Leave groups untouched when their target field equals zero.
@@ -98,7 +98,7 @@ class LinearScheduler(_LinearScheduleBase):
          super().__init__(
              param_name=param_group_field,
              num_iters=num_iters,
-             start_value=start_value,
+             base_value=base_value,
              final_value=final_value,
          )
          self.param_group_field = param_group_field
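
The rename is mechanical (`start_value` becomes `base_value`), matching the `base_value` field used by the `Lr`/`WeightDecay` configs. A hedged construction sketch (model and optimizer are illustrative; keyword names come from the signature shown above):

import torch

from kostyl.ml.schedulers.linear import LinearScheduler

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

scheduler = LinearScheduler(
    optimizer=optimizer,
    param_group_field="lr",
    num_iters=1_000,
    base_value=3e-4,  # previously start_value
    final_value=3e-5,
)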
{kostyl_toolkit-0.1.38 → kostyl_toolkit-0.1.39}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "kostyl-toolkit"
- version = "0.1.38"
+ version = "0.1.39"
  description = "Kickass Orchestration System for Training, Yielding & Logging "
  readme = "README.md"
  requires-python = ">=3.12"
@@ -10,10 +10,8 @@ dependencies = [
  ]

  [project.optional-dependencies]
- ml-core = [
+ ml = [
      "case-converter>=1.2.0",
-     "clearml[s3]>=2.0.2",
-     "lightning>=2.5.6",
      "pydantic>=2.12.4",
      "torch>=2.9.1",
      "transformers>=4.57.1",
@@ -30,7 +28,7 @@ dev = [
      "pyarrow>=22.0.0",
  ]

- ml-core = [
+ ml = [
      "case-converter>=1.2.0",
      "clearml[s3]>=2.0.2",
      "lightning>=2.5.6",
@@ -1,94 +0,0 @@
1
- from typing import Literal
2
-
3
- from pydantic import BaseModel
4
- from pydantic import Field
5
- from pydantic import model_validator
6
-
7
- from kostyl.utils.logging import setup_logger
8
-
9
-
10
- logger = setup_logger(fmt="only_message")
11
-
12
-
13
- class AdamConfig(BaseModel):
14
- """AdamW optimizer hyperparameters configuration."""
15
-
16
- type: Literal["AdamW"] = "AdamW"
17
- betas: tuple[float, float] = (0.9, 0.999)
18
- is_adamw: bool = True
19
-
20
-
21
- class AdamWithPrecisionConfig(BaseModel):
22
- """Adam optimizer with low-precision hyperparameters configuration."""
23
-
24
- type: Literal["Adam8bit", "Adam4bit", "AdamFp8"]
25
- betas: tuple[float, float] = (0.9, 0.999)
26
- block_size: int
27
- bf16_stochastic_round: bool = False
28
- is_adamw: bool = True
29
-
30
-
31
- Optimizer = AdamConfig | AdamWithPrecisionConfig
32
-
33
-
34
- class Lr(BaseModel):
35
- """Learning rate hyperparameters configuration."""
36
-
37
- use_scheduler: bool = False
38
- warmup_iters_ratio: float | None = Field(
39
- default=None, gt=0, lt=1, validate_default=False
40
- )
41
- warmup_value: float | None = Field(default=None, gt=0, validate_default=False)
42
- base_value: float
43
- final_value: float | None = Field(default=None, gt=0, validate_default=False)
44
-
45
- @model_validator(mode="after")
46
- def validate_warmup(self) -> "Lr":
47
- """Validates the warmup parameters based on use_scheduler."""
48
- if (self.warmup_value is None) != (self.warmup_iters_ratio is None): # fmt: skip
49
- raise ValueError(
50
- "Both warmup_value and warmup_iters_ratio must be provided or neither"
51
- )
52
- if ((self.warmup_value is not None) or (self.warmup_iters_ratio is not None)) and not self.use_scheduler: # fmt: skip
53
- logger.warning(
54
- "use_scheduler is False, warmup_value and warmup_iters_ratio will be ignored."
55
- )
56
- self.warmup_value = None
57
- self.warmup_iters_ratio = None
58
- return self
59
-
60
- @model_validator(mode="after")
61
- def validate_final_value(self) -> "Lr":
62
- """Validates the final_value based on use_scheduler."""
63
- if self.use_scheduler and (self.final_value is None):
64
- raise ValueError("If use_scheduler is True, final_value must be provided.")
65
- if (not self.use_scheduler) and (self.final_value is not None):
66
- logger.warning("use_scheduler is False, final_value will be ignored.")
67
- self.final_value = None
68
- return self
69
-
70
-
71
- class WeightDecay(BaseModel):
72
- """Weight decay hyperparameters configuration."""
73
-
74
- use_scheduler: bool = False
75
- base_value: float
76
- final_value: float | None = None
77
-
78
- @model_validator(mode="after")
79
- def validate_final_value(self) -> "WeightDecay":
80
- """Validates the final_value based on use_scheduler."""
81
- if self.use_scheduler and self.final_value is None:
82
- raise ValueError("If use_scheduler is True, final_value must be provided.")
83
- if not self.use_scheduler and self.final_value is not None:
84
- logger.warning("use_scheduler is False, final_value will be ignored.")
85
- return self
86
-
87
-
88
- class HyperparamsConfig(BaseModel):
89
- """Model training hyperparameters configuration."""
90
-
91
- grad_clip_val: float | None = Field(default=None, gt=0, validate_default=False)
92
- optimizer: Optimizer
93
- lr: Lr
94
- weight_decay: WeightDecay
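
For configs written against 0.1.38, the main migration is in the `Lr` schema: the `use_scheduler` flag becomes an explicit `scheduler_type`, and `warmup_iters_ratio` becomes `warmup_ratio`. A hedged sketch of the mapping (values illustrative):

from kostyl.ml.configs.hyperparams import Lr

# 0.1.38 (removed):
#   Lr(use_scheduler=True, warmup_iters_ratio=0.05, warmup_value=1e-6,
#      base_value=3e-4, final_value=3e-5)
#
# 0.1.39 equivalent; note that final_value must be omitted when scheduler_type is "linear".
lr = Lr(
    scheduler_type="cosine",
    warmup_ratio=0.05,
    warmup_value=1e-6,
    base_value=3e-4,
    final_value=3e-5,
)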