kostyl-toolkit 0.1.40__tar.gz → 0.1.43__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/PKG-INFO +3 -3
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/configs/training_settings.py +15 -7
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/data_collator.py +2 -2
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/factory.py +51 -26
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/params_groups.py +2 -2
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/pyproject.toml +4 -14
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/README.md +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/base_uploader.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/configs/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/configs/hyperparams.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/configs/mixins.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/dist_utils.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/checkpoint_uploader.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/config_mixin.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/dataset_utils.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/loading_utils.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/version_utils.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/callbacks/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/callbacks/checkpoint.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/callbacks/early_stopping.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/loggers/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/loggers/tb_logger.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/metrics_formatting.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/mixins.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/module.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/utils.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/schedulers/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/schedulers/base.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/schedulers/composite.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/schedulers/cosine.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/schedulers/linear.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/optim/schedulers/plateau.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/utils/__init__.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/utils/dict_manipulations.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/utils/fs.py +0 -0
- {kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/utils/logging.py +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: kostyl-toolkit
|
|
3
|
-
Version: 0.1.40
|
|
3
|
+
Version: 0.1.43
|
|
4
4
|
Summary: Kickass Orchestration System for Training, Yielding & Logging
|
|
5
5
|
Requires-Dist: case-converter>=1.2.0
|
|
6
6
|
Requires-Dist: loguru>=0.7.3
|
|
7
7
|
Requires-Dist: case-converter>=1.2.0 ; extra == 'ml'
|
|
8
8
|
Requires-Dist: pydantic>=2.12.4 ; extra == 'ml'
|
|
9
|
-
Requires-Dist: torch
|
|
10
|
-
Requires-Dist: transformers
|
|
9
|
+
Requires-Dist: torch ; extra == 'ml'
|
|
10
|
+
Requires-Dist: transformers ; extra == 'ml'
|
|
11
11
|
Requires-Python: >=3.12
|
|
12
12
|
Provides-Extra: ml
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
@@ -38,18 +38,18 @@ class FSDP1StrategyConfig(BaseModel):
|
|
|
38
38
|
"""Fully Sharded Data Parallel (FSDP) strategy configuration."""
|
|
39
39
|
|
|
40
40
|
type: Literal["fsdp1"]
|
|
41
|
-
param_dtype: DTYPE | None
|
|
42
|
-
reduce_dtype: DTYPE | None
|
|
43
|
-
buffer_dtype: DTYPE | None
|
|
41
|
+
param_dtype: DTYPE | None = None
|
|
42
|
+
reduce_dtype: DTYPE | None = None
|
|
43
|
+
buffer_dtype: DTYPE | None = None
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
class FSDP2StrategyConfig(BaseModel):
|
|
47
47
|
"""Fully Sharded Data Parallel (FSDP) strategy configuration."""
|
|
48
48
|
|
|
49
49
|
type: Literal["fsdp2"]
|
|
50
|
-
param_dtype: DTYPE | None
|
|
51
|
-
reduce_dtype: DTYPE | None
|
|
52
|
-
buffer_dtype: DTYPE | None
|
|
50
|
+
param_dtype: DTYPE | None = None
|
|
51
|
+
reduce_dtype: DTYPE | None = None
|
|
52
|
+
buffer_dtype: DTYPE | None = None
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
class DDPStrategyConfig(BaseModel):
|
|
@@ -59,12 +59,20 @@ class DDPStrategyConfig(BaseModel):
|
|
|
59
59
|
find_unused_parameters: bool = False
|
|
60
60
|
|
|
61
61
|
|
|
62
|
+
SUPPORTED_STRATEGIES = (
|
|
63
|
+
FSDP1StrategyConfig
|
|
64
|
+
| FSDP2StrategyConfig
|
|
65
|
+
| SingleDeviceStrategyConfig
|
|
66
|
+
| DDPStrategyConfig
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
|
|
62
70
|
class LightningTrainerParameters(BaseModel):
|
|
63
71
|
"""Lightning Trainer parameters configuration."""
|
|
64
72
|
|
|
65
73
|
accelerator: str
|
|
66
74
|
max_epochs: int
|
|
67
|
-
strategy:
|
|
75
|
+
strategy: SUPPORTED_STRATEGIES
|
|
68
76
|
val_check_interval: int | float
|
|
69
77
|
devices: list[int] | int
|
|
70
78
|
precision: PRECISION
|
|
@@ -93,10 +93,10 @@ class BatchCollatorWithKeyAlignment:
|
|
|
93
93
|
if new_key is None:
|
|
94
94
|
continue
|
|
95
95
|
value = item[k]
|
|
96
|
-
if self.max_length is not None and new_key in
|
|
96
|
+
if self.max_length is not None and new_key in {
|
|
97
97
|
"input_ids",
|
|
98
98
|
"attention_mask",
|
|
99
|
-
|
|
99
|
+
}:
|
|
100
100
|
value = self._truncate_data(new_key, value)
|
|
101
101
|
new_item[new_key] = value
|
|
102
102
|
aligned_batch.append(new_item)
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import TypedDict
|
|
2
|
+
from typing import Unpack
|
|
2
3
|
|
|
3
4
|
from torch.optim import Optimizer
|
|
5
|
+
from torch.optim.optimizer import ParamsT
|
|
4
6
|
|
|
5
7
|
from kostyl.ml.configs import OPTIMIZER_CONFIG
|
|
8
|
+
from kostyl.ml.configs import SCHEDULER
|
|
6
9
|
from kostyl.ml.configs import AdamConfig
|
|
7
10
|
from kostyl.ml.configs import AdamWithPrecisionConfig
|
|
8
11
|
from kostyl.ml.configs import MuonConfig
|
|
@@ -18,6 +21,17 @@ from .schedulers import PlateauWithAnnealingScheduler
|
|
|
18
21
|
logger = setup_logger(fmt="only_message")
|
|
19
22
|
|
|
20
23
|
|
|
24
|
+
class OVERRIDABLE_CONFIG_KWARGS(TypedDict, total=False): # noqa: D101, N801
|
|
25
|
+
scheduler_type: SCHEDULER | None
|
|
26
|
+
|
|
27
|
+
freeze_ratio: float | None
|
|
28
|
+
warmup_ratio: float | None
|
|
29
|
+
warmup_value: float | None
|
|
30
|
+
base_value: float | None
|
|
31
|
+
final_value: float | None
|
|
32
|
+
plateau_ratio: float | None
|
|
33
|
+
|
|
34
|
+
|
|
21
35
|
def create_scheduler(
|
|
22
36
|
config: ScheduledParamConfig,
|
|
23
37
|
param_group_field: str,
|
|
@@ -27,6 +41,7 @@ def create_scheduler(
|
|
|
27
41
|
skip_if_zero: bool = False,
|
|
28
42
|
apply_if_field: str | None = None,
|
|
29
43
|
ignore_if_field: str | None = None,
|
|
44
|
+
**kwargs: Unpack[OVERRIDABLE_CONFIG_KWARGS],
|
|
30
45
|
) -> LinearScheduler | CosineScheduler | PlateauWithAnnealingScheduler:
|
|
31
46
|
"""
|
|
32
47
|
Converts a ScheduledParamConfig to a scheduler instance.
|
|
@@ -41,37 +56,48 @@ def create_scheduler(
|
|
|
41
56
|
Default is False.
|
|
42
57
|
apply_if_field: Require this key to be present in a param group before updating.
|
|
43
58
|
ignore_if_field: Skip groups that declare this key in their dictionaries.
|
|
59
|
+
**kwargs: Optional overrides for scheduler configuration parameters (e.g., base_value,
|
|
60
|
+
final_value, warmup_ratio, etc.). These overrides take precedence over the values
|
|
61
|
+
provided in the `config` object.
|
|
44
62
|
|
|
45
63
|
Returns:
|
|
46
64
|
A scheduler instance based on the configuration.
|
|
47
65
|
|
|
48
66
|
"""
|
|
49
|
-
|
|
67
|
+
scheduler_type = kwargs.get("scheduler_type", config.scheduler_type)
|
|
68
|
+
base_value = kwargs.get("base_value", config.base_value)
|
|
69
|
+
final_value = kwargs.get("final_value", config.final_value)
|
|
70
|
+
warmup_ratio = kwargs.get("warmup_ratio", config.warmup_ratio)
|
|
71
|
+
warmup_value = kwargs.get("warmup_value", config.warmup_value)
|
|
72
|
+
freeze_ratio = kwargs.get("freeze_ratio", config.freeze_ratio)
|
|
73
|
+
plateau_ratio = kwargs.get("plateau_ratio", config.plateau_ratio)
|
|
74
|
+
|
|
75
|
+
if scheduler_type is None:
|
|
50
76
|
raise ValueError("scheduler_type must be specified in the config.")
|
|
51
77
|
|
|
52
|
-
if "plateau" in
|
|
53
|
-
|
|
78
|
+
if "plateau" in scheduler_type:
|
|
79
|
+
lookup_scheduler_type = "plateau"
|
|
54
80
|
else:
|
|
55
|
-
|
|
56
|
-
scheduler_cls = SCHEDULER_MAPPING[
|
|
81
|
+
lookup_scheduler_type = scheduler_type
|
|
82
|
+
scheduler_cls = SCHEDULER_MAPPING[lookup_scheduler_type] # type: ignore
|
|
57
83
|
|
|
58
84
|
if issubclass(scheduler_cls, PlateauWithAnnealingScheduler):
|
|
59
|
-
if "cosine" in
|
|
85
|
+
if "cosine" in scheduler_type:
|
|
60
86
|
annealing_type = "cosine"
|
|
61
|
-
elif "linear" in
|
|
87
|
+
elif "linear" in scheduler_type:
|
|
62
88
|
annealing_type = "linear"
|
|
63
89
|
else:
|
|
64
|
-
raise ValueError(f"Unknown annealing_type: {
|
|
90
|
+
raise ValueError(f"Unknown annealing_type: {scheduler_type}")
|
|
65
91
|
scheduler = scheduler_cls(
|
|
66
92
|
optimizer=optim,
|
|
67
93
|
param_group_field=param_group_field,
|
|
68
94
|
num_iters=num_iters,
|
|
69
|
-
plateau_value=
|
|
70
|
-
final_value=
|
|
71
|
-
warmup_ratio=
|
|
72
|
-
warmup_value=
|
|
73
|
-
freeze_ratio=
|
|
74
|
-
plateau_ratio=
|
|
95
|
+
plateau_value=base_value,
|
|
96
|
+
final_value=final_value, # type: ignore
|
|
97
|
+
warmup_ratio=warmup_ratio,
|
|
98
|
+
warmup_value=warmup_value,
|
|
99
|
+
freeze_ratio=freeze_ratio,
|
|
100
|
+
plateau_ratio=plateau_ratio, # type: ignore
|
|
75
101
|
annealing_type=annealing_type,
|
|
76
102
|
multiplier_field=multiplier_field,
|
|
77
103
|
skip_if_zero=skip_if_zero,
|
|
@@ -83,8 +109,8 @@ def create_scheduler(
|
|
|
83
109
|
optimizer=optim,
|
|
84
110
|
param_group_field=param_group_field,
|
|
85
111
|
num_iters=num_iters,
|
|
86
|
-
initial_value=
|
|
87
|
-
final_value=
|
|
112
|
+
initial_value=base_value,
|
|
113
|
+
final_value=final_value, # type: ignore
|
|
88
114
|
multiplier_field=multiplier_field,
|
|
89
115
|
skip_if_zero=skip_if_zero,
|
|
90
116
|
apply_if_field=apply_if_field,
|
|
@@ -95,23 +121,23 @@ def create_scheduler(
|
|
|
95
121
|
optimizer=optim,
|
|
96
122
|
param_group_field=param_group_field,
|
|
97
123
|
num_iters=num_iters,
|
|
98
|
-
base_value=
|
|
99
|
-
final_value=
|
|
100
|
-
warmup_ratio=
|
|
101
|
-
warmup_value=
|
|
102
|
-
freeze_ratio=
|
|
124
|
+
base_value=base_value,
|
|
125
|
+
final_value=final_value, # type: ignore
|
|
126
|
+
warmup_ratio=warmup_ratio,
|
|
127
|
+
warmup_value=warmup_value,
|
|
128
|
+
freeze_ratio=freeze_ratio,
|
|
103
129
|
multiplier_field=multiplier_field,
|
|
104
130
|
skip_if_zero=skip_if_zero,
|
|
105
131
|
apply_if_field=apply_if_field,
|
|
106
132
|
ignore_if_field=ignore_if_field,
|
|
107
133
|
)
|
|
108
134
|
else:
|
|
109
|
-
raise ValueError(f"Unsupported scheduler type: {
|
|
135
|
+
raise ValueError(f"Unsupported scheduler type: {scheduler_type}")
|
|
110
136
|
return scheduler
|
|
111
137
|
|
|
112
138
|
|
|
113
139
|
def create_optimizer( # noqa: C901
|
|
114
|
-
parameters_groups:
|
|
140
|
+
parameters_groups: ParamsT,
|
|
115
141
|
optimizer_config: OPTIMIZER_CONFIG,
|
|
116
142
|
lr: float,
|
|
117
143
|
weight_decay: float,
|
|
@@ -120,8 +146,7 @@ def create_optimizer( # noqa: C901
|
|
|
120
146
|
Creates an optimizer based on the configuration.
|
|
121
147
|
|
|
122
148
|
Args:
|
|
123
|
-
parameters_groups:
|
|
124
|
-
(key "params" and per-group options, i.e. "lr", "weight_decay" and etc.).
|
|
149
|
+
parameters_groups: Parameter groups for the optimizer.
|
|
125
150
|
optimizer_config: Configuration for the optimizer.
|
|
126
151
|
lr: Learning rate.
|
|
127
152
|
weight_decay: Weight decay.
|
|
@@ -28,8 +28,8 @@ def create_params_groups(
|
|
|
28
28
|
Defaults to None, which uses an empty set.
|
|
29
29
|
no_decay_keywords (set[str] | None, optional): A set of string keywords. If a parameter's
|
|
30
30
|
name contains any of these keywords, its weight decay is set to 0.0.
|
|
31
|
-
If
|
|
32
|
-
|
|
31
|
+
If keywords are provided, they will be added to the default set, otherwise the default set is used.
|
|
32
|
+
Default set of keywords:
|
|
33
33
|
{"norm", "bias", "embedding", "tokenizer", "ln", "scale"}.
|
|
34
34
|
|
|
35
35
|
Returns:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "kostyl-toolkit"
|
|
3
|
-
version = "0.1.40"
|
|
3
|
+
version = "0.1.43"
|
|
4
4
|
description = "Kickass Orchestration System for Training, Yielding & Logging "
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -13,8 +13,8 @@ dependencies = [
|
|
|
13
13
|
ml = [
|
|
14
14
|
"case-converter>=1.2.0",
|
|
15
15
|
"pydantic>=2.12.4",
|
|
16
|
-
"torch
|
|
17
|
-
"transformers
|
|
16
|
+
"torch",
|
|
17
|
+
"transformers",
|
|
18
18
|
]
|
|
19
19
|
|
|
20
20
|
[dependency-groups]
|
|
@@ -57,17 +57,7 @@ publish-url = "https://test.pypi.org/legacy/"
|
|
|
57
57
|
explicit = true
|
|
58
58
|
|
|
59
59
|
[tool.pyright]
|
|
60
|
-
typeCheckingMode = "
|
|
61
|
-
reportMissingTypeStubs = false
|
|
62
|
-
reportIncompatibleMethodOverride = true
|
|
63
|
-
exclude = [
|
|
64
|
-
".venv",
|
|
65
|
-
"build",
|
|
66
|
-
"dist",
|
|
67
|
-
"__pycache__",
|
|
68
|
-
"migrations",
|
|
69
|
-
"notebooks",
|
|
70
|
-
]
|
|
60
|
+
typeCheckingMode = "off" # "off" | "basic" | "standard" | "strict"
|
|
71
61
|
|
|
72
62
|
[tool.ruff.lint]
|
|
73
63
|
exclude = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/config_mixin.py
RENAMED
|
File without changes
|
{kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/dataset_utils.py
RENAMED
|
File without changes
|
{kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/loading_utils.py
RENAMED
|
File without changes
|
{kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/clearml/version_utils.py
RENAMED
|
File without changes
|
{kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kostyl_toolkit-0.1.40 → kostyl_toolkit-0.1.43}/kostyl/ml/integrations/lightning/loggers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|