nshtrainer-0.42.0-py3-none-any.whl → nshtrainer-0.43.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nshtrainer/__init__.py +2 -0
- nshtrainer/_callback.py +2 -0
- nshtrainer/_checkpoint/loader.py +2 -0
- nshtrainer/_checkpoint/metadata.py +2 -0
- nshtrainer/_checkpoint/saver.py +2 -0
- nshtrainer/_directory.py +4 -2
- nshtrainer/_experimental/__init__.py +2 -0
- nshtrainer/_hf_hub.py +2 -0
- nshtrainer/callbacks/__init__.py +45 -29
- nshtrainer/callbacks/_throughput_monitor_callback.py +2 -0
- nshtrainer/callbacks/actsave.py +2 -0
- nshtrainer/callbacks/base.py +2 -0
- nshtrainer/callbacks/checkpoint/__init__.py +6 -2
- nshtrainer/callbacks/checkpoint/_base.py +2 -0
- nshtrainer/callbacks/checkpoint/best_checkpoint.py +2 -0
- nshtrainer/callbacks/checkpoint/last_checkpoint.py +4 -2
- nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py +6 -2
- nshtrainer/callbacks/debug_flag.py +2 -0
- nshtrainer/callbacks/directory_setup.py +4 -2
- nshtrainer/callbacks/early_stopping.py +6 -4
- nshtrainer/callbacks/ema.py +5 -3
- nshtrainer/callbacks/finite_checks.py +3 -1
- nshtrainer/callbacks/gradient_skipping.py +6 -4
- nshtrainer/callbacks/interval.py +2 -0
- nshtrainer/callbacks/log_epoch.py +13 -1
- nshtrainer/callbacks/norm_logging.py +4 -2
- nshtrainer/callbacks/print_table.py +3 -1
- nshtrainer/callbacks/rlp_sanity_checks.py +4 -2
- nshtrainer/callbacks/shared_parameters.py +4 -2
- nshtrainer/callbacks/throughput_monitor.py +2 -0
- nshtrainer/callbacks/timer.py +5 -3
- nshtrainer/callbacks/wandb_upload_code.py +4 -2
- nshtrainer/callbacks/wandb_watch.py +4 -2
- nshtrainer/config/__init__.py +130 -90
- nshtrainer/config/_checkpoint/loader/__init__.py +10 -8
- nshtrainer/config/_checkpoint/metadata/__init__.py +6 -4
- nshtrainer/config/_directory/__init__.py +9 -3
- nshtrainer/config/_hf_hub/__init__.py +6 -4
- nshtrainer/config/callbacks/__init__.py +82 -42
- nshtrainer/config/callbacks/actsave/__init__.py +4 -2
- nshtrainer/config/callbacks/base/__init__.py +2 -0
- nshtrainer/config/callbacks/checkpoint/__init__.py +6 -4
- nshtrainer/config/callbacks/checkpoint/_base/__init__.py +6 -4
- nshtrainer/config/callbacks/checkpoint/best_checkpoint/__init__.py +2 -0
- nshtrainer/config/callbacks/checkpoint/last_checkpoint/__init__.py +6 -4
- nshtrainer/config/callbacks/checkpoint/on_exception_checkpoint/__init__.py +6 -4
- nshtrainer/config/callbacks/debug_flag/__init__.py +6 -4
- nshtrainer/config/callbacks/directory_setup/__init__.py +7 -5
- nshtrainer/config/callbacks/early_stopping/__init__.py +9 -7
- nshtrainer/config/callbacks/ema/__init__.py +5 -3
- nshtrainer/config/callbacks/finite_checks/__init__.py +7 -5
- nshtrainer/config/callbacks/gradient_skipping/__init__.py +7 -5
- nshtrainer/config/callbacks/norm_logging/__init__.py +9 -5
- nshtrainer/config/callbacks/print_table/__init__.py +7 -5
- nshtrainer/config/callbacks/rlp_sanity_checks/__init__.py +7 -5
- nshtrainer/config/callbacks/shared_parameters/__init__.py +7 -5
- nshtrainer/config/callbacks/throughput_monitor/__init__.py +6 -4
- nshtrainer/config/callbacks/timer/__init__.py +9 -5
- nshtrainer/config/callbacks/wandb_upload_code/__init__.py +7 -5
- nshtrainer/config/callbacks/wandb_watch/__init__.py +9 -5
- nshtrainer/config/loggers/__init__.py +18 -10
- nshtrainer/config/loggers/_base/__init__.py +2 -0
- nshtrainer/config/loggers/csv/__init__.py +2 -0
- nshtrainer/config/loggers/tensorboard/__init__.py +2 -0
- nshtrainer/config/loggers/wandb/__init__.py +18 -10
- nshtrainer/config/lr_scheduler/__init__.py +2 -0
- nshtrainer/config/lr_scheduler/_base/__init__.py +2 -0
- nshtrainer/config/lr_scheduler/linear_warmup_cosine/__init__.py +2 -0
- nshtrainer/config/lr_scheduler/reduce_lr_on_plateau/__init__.py +6 -4
- nshtrainer/config/metrics/__init__.py +2 -0
- nshtrainer/config/metrics/_config/__init__.py +2 -0
- nshtrainer/config/model/__init__.py +8 -6
- nshtrainer/config/model/base/__init__.py +4 -2
- nshtrainer/config/model/config/__init__.py +8 -6
- nshtrainer/config/model/mixins/logger/__init__.py +2 -0
- nshtrainer/config/nn/__init__.py +16 -14
- nshtrainer/config/nn/mlp/__init__.py +2 -0
- nshtrainer/config/nn/nonlinearity/__init__.py +26 -24
- nshtrainer/config/optimizer/__init__.py +2 -0
- nshtrainer/config/profiler/__init__.py +2 -0
- nshtrainer/config/profiler/_base/__init__.py +2 -0
- nshtrainer/config/profiler/advanced/__init__.py +6 -4
- nshtrainer/config/profiler/pytorch/__init__.py +6 -4
- nshtrainer/config/profiler/simple/__init__.py +6 -4
- nshtrainer/config/runner/__init__.py +2 -0
- nshtrainer/config/trainer/_config/__init__.py +43 -39
- nshtrainer/config/trainer/checkpoint_connector/__init__.py +2 -0
- nshtrainer/config/util/_environment_info/__init__.py +20 -18
- nshtrainer/config/util/config/__init__.py +2 -0
- nshtrainer/config/util/config/dtype/__init__.py +2 -0
- nshtrainer/config/util/config/duration/__init__.py +2 -0
- nshtrainer/data/__init__.py +2 -0
- nshtrainer/data/balanced_batch_sampler.py +2 -0
- nshtrainer/data/datamodule.py +2 -0
- nshtrainer/data/transform.py +2 -0
- nshtrainer/ll/__init__.py +2 -0
- nshtrainer/ll/_experimental.py +2 -0
- nshtrainer/ll/actsave.py +2 -0
- nshtrainer/ll/callbacks.py +2 -0
- nshtrainer/ll/config.py +2 -0
- nshtrainer/ll/data.py +2 -0
- nshtrainer/ll/log.py +2 -0
- nshtrainer/ll/lr_scheduler.py +2 -0
- nshtrainer/ll/model.py +2 -0
- nshtrainer/ll/nn.py +2 -0
- nshtrainer/ll/optimizer.py +2 -0
- nshtrainer/ll/runner.py +2 -0
- nshtrainer/ll/snapshot.py +2 -0
- nshtrainer/ll/snoop.py +2 -0
- nshtrainer/ll/trainer.py +2 -0
- nshtrainer/ll/typecheck.py +2 -0
- nshtrainer/ll/util.py +2 -0
- nshtrainer/loggers/__init__.py +2 -0
- nshtrainer/loggers/_base.py +2 -0
- nshtrainer/loggers/csv.py +2 -0
- nshtrainer/loggers/tensorboard.py +2 -0
- nshtrainer/loggers/wandb.py +6 -4
- nshtrainer/lr_scheduler/__init__.py +2 -0
- nshtrainer/lr_scheduler/_base.py +2 -0
- nshtrainer/lr_scheduler/linear_warmup_cosine.py +2 -0
- nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +2 -0
- nshtrainer/metrics/__init__.py +2 -0
- nshtrainer/metrics/_config.py +2 -0
- nshtrainer/model/__init__.py +2 -0
- nshtrainer/model/base.py +2 -0
- nshtrainer/model/config.py +2 -0
- nshtrainer/model/mixins/callback.py +2 -0
- nshtrainer/model/mixins/logger.py +2 -0
- nshtrainer/nn/__init__.py +2 -0
- nshtrainer/nn/mlp.py +2 -0
- nshtrainer/nn/module_dict.py +2 -0
- nshtrainer/nn/module_list.py +2 -0
- nshtrainer/nn/nonlinearity.py +2 -0
- nshtrainer/optimizer.py +2 -0
- nshtrainer/profiler/__init__.py +2 -0
- nshtrainer/profiler/_base.py +2 -0
- nshtrainer/profiler/advanced.py +2 -0
- nshtrainer/profiler/pytorch.py +2 -0
- nshtrainer/profiler/simple.py +2 -0
- nshtrainer/runner.py +2 -0
- nshtrainer/scripts/find_packages.py +2 -0
- nshtrainer/trainer/__init__.py +2 -0
- nshtrainer/trainer/_config.py +16 -13
- nshtrainer/trainer/_runtime_callback.py +2 -0
- nshtrainer/trainer/checkpoint_connector.py +2 -0
- nshtrainer/trainer/signal_connector.py +2 -0
- nshtrainer/trainer/trainer.py +2 -0
- nshtrainer/util/_environment_info.py +2 -0
- nshtrainer/util/bf16.py +2 -0
- nshtrainer/util/config/__init__.py +2 -0
- nshtrainer/util/config/dtype.py +2 -0
- nshtrainer/util/config/duration.py +2 -0
- nshtrainer/util/environment.py +2 -0
- nshtrainer/util/path.py +2 -0
- nshtrainer/util/seed.py +2 -0
- nshtrainer/util/slurm.py +3 -0
- nshtrainer/util/typed.py +2 -0
- nshtrainer/util/typing_utils.py +2 -0
- {nshtrainer-0.42.0.dist-info → nshtrainer-0.43.0.dist-info}/METADATA +1 -1
- nshtrainer-0.43.0.dist-info/RECORD +162 -0
- nshtrainer-0.42.0.dist-info/RECORD +0 -162
- {nshtrainer-0.42.0.dist-info → nshtrainer-0.43.0.dist-info}/WHEEL +0 -0
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
__codegen__ = True
|
|
2
4
|
|
|
3
5
|
from typing import TYPE_CHECKING
|
|
@@ -43,30 +45,30 @@ else:
|
|
|
43
45
|
|
|
44
46
|
if name in globals():
|
|
45
47
|
return globals()[name]
|
|
46
|
-
if name == "
|
|
47
|
-
return importlib.import_module(
|
|
48
|
-
"nshtrainer.util._environment_info"
|
|
49
|
-
).EnvironmentPackageConfig
|
|
50
|
-
if name == "EnvironmentSnapshotConfig":
|
|
48
|
+
if name == "EnvironmentLinuxEnvironmentConfig":
|
|
51
49
|
return importlib.import_module(
|
|
52
50
|
"nshtrainer.util._environment_info"
|
|
53
|
-
).
|
|
51
|
+
).EnvironmentLinuxEnvironmentConfig
|
|
54
52
|
if name == "EnvironmentLSFInformationConfig":
|
|
55
53
|
return importlib.import_module(
|
|
56
54
|
"nshtrainer.util._environment_info"
|
|
57
55
|
).EnvironmentLSFInformationConfig
|
|
58
|
-
if name == "
|
|
56
|
+
if name == "EnvironmentGPUConfig":
|
|
59
57
|
return importlib.import_module(
|
|
60
58
|
"nshtrainer.util._environment_info"
|
|
61
|
-
).
|
|
62
|
-
if name == "
|
|
59
|
+
).EnvironmentGPUConfig
|
|
60
|
+
if name == "EnvironmentPackageConfig":
|
|
63
61
|
return importlib.import_module(
|
|
64
62
|
"nshtrainer.util._environment_info"
|
|
65
|
-
).
|
|
66
|
-
if name == "
|
|
63
|
+
).EnvironmentPackageConfig
|
|
64
|
+
if name == "EnvironmentHardwareConfig":
|
|
67
65
|
return importlib.import_module(
|
|
68
66
|
"nshtrainer.util._environment_info"
|
|
69
|
-
).
|
|
67
|
+
).EnvironmentHardwareConfig
|
|
68
|
+
if name == "EnvironmentSnapshotConfig":
|
|
69
|
+
return importlib.import_module(
|
|
70
|
+
"nshtrainer.util._environment_info"
|
|
71
|
+
).EnvironmentSnapshotConfig
|
|
70
72
|
if name == "EnvironmentClassInformationConfig":
|
|
71
73
|
return importlib.import_module(
|
|
72
74
|
"nshtrainer.util._environment_info"
|
|
@@ -75,18 +77,18 @@ else:
|
|
|
75
77
|
return importlib.import_module(
|
|
76
78
|
"nshtrainer.util._environment_info"
|
|
77
79
|
).GitRepositoryConfig
|
|
78
|
-
if name == "
|
|
80
|
+
if name == "EnvironmentConfig":
|
|
79
81
|
return importlib.import_module(
|
|
80
82
|
"nshtrainer.util._environment_info"
|
|
81
|
-
).
|
|
82
|
-
if name == "
|
|
83
|
+
).EnvironmentConfig
|
|
84
|
+
if name == "EnvironmentCUDAConfig":
|
|
83
85
|
return importlib.import_module(
|
|
84
86
|
"nshtrainer.util._environment_info"
|
|
85
|
-
).
|
|
86
|
-
if name == "
|
|
87
|
+
).EnvironmentCUDAConfig
|
|
88
|
+
if name == "EnvironmentSLURMInformationConfig":
|
|
87
89
|
return importlib.import_module(
|
|
88
90
|
"nshtrainer.util._environment_info"
|
|
89
|
-
).
|
|
91
|
+
).EnvironmentSLURMInformationConfig
|
|
90
92
|
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
|
91
93
|
|
|
92
94
|
# Submodule exports
|
nshtrainer/data/__init__.py
CHANGED
nshtrainer/data/datamodule.py
CHANGED
nshtrainer/data/transform.py
CHANGED
nshtrainer/ll/__init__.py
CHANGED
nshtrainer/ll/_experimental.py
CHANGED
nshtrainer/ll/actsave.py
CHANGED
nshtrainer/ll/callbacks.py
CHANGED
nshtrainer/ll/config.py
CHANGED
nshtrainer/ll/data.py
CHANGED
nshtrainer/ll/log.py
CHANGED
nshtrainer/ll/lr_scheduler.py
CHANGED
nshtrainer/ll/model.py
CHANGED
nshtrainer/ll/nn.py
CHANGED
nshtrainer/ll/optimizer.py
CHANGED
nshtrainer/ll/runner.py
CHANGED
nshtrainer/ll/snapshot.py
CHANGED
nshtrainer/ll/snoop.py
CHANGED
nshtrainer/ll/trainer.py
CHANGED
nshtrainer/ll/typecheck.py
CHANGED
nshtrainer/ll/util.py
CHANGED
nshtrainer/loggers/__init__.py
CHANGED
nshtrainer/loggers/_base.py
CHANGED
nshtrainer/loggers/csv.py
CHANGED
nshtrainer/loggers/wandb.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import importlib.metadata
|
|
2
4
|
import logging
|
|
3
5
|
from typing import TYPE_CHECKING, Literal
|
|
@@ -8,8 +10,8 @@ from packaging import version
|
|
|
8
10
|
from typing_extensions import assert_never, override
|
|
9
11
|
|
|
10
12
|
from ..callbacks.base import CallbackConfigBase
|
|
11
|
-
from ..callbacks.wandb_upload_code import
|
|
12
|
-
from ..callbacks.wandb_watch import
|
|
13
|
+
from ..callbacks.wandb_upload_code import WandbUploadCodeCallbackConfig
|
|
14
|
+
from ..callbacks.wandb_watch import WandbWatchCallbackConfig
|
|
13
15
|
from ._base import BaseLoggerConfig
|
|
14
16
|
|
|
15
17
|
if TYPE_CHECKING:
|
|
@@ -92,10 +94,10 @@ class WandbLoggerConfig(CallbackConfigBase, BaseLoggerConfig):
|
|
|
92
94
|
- "none" or False: Do not log any checkpoints
|
|
93
95
|
"""
|
|
94
96
|
|
|
95
|
-
log_code:
|
|
97
|
+
log_code: WandbUploadCodeCallbackConfig | None = WandbUploadCodeCallbackConfig()
|
|
96
98
|
"""WandB code upload configuration. Used to upload code to WandB."""
|
|
97
99
|
|
|
98
|
-
watch:
|
|
100
|
+
watch: WandbWatchCallbackConfig | None = WandbWatchCallbackConfig()
|
|
99
101
|
"""WandB model watch configuration. Used to log model architecture, gradients, and parameters."""
|
|
100
102
|
|
|
101
103
|
offline: bool = False
|
nshtrainer/lr_scheduler/_base.py
CHANGED
nshtrainer/metrics/__init__.py
CHANGED
nshtrainer/metrics/_config.py
CHANGED
nshtrainer/model/__init__.py
CHANGED
nshtrainer/model/base.py
CHANGED
nshtrainer/model/config.py
CHANGED
nshtrainer/nn/__init__.py
CHANGED
nshtrainer/nn/mlp.py
CHANGED
nshtrainer/nn/module_dict.py
CHANGED
nshtrainer/nn/module_list.py
CHANGED
nshtrainer/nn/nonlinearity.py
CHANGED
nshtrainer/optimizer.py
CHANGED
nshtrainer/profiler/__init__.py
CHANGED
nshtrainer/profiler/_base.py
CHANGED
nshtrainer/profiler/advanced.py
CHANGED
nshtrainer/profiler/pytorch.py
CHANGED
nshtrainer/profiler/simple.py
CHANGED
nshtrainer/runner.py
CHANGED
nshtrainer/trainer/__init__.py
CHANGED
nshtrainer/trainer/_config.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from collections.abc import Iterable, Sequence
|
|
3
5
|
from datetime import timedelta
|
|
@@ -30,14 +32,15 @@ from .._hf_hub import HuggingFaceHubConfig
|
|
|
30
32
|
from ..callbacks import (
|
|
31
33
|
BestCheckpointCallbackConfig,
|
|
32
34
|
CallbackConfig,
|
|
33
|
-
|
|
35
|
+
EarlyStoppingCallbackConfig,
|
|
34
36
|
LastCheckpointCallbackConfig,
|
|
35
37
|
OnExceptionCheckpointCallbackConfig,
|
|
36
38
|
)
|
|
37
39
|
from ..callbacks.base import CallbackConfigBase
|
|
38
40
|
from ..callbacks.debug_flag import DebugFlagCallbackConfig
|
|
39
|
-
from ..callbacks.
|
|
40
|
-
from ..callbacks.
|
|
41
|
+
from ..callbacks.log_epoch import LogEpochCallbackConfig
|
|
42
|
+
from ..callbacks.rlp_sanity_checks import RLPSanityChecksCallbackConfig
|
|
43
|
+
from ..callbacks.shared_parameters import SharedParametersCallbackConfig
|
|
41
44
|
from ..loggers import (
|
|
42
45
|
CSVLoggerConfig,
|
|
43
46
|
LoggerConfig,
|
|
@@ -65,7 +68,7 @@ class LoggingConfig(CallbackConfigBase):
|
|
|
65
68
|
|
|
66
69
|
log_lr: bool | Literal["step", "epoch"] = True
|
|
67
70
|
"""If enabled, will register a `LearningRateMonitor` callback to log the learning rate to the logger."""
|
|
68
|
-
log_epoch:
|
|
71
|
+
log_epoch: LogEpochCallbackConfig | None = LogEpochCallbackConfig()
|
|
69
72
|
"""If enabled, will log the fractional epoch number to the logger."""
|
|
70
73
|
|
|
71
74
|
actsave_logged_metrics: bool = False
|
|
@@ -136,9 +139,7 @@ class LoggingConfig(CallbackConfigBase):
|
|
|
136
139
|
yield LearningRateMonitor(logging_interval=logging_interval)
|
|
137
140
|
|
|
138
141
|
if self.log_epoch:
|
|
139
|
-
from
|
|
140
|
-
|
|
141
|
-
yield LogEpochCallback()
|
|
142
|
+
yield from self.log_epoch.create_callbacks(root_config)
|
|
142
143
|
|
|
143
144
|
for logger in self.loggers:
|
|
144
145
|
if not logger or not isinstance(logger, CallbackConfigBase):
|
|
@@ -172,9 +173,9 @@ class OptimizationConfig(CallbackConfigBase):
|
|
|
172
173
|
|
|
173
174
|
@override
|
|
174
175
|
def create_callbacks(self, root_config):
|
|
175
|
-
from ..callbacks.norm_logging import
|
|
176
|
+
from ..callbacks.norm_logging import NormLoggingCallbackConfig
|
|
176
177
|
|
|
177
|
-
yield from
|
|
178
|
+
yield from NormLoggingCallbackConfig(
|
|
178
179
|
log_grad_norm=self.log_grad_norm,
|
|
179
180
|
log_grad_norm_per_param=self.log_grad_norm_per_param,
|
|
180
181
|
log_param_norm=self.log_param_norm,
|
|
@@ -564,8 +565,8 @@ class TrainerConfig(C.Config):
|
|
|
564
565
|
reproducibility: ReproducibilityConfig = ReproducibilityConfig()
|
|
565
566
|
"""Reproducibility configuration options."""
|
|
566
567
|
|
|
567
|
-
reduce_lr_on_plateau_sanity_checking:
|
|
568
|
-
|
|
568
|
+
reduce_lr_on_plateau_sanity_checking: RLPSanityChecksCallbackConfig | None = (
|
|
569
|
+
RLPSanityChecksCallbackConfig()
|
|
569
570
|
)
|
|
570
571
|
"""
|
|
571
572
|
If enabled, will do some sanity checks if the `ReduceLROnPlateau` scheduler is used:
|
|
@@ -573,7 +574,7 @@ class TrainerConfig(C.Config):
|
|
|
573
574
|
- If the `interval` is epoch, it makes sure that validation is called every `frequency` epochs.
|
|
574
575
|
"""
|
|
575
576
|
|
|
576
|
-
early_stopping:
|
|
577
|
+
early_stopping: EarlyStoppingCallbackConfig | None = None
|
|
577
578
|
"""Early stopping configuration options."""
|
|
578
579
|
|
|
579
580
|
profiler: ProfilerConfig | None = None
|
|
@@ -741,7 +742,9 @@ class TrainerConfig(C.Config):
|
|
|
741
742
|
automatic selection based on the chosen accelerator. Default: ``"auto"``.
|
|
742
743
|
"""
|
|
743
744
|
|
|
744
|
-
shared_parameters:
|
|
745
|
+
shared_parameters: SharedParametersCallbackConfig | None = (
|
|
746
|
+
SharedParametersCallbackConfig()
|
|
747
|
+
)
|
|
745
748
|
"""If enabled, the model supports scaling the gradients of shared parameters that
|
|
746
749
|
are registered in the self.shared_parameters list. This is useful for models that
|
|
747
750
|
share parameters across multiple modules (e.g., in a GPT model) and want to
|
nshtrainer/trainer/trainer.py
CHANGED
nshtrainer/util/bf16.py
CHANGED
nshtrainer/util/config/dtype.py
CHANGED
nshtrainer/util/environment.py
CHANGED
nshtrainer/util/path.py
CHANGED
nshtrainer/util/seed.py
CHANGED
nshtrainer/util/slurm.py
CHANGED
nshtrainer/util/typed.py
CHANGED