PyPI - nshtrainer - Versions diffs - 0.29.0__py3-none-any.whl → 0.30.0__py3-none-any.whl - Mend

nshtrainer 0.29.0__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

nshtrainer/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from . import _experimental as _experimental
 from . import callbacks as callbacks
+from . import config as config
 from . import data as data
 from . import lr_scheduler as lr_scheduler
 from . import metrics as metrics

nshtrainer/callbacks/checkpoint/_base.py CHANGED Viewed

@@ -155,15 +155,15 @@ class CheckpointBase(Checkpoint, ABC, Generic[TConfig]):
         trainer.save_checkpoint(filepath, self.config.save_weights_only)
         if trainer.is_global_zero:
+            # Remove old checkpoints
+            self.remove_old_checkpoints(trainer)
             # Create the latest symlink
             if (symlink_filename := self.symlink_path()) is not None:
                 symlink_path = self.dirpath / symlink_filename
                 _link_checkpoint(filepath, symlink_path, metadata=True)
                 log.debug(f"Created latest symlink: {symlink_path}")
-            # Remove old checkpoints
-            self.remove_old_checkpoints(trainer)
         # Barrier to ensure all processes have saved the checkpoint,
         # deleted the old checkpoints, and created the symlink before continuing
         trainer.strategy.barrier()

nshtrainer/config/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .duration import Duration as Duration
+from .duration import Epochs as Epochs
+from .duration import Steps as Steps

nshtrainer/config/duration.py ADDED Viewed

@@ -0,0 +1,31 @@
+import math
+from typing import Annotated, Literal
+import nshconfig as C
+class Steps(C.Config):
+    kind: Literal["steps"] = "steps"
+    value: Annotated[int, C.Field(ge=0)]
+    """Number of steps."""
+    def to_steps(self, steps_per_epoch: int):
+        return self
+class Epochs(C.Config):
+    kind: Literal["epochs"] = "epochs"
+    value: Annotated[int | float, C.Field(ge=0)]
+    """Number of epochs."""
+    def to_steps(self, steps_per_epoch: int):
+        value = self.value * steps_per_epoch
+        if not isinstance(value, int):
+            value = int(math.ceil(value))
+        return Steps(value=value)
+Duration = Annotated[Steps | Epochs, C.Field(discriminator="kind")]

nshtrainer/lr_scheduler/linear_warmup_cosine.py CHANGED Viewed

@@ -2,11 +2,11 @@ import math
 import warnings
 from typing import Literal
-import nshconfig as C
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LRScheduler
 from typing_extensions import override
+from ..config import Duration
 from ._base import LRSchedulerConfigBase, LRSchedulerMetadata
@@ -91,13 +91,13 @@ class LinearWarmupCosineAnnealingLR(LRScheduler):
 class LinearWarmupCosineDecayLRSchedulerConfig(LRSchedulerConfigBase):
     name: Literal["linear_warmup_cosine_decay"] = "linear_warmup_cosine_decay"
-    warmup_epochs: int = C.Field(ge=0)
-    r"""The number of epochs for the linear warmup phase.
-    The learning rate is linearly increased from `warmup_start_lr` to the initial learning rate over this number of epochs."""
+    warmup_duration: Duration
+    r"""The duration for the linear warmup phase.
+    The learning rate is linearly increased from `warmup_start_lr` to the initial learning rate over this duration."""
-    max_epochs: int = C.Field(gt=0)
-    r"""The total number of epochs.
-    The learning rate is decayed to `min_lr` over this number of epochs."""
+    max_duration: Duration
+    r"""The total duration.
+    The learning rate is decayed to `min_lr` over this duration."""
     warmup_start_lr_factor: float = 0.0
     r"""The initial learning rate for the linear warmup phase, as a factor of the initial learning rate.
@@ -121,11 +121,20 @@ class LinearWarmupCosineDecayLRSchedulerConfig(LRSchedulerConfigBase):
     @override
     def create_scheduler_impl(self, optimizer, lightning_module, lr):
         num_steps_per_epoch = self.compute_num_steps_per_epoch(lightning_module)
-        warmup_steps = self.warmup_epochs * num_steps_per_epoch
-        max_steps = self.max_epochs * num_steps_per_epoch
+        warmup_steps = (
+            self.warmup_duration.to_steps(num_steps_per_epoch).value
+            * num_steps_per_epoch
+        )
+        max_steps = (
+            self.max_duration.to_steps(num_steps_per_epoch).value * num_steps_per_epoch
+        )
         warmup_start_lr = self.warmup_start_lr_factor * lr
         min_lr = self.min_lr_factor * lr
+        # Warmup and max steps should be at least 1.
+        warmup_steps = max(warmup_steps, 1)
+        max_steps = max(max_steps, 1)
         # Create the scheduler
         scheduler = LinearWarmupCosineAnnealingLR(
             optimizer=optimizer,

{nshtrainer-0.29.0.dist-info → nshtrainer-0.30.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nshtrainer
-Version: 0.29.0
+Version: 0.30.0
 Summary:
 Author: Nima Shoghi
 Author-email: nimashoghi@gmail.com

{nshtrainer-0.29.0.dist-info → nshtrainer-0.30.0.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-nshtrainer/__init__.py,sha256=39loiLLXbaGiozEsAn8mPHopxaPsek8JsgR9DD2gxtY,583
+nshtrainer/__init__.py,sha256=sUb2yNdkHHhrKWCeWA5QKIA1Xx3jkO1QGD5Pa-HvgbA,614
 nshtrainer/_callback.py,sha256=A1zLsTy4b_wOYnInLLXGSRdHzT2yNa6mPEql-ozm0u0,1013
 nshtrainer/_checkpoint/loader.py,sha256=5vjg-OFChXJjgiOVv8vnV8nwTscfdDtEdxQRz6uPfDE,14158
 nshtrainer/_checkpoint/metadata.py,sha256=5D4PgKodzhLsmQvuF3xxkH49epKaegxi4wh_ImDTtns,4737
@@ -10,7 +10,7 @@ nshtrainer/callbacks/_throughput_monitor_callback.py,sha256=aJo_11rc4lo0IYOd-kHm
 nshtrainer/callbacks/actsave.py,sha256=qbnaKts4_dvjPeAaPtv7Ds12_vEWzaHUfg_--49NB9I,4041
 nshtrainer/callbacks/base.py,sha256=NpjeKmonJ1Kaz5_39XSn3LlDwvbGjk6WV8BpHSNCvI4,3508
 nshtrainer/callbacks/checkpoint/__init__.py,sha256=g-3zIthupERKqWZQw-A_busQPaPRkto6iHBV-M7nK1Y,527
-nshtrainer/callbacks/checkpoint/_base.py,sha256=MzMF7JtvR3A_7DAM2r4NGQSBDisA7krv6WlVk5rKABQ,6157
+nshtrainer/callbacks/checkpoint/_base.py,sha256=vvlwuD-20NozYVIolGGShmUdkkNYeuwN6xCoFnK4GiU,6157
 nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=8BHgLAd3Tuzf5sup0guEAKF1jJiAwYsjdKBFYZw98ac,2171
 nshtrainer/callbacks/checkpoint/last_checkpoint.py,sha256=CWWv0cSwQ1VAX26N7hAyMxbNCk26Keh39oQguBEK5To,1102
 nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py,sha256=ctT88EGT22_t_6tr5r7Sfo43cuve6XeroBnBYRMPOus,3372
@@ -25,6 +25,8 @@ nshtrainer/callbacks/print_table.py,sha256=_FdAHhqylWGk4Z0c2FrLFeiMA4jhfA_beZRK_
 nshtrainer/callbacks/throughput_monitor.py,sha256=H_ocXErZxUO3dxFk8Tx_VQdpI9E_Ztvqof5WtFevLyQ,1838
 nshtrainer/callbacks/timer.py,sha256=quS79oYClDUvQxJkNWmDMe0hwRUkkREgTgqzVrnom50,4607
 nshtrainer/callbacks/wandb_watch.py,sha256=Y6SEXfIx3kDDQbI5zpP53BVq0FBLJbLd3RJsiHZk1-Y,2921
+nshtrainer/config/__init__.py,sha256=v9RtlM1Pqj_4fCDfskgxEtiGtbWH3Tj7lqNsKCDQ4gk,119
+nshtrainer/config/duration.py,sha256=f_obz0eorkktI3HzAuIawABDkvuL4lDqCxcPb3UW7Q4,692
 nshtrainer/data/__init__.py,sha256=7mk1tr7SWUZ7ySbsf0y0ZPszk7u4QznPhQ-7wnpH9ec,149
 nshtrainer/data/balanced_batch_sampler.py,sha256=dGBTDDtlBU6c-ZlVQOCnTW7SjTB5hczWsOWEdUWjvkA,4385
 nshtrainer/data/transform.py,sha256=6SNs3_TpNpfhcwTwvPKyEJ3opM1OT7LmMEYQNHKgRl8,2227
@@ -52,7 +54,7 @@ nshtrainer/loggers/tensorboard.py,sha256=wL2amRSdP68zbslZvBeM0ZQBnjF3hIKsz-_lBbd
 nshtrainer/loggers/wandb.py,sha256=FPwbf618AYmuPzHdhd1ZFhJ8qDjwTUiSe7cm7g3KCyM,5112
 nshtrainer/lr_scheduler/__init__.py,sha256=uEvgaFAs-4s_bAEMaildy0GT6OvgpgOEKTuzqutESHE,736
 nshtrainer/lr_scheduler/_base.py,sha256=7xOIuxQ86YHbFWG5a3gX46emQj1WN_LaY4-i0Q1TDBg,3659
-nshtrainer/lr_scheduler/linear_warmup_cosine.py,sha256=mn6cyizyI_stkXtg6zxIEGF9btIxMRWigUHUTlUYCSw,5221
+nshtrainer/lr_scheduler/linear_warmup_cosine.py,sha256=pmX5n7mmhSqPTz4Nu9g_JTsE9gzCkuU4V3GuAHUsDoA,5451
 nshtrainer/lr_scheduler/reduce_lr_on_plateau.py,sha256=h76oTHYpMxauV_l6lviya5DW-WKArwxxf7ZQizhmbCw,2782
 nshtrainer/metrics/__init__.py,sha256=ObLIELGguIEcUpRsUkqh1ltrvZii6vglTpJGrPvoy00,50
 nshtrainer/metrics/_config.py,sha256=jgRBfDAQLFTW7AiUY7CRtdfts6CR6keeuqm0FFMWCzQ,1288
@@ -87,6 +89,6 @@ nshtrainer/util/seed.py,sha256=Or2wMPsnQxfnZ2xfBiyMcHFIUt3tGTNeMMyOEanCkqs,280
 nshtrainer/util/slurm.py,sha256=rofIU26z3SdL79SF45tNez6juou1cyDLz07oXEZb9Hg,1566
 nshtrainer/util/typed.py,sha256=NGuDkDzFlc1fAoaXjOFZVbmj0mRFjsQi1E_hPa7Bn5U,128
 nshtrainer/util/typing_utils.py,sha256=8ptjSSLZxlmy4FY6lzzkoGoF5fGNClo8-B_c0XHQaNU,385
-nshtrainer-0.29.0.dist-info/METADATA,sha256=EP3cdORGt4w_H0pX-whQJ5ULsO5HQXo3VlHp5bkfqfk,916
-nshtrainer-0.29.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-nshtrainer-0.29.0.dist-info/RECORD,,
+nshtrainer-0.30.0.dist-info/METADATA,sha256=lDudS-lD7exw8lNe_3vT13ysnk491QCkObXGLQtjhMk,916
+nshtrainer-0.30.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+nshtrainer-0.30.0.dist-info/RECORD,,

{nshtrainer-0.29.0.dist-info → nshtrainer-0.30.0.dist-info}/WHEEL RENAMED Viewed

File without changes

nshtrainer 0.29.0__py3-none-any.whl → 0.30.0__py3-none-any.whl

nshtrainer 0.29.0__py3-none-any.whl → 0.30.0__py3-none-any.whl