PyPI - nshtrainer - Versions diffs - 1.0.0b37__py3-none-any.whl → 1.0.0b40__py3-none-any.whl - Mend

nshtrainer 1.0.0b37__py3-none-any.whl → 1.0.0b40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

nshtrainer/_directory.py +1 -1
nshtrainer/callbacks/checkpoint/last_checkpoint.py +13 -12
nshtrainer/configs/.gitattributes +1 -0
nshtrainer/loggers/base.py +9 -0
nshtrainer/nn/mlp.py +64 -45
nshtrainer/nn/tests/test_mlp.py +55 -0
{nshtrainer-1.0.0b37.dist-info → nshtrainer-1.0.0b40.dist-info}/METADATA +2 -2
{nshtrainer-1.0.0b37.dist-info → nshtrainer-1.0.0b40.dist-info}/RECORD +9 -7
{nshtrainer-1.0.0b37.dist-info → nshtrainer-1.0.0b40.dist-info}/WHEEL +1 -1

nshtrainer/_directory.py CHANGED Viewed

@@ -81,7 +81,7 @@ class DirectoryConfig(C.Config):
         # Save to nshtrainer/{id}/log/{logger name}
         log_dir = self.resolve_subdirectory(run_id, "log")
-        log_dir = log_dir / getattr(logger, "name")
+        log_dir = log_dir / logger.resolve_logger_dirname()
         # ^ NOTE: Logger must have a `name` attribute, as this is
         # the discriminator for the logger registry
         log_dir.mkdir(exist_ok=True)

nshtrainer/callbacks/checkpoint/last_checkpoint.py CHANGED Viewed

@@ -21,11 +21,8 @@ log = logging.getLogger(__name__)
 class LastCheckpointCallbackConfig(BaseCheckpointCallbackConfig):
     name: Literal["last_checkpoint"] = "last_checkpoint"
-    save_on_time_interval: bool = True
-    """Whether to save checkpoints based on time interval."""
-    interval: timedelta = timedelta(hours=12)
-    """Time interval between checkpoints when save_on_time_interval is True."""
+    save_on_time_interval: timedelta | None = None
+    """Save a checkpoint every `save_on_time_interval` seconds. If `None`, this feature is disabled."""
     @override
     def create_checkpoint(self, trainer_config, dirpath):
@@ -38,8 +35,6 @@ class LastCheckpointCallback(CheckpointBase[LastCheckpointCallbackConfig]):
         super().__init__(config, dirpath)
         self.start_time = time.time()
         self.last_checkpoint_time = self.start_time
-        self.interval_seconds = config.interval.total_seconds()
-        self.save_on_time_interval = config.save_on_time_interval
     @override
     def name(self):
@@ -57,12 +52,18 @@ class LastCheckpointCallback(CheckpointBase[LastCheckpointCallbackConfig]):
     def topk_sort_reverse(self):
         return True
-    def _should_checkpoint(self) -> bool:
-        if not self.save_on_time_interval:
+    def _local_should_checkpoint(self) -> bool:
+        if (interval := self.config.save_on_time_interval) is None:
             return False
         current_time = time.time()
         elapsed_time = current_time - self.last_checkpoint_time
-        return elapsed_time >= self.interval_seconds
+        return elapsed_time >= interval.total_seconds()
+    def _should_checkpoint(self, trainer: Trainer):
+        if self.config.save_on_time_interval is None:
+            return False
+        return trainer.strategy.broadcast(self._local_should_checkpoint(), src=0)
     def _format_duration(self, seconds: float) -> str:
         """Format duration in seconds to a human-readable string."""
@@ -98,7 +99,7 @@ class LastCheckpointCallback(CheckpointBase[LastCheckpointCallbackConfig]):
         *args,
         **kwargs,
     ):
-        if not self._should_checkpoint():
+        if not self._should_checkpoint(trainer):
             return
         self.save_checkpoints(trainer)
@@ -110,5 +111,5 @@ class LastCheckpointCallback(CheckpointBase[LastCheckpointCallbackConfig]):
     def save_checkpoints(self, trainer):
         super().save_checkpoints(trainer)
-        if self.save_on_time_interval:
+        if self.config.save_on_time_interval is not None:
             self.last_checkpoint_time = time.time()

nshtrainer/configs/.gitattributes ADDED Viewed

@@ -0,0 +1 @@
+* linguist-generated=true

nshtrainer/loggers/base.py CHANGED Viewed

@@ -30,5 +30,14 @@ class LoggerConfigBase(C.Config, ABC):
     def __bool__(self):
         return self.enabled
+    def resolve_logger_dirname(self) -> str:
+        if not (name := getattr(self, "name", None)):
+            raise ValueError(
+                "Logger must have a name attribute to resolve the directory name.\n"
+                "Otherwise, you must override `resolve_logger_dirname`."
+            )
+        return name
 logger_registry = C.Registry(LoggerConfigBase, discriminator="name")

nshtrainer/nn/mlp.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import contextlib
 import copy
 from collections.abc import Callable, Sequence
 from typing import Literal, Protocol, runtime_checkable
@@ -44,6 +45,9 @@ class MLPConfigDict(TypedDict):
     residual: bool
     """Whether to use residual connections between layers."""
+    seed: int | None
+    """Random seed to use for initialization. If None, the default Torch behavior is used."""
 class MLPConfig(C.Config):
     bias: bool = True
@@ -64,15 +68,20 @@ class MLPConfig(C.Config):
     residual: bool = False
     """Whether to use residual connections between layers."""
+    seed: int | None = None
+    """Random seed to use for initialization. If None, the default Torch behavior is used."""
     def to_kwargs(self) -> MLPConfigDict:
-        return {
+        kwargs: MLPConfigDict = {
             "bias": self.bias,
             "no_bias_scalar": self.no_bias_scalar,
             "nonlinearity": self.nonlinearity,
             "ln": self.ln,
             "dropout": self.dropout,
             "residual": self.residual,
+            "seed": self.seed,
         }
+        return kwargs
     def create_module(
         self,
@@ -108,6 +117,7 @@ def MLP(
     pre_layers: Sequence[nn.Module] = [],
     post_layers: Sequence[nn.Module] = [],
     linear_cls: LinearModuleConstructor = nn.Linear,
+    seed: int | None = None,
 ):
     """
     Constructs a multi-layer perceptron (MLP) with the given dimensions and activation function.
@@ -123,52 +133,61 @@ def MLP(
         residual (bool, optional): Whether to use residual connections between layers. Defaults to False.
         pre_layers (Sequence[nn.Module], optional): List of layers to insert before the linear layers. Defaults to [].
         post_layers (Sequence[nn.Module], optional): List of layers to insert after the linear layers. Defaults to [].
+        linear_cls (LinearModuleConstructor, optional): Linear module constructor to use. Defaults to nn.Linear.
+        seed (int | None, optional): Random seed to use for initialization. If None, the default Torch behavior is used. Defaults to None.
     Returns:
         nn.Sequential: The constructed MLP.
     """
-    if activation is None:
-        activation = nonlinearity
-    if len(dims) < 2:
-        raise ValueError("mlp requires at least 2 dimensions")
-    if ln is True:
-        ln = "pre"
-    elif isinstance(ln, str) and ln not in ("pre", "post"):
-        raise ValueError("ln must be a boolean or 'pre' or 'post'")
-    layers: list[nn.Module] = []
-    if ln == "pre":
-        layers.append(nn.LayerNorm(dims[0]))
-    layers.extend(pre_layers)
-    for i in range(len(dims) - 1):
-        in_features = dims[i]
-        out_features = dims[i + 1]
-        bias_ = bias and not (no_bias_scalar and out_features == 1)
-        layers.append(linear_cls(in_features, out_features, bias=bias_))
-        if dropout is not None:
-            layers.append(nn.Dropout(dropout))
-        if i < len(dims) - 2:
-            match activation:
-                case NonlinearityConfigBase():
-                    layers.append(activation.create_module())
-                case nn.Module():
-                    # In this case, we create a deep copy of the module to avoid sharing parameters (if any).
-                    layers.append(copy.deepcopy(activation))
-                case Callable():
-                    layers.append(activation())
-                case _:
-                    raise ValueError(
-                        "Either `nonlinearity` or `activation` must be provided"
-                    )
-    layers.extend(post_layers)
-    if ln == "post":
-        layers.append(nn.LayerNorm(dims[-1]))
-    cls = ResidualSequential if residual else nn.Sequential
-    return cls(*layers)
+    with contextlib.ExitStack() as stack:
+        if seed is not None:
+            stack.enter_context(
+                torch.random.fork_rng(devices=range(torch.cuda.device_count()))
+            )
+            torch.manual_seed(seed)
+        if activation is None:
+            activation = nonlinearity
+        if len(dims) < 2:
+            raise ValueError("mlp requires at least 2 dimensions")
+        if ln is True:
+            ln = "pre"
+        elif isinstance(ln, str) and ln not in ("pre", "post"):
+            raise ValueError("ln must be a boolean or 'pre' or 'post'")
+        layers: list[nn.Module] = []
+        if ln == "pre":
+            layers.append(nn.LayerNorm(dims[0]))
+        layers.extend(pre_layers)
+        for i in range(len(dims) - 1):
+            in_features = dims[i]
+            out_features = dims[i + 1]
+            bias_ = bias and not (no_bias_scalar and out_features == 1)
+            layers.append(linear_cls(in_features, out_features, bias=bias_))
+            if dropout is not None:
+                layers.append(nn.Dropout(dropout))
+            if i < len(dims) - 2:
+                match activation:
+                    case NonlinearityConfigBase():
+                        layers.append(activation.create_module())
+                    case nn.Module():
+                        # In this case, we create a deep copy of the module to avoid sharing parameters (if any).
+                        layers.append(copy.deepcopy(activation))
+                    case Callable():
+                        layers.append(activation())
+                    case _:
+                        raise ValueError(
+                            "Either `nonlinearity` or `activation` must be provided"
+                        )
+        layers.extend(post_layers)
+        if ln == "post":
+            layers.append(nn.LayerNorm(dims[-1]))
+        cls = ResidualSequential if residual else nn.Sequential
+        return cls(*layers)

nshtrainer/nn/tests/test_mlp.py ADDED Viewed

@@ -0,0 +1,55 @@
+from __future__ import annotations
+from typing import cast
+import pytest
+import torch
+from nshtrainer.nn.mlp import MLP
+def test_mlp_seed_reproducibility():
+    """Test that the seed parameter in MLP ensures reproducible weights."""
+    # Test dimensions
+    dims = [10, 20, 5]
+    # Create two MLPs with the same seed
+    seed1 = 42
+    mlp1 = MLP(dims, activation=torch.nn.ReLU(), seed=seed1)
+    mlp2 = MLP(dims, activation=torch.nn.ReLU(), seed=seed1)
+    # Create an MLP with a different seed
+    seed2 = 123
+    mlp3 = MLP(dims, activation=torch.nn.ReLU(), seed=seed2)
+    # Check first layer weights
+    layer1_weights1 = cast(torch.Tensor, mlp1[0].weight)
+    layer1_weights2 = cast(torch.Tensor, mlp2[0].weight)
+    layer1_weights3 = cast(torch.Tensor, mlp3[0].weight)
+    # Same seed should produce identical weights
+    assert torch.allclose(layer1_weights1, layer1_weights2)
+    # Different seeds should produce different weights
+    assert not torch.allclose(layer1_weights1, layer1_weights3)
+    # Check second layer weights
+    layer2_weights1 = cast(torch.Tensor, mlp1[2].weight)
+    layer2_weights2 = cast(torch.Tensor, mlp2[2].weight)
+    layer2_weights3 = cast(torch.Tensor, mlp3[2].weight)
+    # Same seed should produce identical weights for all layers
+    assert torch.allclose(layer2_weights1, layer2_weights2)
+    # Different seeds should produce different weights for all layers
+    assert not torch.allclose(layer2_weights1, layer2_weights3)
+    # Test that not providing a seed gives different results each time
+    mlp4 = MLP(dims, activation=torch.nn.ReLU(), seed=None)
+    mlp5 = MLP(dims, activation=torch.nn.ReLU(), seed=None)
+    # Without seeds, weights should be different
+    assert not torch.allclose(
+        cast(torch.Tensor, mlp4[0].weight), cast(torch.Tensor, mlp5[0].weight)
+    )

{nshtrainer-1.0.0b37.dist-info → nshtrainer-1.0.0b40.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: nshtrainer
-Version: 1.0.0b37
+Version: 1.0.0b40
 Summary:
 Author: Nima Shoghi
 Author-email: nimashoghi@gmail.com

{nshtrainer-1.0.0b37.dist-info → nshtrainer-1.0.0b40.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ nshtrainer/__init__.py,sha256=g_moPnfQxSxFZX5NB9ILQQOJrt4RTRuiFt9N0STIpxM,874
 nshtrainer/_callback.py,sha256=tXQCDzS6CvMTuTY5lQSH5qZs1pXUi-gt9bQdpXMVdEs,12715
 nshtrainer/_checkpoint/metadata.py,sha256=PHy-54Cg-o3OtCffAqrVv6ZVMU7zhRo_-sZiSEEno1Y,5019
 nshtrainer/_checkpoint/saver.py,sha256=LOP8jjKF0Dw9x9H-BKrLMWlEp1XTan2DUK0zQUCWw5U,1360
-nshtrainer/_directory.py,sha256=xY8Z9POZJw0Uh56yqffZbnNZvdA_tnWCucT31dhwFCM,3183
+nshtrainer/_directory.py,sha256=TJR9ccyuzRlAVfVjGyeQ3E2AFAcz-XbBCxWfiXo2SlY,3191
 nshtrainer/_experimental/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
 nshtrainer/_hf_hub.py,sha256=4OsCbIITnZk_YLyoMrVyZ0SIN04FBxlC0ig2Et8UAdo,14287
 nshtrainer/callbacks/__init__.py,sha256=4giOYT8A709UOLRtQEt16QbOAFUHCjJ_aLB7ITTwXJI,3577
@@ -12,7 +12,7 @@ nshtrainer/callbacks/base.py,sha256=Alaou1IHAIlMEM7g58d_02ozY2xWlshBN7fsw5Ee21s,
 nshtrainer/callbacks/checkpoint/__init__.py,sha256=l8tkHc83_mLiU0-wT09SWdRzwpm2ulbkLzcuCmuTwzE,620
 nshtrainer/callbacks/checkpoint/_base.py,sha256=ZVEUVl5kjCSSe69Q0rMUbKBNNUog0pxBwWkeyuxG2w0,6304
 nshtrainer/callbacks/checkpoint/best_checkpoint.py,sha256=2CQuhPJ3Fi7lDw7z-J8kXXXuDU8-4HcU48oZxR49apk,2667
-nshtrainer/callbacks/checkpoint/last_checkpoint.py,sha256=MJcNB0biOebx2si2IBFaSUiVOSLSCZTzxB-RcEgO2gY,3482
+nshtrainer/callbacks/checkpoint/last_checkpoint.py,sha256=vn-as3ex7kaTRcKsIurVtM6kUSHYNwHJeYG82j2dMcc,3554
 nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py,sha256=nljzETqkHwA-4g8mxaeFK5HxA8My0dlIPzIUscSMWyk,3525
 nshtrainer/callbacks/debug_flag.py,sha256=96fuP0C7C6dSs1GiMeUYzzs0X3Q4Pjt9JVWg3b75fU4,1748
 nshtrainer/callbacks/directory_setup.py,sha256=wPas_Ren8ANejogmIdKhqqgj4ulxz9AS_8xVIAfRXa0,2565
@@ -30,6 +30,7 @@ nshtrainer/callbacks/shared_parameters.py,sha256=s94jJTAIbDGukYJu6l247QonVOCudGC
 nshtrainer/callbacks/timer.py,sha256=gDcw_K_ikf0bkVgxQ0cDhvvNvz6GLZVLcatuKfh0ORU,4731
 nshtrainer/callbacks/wandb_upload_code.py,sha256=shV7UtnXgY2bUlXdVrXiaDs0PNLlIt7TzNJkJPkzvzI,2414
 nshtrainer/callbacks/wandb_watch.py,sha256=VB14Dy5ZRXQ3di0fPv0K_DFJurLhroLPytnuwQBiJFg,3037
+nshtrainer/configs/.gitattributes,sha256=VeZmarvNEqiRBOHGcllpKm90nL6C8u4tBu7SEm7fj-E,26
 nshtrainer/configs/__init__.py,sha256=MZfcSKhnjtVObBvVv9lu8L2cFTLINP5zcTQvWnz8jdk,14505
 nshtrainer/configs/_checkpoint/__init__.py,sha256=6s7Y68StboqscY2G4P_QG443jz5aiym5SjOogIljWLg,342
 nshtrainer/configs/_checkpoint/metadata/__init__.py,sha256=oOPfYkXTjKgm6pluGsG6V1TPyCEGjsQpHVL-LffSUFQ,290
@@ -101,7 +102,7 @@ nshtrainer/data/datamodule.py,sha256=lSOgH32nysJWa6Y7ba1QyOdUV0DVVdO98qokP8wigjk
 nshtrainer/data/transform.py,sha256=qd0lIocO59Fk_m90xyOHgFezbymd1mRwly8nbYIfHGc,2263
 nshtrainer/loggers/__init__.py,sha256=Ddd3JJXVzew_ZpwHA9kGnGmvq4OwhItwghDL5PzNhDc,614
 nshtrainer/loggers/actsave.py,sha256=wgNrpBB6wQM7qff8iLDb_sQnbiAcYHRmH56pcEJPB3o,1409
-nshtrainer/loggers/base.py,sha256=1-HoPmOiyXevQvMLXboiKe-4GOE1V5SvjURohOHakVc,882
+nshtrainer/loggers/base.py,sha256=ON92XbwTSgadQOSyw5PiRRFzyH6uJ-xLtE0nB3cbgPc,1205
 nshtrainer/loggers/csv.py,sha256=xJ8mSmw4vJwinIfqhF6t2HWmh_1dXEYyLfGuXwL7WHo,1160
 nshtrainer/loggers/tensorboard.py,sha256=E7iO_fDt9bfH02hBL430bXPLljOo5iGgq2QyPqmx2gQ,2324
 nshtrainer/loggers/wandb.py,sha256=KZXAUWrrmdX_L8rqej77oUHaM0JxZRM8y9z6JP9PISw,6856
@@ -117,10 +118,11 @@ nshtrainer/model/mixins/callback.py,sha256=0LPgve4VszHbLipid4mpI1qnnmdGS2spivs0d
 nshtrainer/model/mixins/debug.py,sha256=1LX9KzeFX9JDPs_a6YCdYDZXLhEk_5rBO2aCqlfBy7w,2087
 nshtrainer/model/mixins/logger.py,sha256=27H99FuLaxc6_dDLG2pid4E_5E0-eLGnc2Ifpt0HYIM,6066
 nshtrainer/nn/__init__.py,sha256=7KCs-GDOynCXAIdwkgAQacc0p3FHLEION50UtrvgAOc,1463
-nshtrainer/nn/mlp.py,sha256=ZbkLyOc08stgIugvu1G5_h66DYtxAFDnboikBaJvvZ8,5988
+nshtrainer/nn/mlp.py,sha256=OatI_pzAqa2awKfvMY4CnznO-LltLT8NHQPJKSNn8IM,6979
 nshtrainer/nn/module_dict.py,sha256=9plb8aQUx5TUEPhX5jI9u8LrpTeKe7jZAHi8iIqcN8w,2365
 nshtrainer/nn/module_list.py,sha256=UB43pcwD_3nUke_DyLQt-iXKhWdKM6Zjm84lRC1hPYA,1755
 nshtrainer/nn/nonlinearity.py,sha256=xmaL4QCRvCxqmaGIOwetJeKK-6IK4m2OV7D3SjxSwJQ,6322
+nshtrainer/nn/tests/test_mlp.py,sha256=xBPiHlBvOCn67EbpzzKL-2FU7ikGxHT3i6CMSp1wk7M,1840
 nshtrainer/optimizer.py,sha256=u968GRNPUNn3f_9BEY2RBNuJq5O3wJWams3NG0dkrOA,1738
 nshtrainer/profiler/__init__.py,sha256=RjaNBoVcTFu8lF0dNlFp-2LaPYdonoIbDy2_KhgF0Ek,594
 nshtrainer/profiler/_base.py,sha256=kFcSVn9gJuMwgDxbfyHh46CmEAIPZjxw3yjPbKgzvwA,950
@@ -151,6 +153,6 @@ nshtrainer/util/seed.py,sha256=diMV8iwBKN7Xxt5pELmui-gyqyT80_CZzomrWhNss0k,316
 nshtrainer/util/slurm.py,sha256=HflkP5iI_r4UHMyPjw9R4dD5AHsJUpcfJw5PLvGYBRM,1603
 nshtrainer/util/typed.py,sha256=Xt5fUU6zwLKSTLUdenovnKK0N8qUq89Kddz2_XeykVQ,164
 nshtrainer/util/typing_utils.py,sha256=MjY-CUX9R5Tzat-BlFnQjwl1PQ_W2yZQoXhkYHlJ_VA,442
-nshtrainer-1.0.0b37.dist-info/METADATA,sha256=ObMgpZ_qJLmBAkeRDN7ufTuRSTltiB_LYPFTphNvWks,988
-nshtrainer-1.0.0b37.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-nshtrainer-1.0.0b37.dist-info/RECORD,,
+nshtrainer-1.0.0b40.dist-info/METADATA,sha256=m_MxoWKJnesp95-EClArQl90vTm____cgqMdNJ6n-Ng,988
+nshtrainer-1.0.0b40.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+nshtrainer-1.0.0b40.dist-info/RECORD,,

{nshtrainer-1.0.0b37.dist-info → nshtrainer-1.0.0b40.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 1.9.1
+Generator: poetry-core 2.1.1
 Root-Is-Purelib: true
 Tag: py3-none-any

nshtrainer 1.0.0b37__py3-none-any.whl → 1.0.0b40__py3-none-any.whl

nshtrainer 1.0.0b37__py3-none-any.whl → 1.0.0b40__py3-none-any.whl