nshtrainer 1.0.0b39__py3-none-any.whl → 1.0.0b41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nshtrainer/configs/.gitattributes +1 -0
- nshtrainer/nn/__init__.py +1 -0
- nshtrainer/nn/mlp.py +70 -45
- nshtrainer/nn/tests/test_mlp.py +55 -0
- {nshtrainer-1.0.0b39.dist-info → nshtrainer-1.0.0b41.dist-info}/METADATA +2 -2
- {nshtrainer-1.0.0b39.dist-info → nshtrainer-1.0.0b41.dist-info}/RECORD +7 -5
- {nshtrainer-1.0.0b39.dist-info → nshtrainer-1.0.0b41.dist-info}/WHEEL +1 -1
nshtrainer/configs/.gitattributes
ADDED
@@ -0,0 +1 @@
+* linguist-generated=true
nshtrainer/nn/__init__.py
CHANGED
@@ -4,6 +4,7 @@ from .mlp import MLP as MLP
 from .mlp import MLPConfig as MLPConfig
 from .mlp import MLPConfigDict as MLPConfigDict
 from .mlp import ResidualSequential as ResidualSequential
+from .mlp import custom_seed_context as custom_seed_context
 from .module_dict import TypedModuleDict as TypedModuleDict
 from .module_list import TypedModuleList as TypedModuleList
 from .nonlinearity import ELUNonlinearityConfig as ELUNonlinearityConfig
nshtrainer/nn/mlp.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import contextlib
 import copy
 from collections.abc import Callable, Sequence
 from typing import Literal, Protocol, runtime_checkable
@@ -44,6 +45,9 @@ class MLPConfigDict(TypedDict):
     residual: bool
     """Whether to use residual connections between layers."""
 
+    seed: int | None
+    """Random seed to use for initialization. If None, the default Torch behavior is used."""
+
 
 class MLPConfig(C.Config):
     bias: bool = True
@@ -64,15 +68,20 @@ class MLPConfig(C.Config):
     residual: bool = False
     """Whether to use residual connections between layers."""
 
+    seed: int | None = None
+    """Random seed to use for initialization. If None, the default Torch behavior is used."""
+
     def to_kwargs(self) -> MLPConfigDict:
-        return {
+        kwargs: MLPConfigDict = {
             "bias": self.bias,
             "no_bias_scalar": self.no_bias_scalar,
             "nonlinearity": self.nonlinearity,
             "ln": self.ln,
             "dropout": self.dropout,
             "residual": self.residual,
+            "seed": self.seed,
         }
+        return kwargs
 
     def create_module(
         self,
@@ -90,6 +99,18 @@ class MLPConfig(C.Config):
         )
 
 
+@contextlib.contextmanager
+def custom_seed_context(seed: int | None):
+    with contextlib.ExitStack() as stack:
+        if seed is not None:
+            stack.enter_context(
+                torch.random.fork_rng(devices=range(torch.cuda.device_count()))
+            )
+            torch.manual_seed(seed)
+
+        yield
+
+
 def MLP(
     dims: Sequence[int],
     activation: NonlinearityConfigBase
@@ -108,6 +129,7 @@ def MLP(
     pre_layers: Sequence[nn.Module] = [],
     post_layers: Sequence[nn.Module] = [],
     linear_cls: LinearModuleConstructor = nn.Linear,
+    seed: int | None = None,
 ):
     """
     Constructs a multi-layer perceptron (MLP) with the given dimensions and activation function.
@@ -123,52 +145,55 @@
         residual (bool, optional): Whether to use residual connections between layers. Defaults to False.
         pre_layers (Sequence[nn.Module], optional): List of layers to insert before the linear layers. Defaults to [].
        post_layers (Sequence[nn.Module], optional): List of layers to insert after the linear layers. Defaults to [].
+        linear_cls (LinearModuleConstructor, optional): Linear module constructor to use. Defaults to nn.Linear.
+        seed (int | None, optional): Random seed to use for initialization. If None, the default Torch behavior is used. Defaults to None.
 
     Returns:
         nn.Sequential: The constructed MLP.
     """
 
-    [44 removed lines: the previous MLP construction body, elided in this diff view; the same logic reappears below wrapped in the new seed context]
+    with custom_seed_context(seed):
+        if activation is None:
+            activation = nonlinearity
+
+        if len(dims) < 2:
+            raise ValueError("mlp requires at least 2 dimensions")
+        if ln is True:
+            ln = "pre"
+        elif isinstance(ln, str) and ln not in ("pre", "post"):
+            raise ValueError("ln must be a boolean or 'pre' or 'post'")
+
+        layers: list[nn.Module] = []
+        if ln == "pre":
+            layers.append(nn.LayerNorm(dims[0]))
+
+        layers.extend(pre_layers)
+
+        for i in range(len(dims) - 1):
+            in_features = dims[i]
+            out_features = dims[i + 1]
+            bias_ = bias and not (no_bias_scalar and out_features == 1)
+            layers.append(linear_cls(in_features, out_features, bias=bias_))
+            if dropout is not None:
+                layers.append(nn.Dropout(dropout))
+            if i < len(dims) - 2:
+                match activation:
+                    case NonlinearityConfigBase():
+                        layers.append(activation.create_module())
+                    case nn.Module():
+                        # In this case, we create a deep copy of the module to avoid sharing parameters (if any).
+                        layers.append(copy.deepcopy(activation))
+                    case Callable():
+                        layers.append(activation())
+                    case _:
+                        raise ValueError(
+                            "Either `nonlinearity` or `activation` must be provided"
+                        )
+
+        layers.extend(post_layers)
+
+        if ln == "post":
+            layers.append(nn.LayerNorm(dims[-1]))
+
+        cls = ResidualSequential if residual else nn.Sequential
+        return cls(*layers)
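The net effect of the mlp.py change is a new optional seed argument on MLP (with a matching seed field on MLPConfig) and a reusable custom_seed_context helper that forks the torch RNG state before seeding. A minimal usage sketch based on the API added above; the dimensions and seed values are illustrative only:

import torch

from nshtrainer.nn import MLP, custom_seed_context

# Two MLPs built with the same seed get identical initial weights.
mlp_a = MLP([10, 20, 5], activation=torch.nn.ReLU(), seed=42)
mlp_b = MLP([10, 20, 5], activation=torch.nn.ReLU(), seed=42)
assert torch.allclose(mlp_a[0].weight, mlp_b[0].weight)

# The helper can also wrap other initialization code; passing None makes it a no-op.
with custom_seed_context(42):
    layer = torch.nn.Linear(8, 8)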
nshtrainer/nn/tests/test_mlp.py
ADDED
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+from typing import cast
+
+import pytest
+import torch
+
+from nshtrainer.nn.mlp import MLP
+
+
+def test_mlp_seed_reproducibility():
+    """Test that the seed parameter in MLP ensures reproducible weights."""
+
+    # Test dimensions
+    dims = [10, 20, 5]
+
+    # Create two MLPs with the same seed
+    seed1 = 42
+    mlp1 = MLP(dims, activation=torch.nn.ReLU(), seed=seed1)
+    mlp2 = MLP(dims, activation=torch.nn.ReLU(), seed=seed1)
+
+    # Create an MLP with a different seed
+    seed2 = 123
+    mlp3 = MLP(dims, activation=torch.nn.ReLU(), seed=seed2)
+
+    # Check first layer weights
+    layer1_weights1 = cast(torch.Tensor, mlp1[0].weight)
+    layer1_weights2 = cast(torch.Tensor, mlp2[0].weight)
+    layer1_weights3 = cast(torch.Tensor, mlp3[0].weight)
+
+    # Same seed should produce identical weights
+    assert torch.allclose(layer1_weights1, layer1_weights2)
+
+    # Different seeds should produce different weights
+    assert not torch.allclose(layer1_weights1, layer1_weights3)
+
+    # Check second layer weights
+    layer2_weights1 = cast(torch.Tensor, mlp1[2].weight)
+    layer2_weights2 = cast(torch.Tensor, mlp2[2].weight)
+    layer2_weights3 = cast(torch.Tensor, mlp3[2].weight)
+
+    # Same seed should produce identical weights for all layers
+    assert torch.allclose(layer2_weights1, layer2_weights2)
+
+    # Different seeds should produce different weights for all layers
+    assert not torch.allclose(layer2_weights1, layer2_weights3)
+
+    # Test that not providing a seed gives different results each time
+    mlp4 = MLP(dims, activation=torch.nn.ReLU(), seed=None)
+    mlp5 = MLP(dims, activation=torch.nn.ReLU(), seed=None)
+
+    # Without seeds, weights should be different
+    assert not torch.allclose(
+        cast(torch.Tensor, mlp4[0].weight), cast(torch.Tensor, mlp5[0].weight)
+    )
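The test above covers weight reproducibility; one property it does not assert is that custom_seed_context leaves the caller's global RNG state untouched, which follows from its use of torch.random.fork_rng. A small sketch of that behavior, assuming a CPU-only check:

import torch

from nshtrainer.nn.mlp import custom_seed_context

state_before = torch.random.get_rng_state()
with custom_seed_context(0):
    _ = torch.randn(3)  # draws from the forked, seeded generator
state_after = torch.random.get_rng_state()

# fork_rng restores the previous CPU (and selected CUDA) RNG state on exit.
assert torch.equal(state_before, state_after)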
{nshtrainer-1.0.0b39.dist-info → nshtrainer-1.0.0b41.dist-info}/RECORD
RENAMED
@@ -30,6 +30,7 @@ nshtrainer/callbacks/shared_parameters.py,sha256=s94jJTAIbDGukYJu6l247QonVOCudGC
 nshtrainer/callbacks/timer.py,sha256=gDcw_K_ikf0bkVgxQ0cDhvvNvz6GLZVLcatuKfh0ORU,4731
 nshtrainer/callbacks/wandb_upload_code.py,sha256=shV7UtnXgY2bUlXdVrXiaDs0PNLlIt7TzNJkJPkzvzI,2414
 nshtrainer/callbacks/wandb_watch.py,sha256=VB14Dy5ZRXQ3di0fPv0K_DFJurLhroLPytnuwQBiJFg,3037
+nshtrainer/configs/.gitattributes,sha256=VeZmarvNEqiRBOHGcllpKm90nL6C8u4tBu7SEm7fj-E,26
 nshtrainer/configs/__init__.py,sha256=MZfcSKhnjtVObBvVv9lu8L2cFTLINP5zcTQvWnz8jdk,14505
 nshtrainer/configs/_checkpoint/__init__.py,sha256=6s7Y68StboqscY2G4P_QG443jz5aiym5SjOogIljWLg,342
 nshtrainer/configs/_checkpoint/metadata/__init__.py,sha256=oOPfYkXTjKgm6pluGsG6V1TPyCEGjsQpHVL-LffSUFQ,290
@@ -116,11 +117,12 @@ nshtrainer/model/base.py,sha256=JL3AmH17GQjQIoMrZl3O0vUI7dj5ZsO5iEJgoLPyzHw,1035
 nshtrainer/model/mixins/callback.py,sha256=0LPgve4VszHbLipid4mpI1qnnmdGS2spivs0dXLvqHw,3154
 nshtrainer/model/mixins/debug.py,sha256=1LX9KzeFX9JDPs_a6YCdYDZXLhEk_5rBO2aCqlfBy7w,2087
 nshtrainer/model/mixins/logger.py,sha256=27H99FuLaxc6_dDLG2pid4E_5E0-eLGnc2Ifpt0HYIM,6066
-nshtrainer/nn/__init__.py,sha256=
-nshtrainer/nn/mlp.py,sha256=
+nshtrainer/nn/__init__.py,sha256=0FgeoaLYtRiSLT8fdPigLD8t-d8DKR8IQDw16JA9lT4,1523
+nshtrainer/nn/mlp.py,sha256=_a8rJJniSCvM08gyQGO-5MUoO18U9_FSGGn3tZL2_U4,7101
 nshtrainer/nn/module_dict.py,sha256=9plb8aQUx5TUEPhX5jI9u8LrpTeKe7jZAHi8iIqcN8w,2365
 nshtrainer/nn/module_list.py,sha256=UB43pcwD_3nUke_DyLQt-iXKhWdKM6Zjm84lRC1hPYA,1755
 nshtrainer/nn/nonlinearity.py,sha256=xmaL4QCRvCxqmaGIOwetJeKK-6IK4m2OV7D3SjxSwJQ,6322
+nshtrainer/nn/tests/test_mlp.py,sha256=xBPiHlBvOCn67EbpzzKL-2FU7ikGxHT3i6CMSp1wk7M,1840
 nshtrainer/optimizer.py,sha256=u968GRNPUNn3f_9BEY2RBNuJq5O3wJWams3NG0dkrOA,1738
 nshtrainer/profiler/__init__.py,sha256=RjaNBoVcTFu8lF0dNlFp-2LaPYdonoIbDy2_KhgF0Ek,594
 nshtrainer/profiler/_base.py,sha256=kFcSVn9gJuMwgDxbfyHh46CmEAIPZjxw3yjPbKgzvwA,950
@@ -151,6 +153,6 @@ nshtrainer/util/seed.py,sha256=diMV8iwBKN7Xxt5pELmui-gyqyT80_CZzomrWhNss0k,316
 nshtrainer/util/slurm.py,sha256=HflkP5iI_r4UHMyPjw9R4dD5AHsJUpcfJw5PLvGYBRM,1603
 nshtrainer/util/typed.py,sha256=Xt5fUU6zwLKSTLUdenovnKK0N8qUq89Kddz2_XeykVQ,164
 nshtrainer/util/typing_utils.py,sha256=MjY-CUX9R5Tzat-BlFnQjwl1PQ_W2yZQoXhkYHlJ_VA,442
-nshtrainer-1.0.
-nshtrainer-1.0.
-nshtrainer-1.0.
+nshtrainer-1.0.0b41.dist-info/METADATA,sha256=DL9HgN6RP8X8v0sCdTr2IjRSwIBY96NZXe15m5V4y4c,988
+nshtrainer-1.0.0b41.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+nshtrainer-1.0.0b41.dist-info/RECORD,,
|