nshtrainer 0.18.1__tar.gz → 0.19.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/PKG-INFO +1 -1
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/pyproject.toml +1 -1
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/_hf_hub.py +17 -1
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/nn/nonlinearity.py +95 -12
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/README.md +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/_checkpoint/loader.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/_checkpoint/metadata.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/_checkpoint/saver.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/_experimental/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/_throughput_monitor_callback.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/actsave.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/base.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/checkpoint/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/checkpoint/_base.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/checkpoint/best_checkpoint.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/checkpoint/last_checkpoint.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/checkpoint/on_exception_checkpoint.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/early_stopping.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/ema.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/finite_checks.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/gradient_skipping.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/interval.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/log_epoch.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/norm_logging.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/print_table.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/throughput_monitor.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/timer.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/callbacks/wandb_watch.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/data/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/data/balanced_batch_sampler.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/data/transform.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/_experimental.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/actsave.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/callbacks.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/config.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/data.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/log.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/lr_scheduler.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/model.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/nn.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/optimizer.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/runner.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/snapshot.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/snoop.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/trainer.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/typecheck.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/ll/util.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/loggers/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/loggers/_base.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/loggers/csv.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/loggers/tensorboard.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/loggers/wandb.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/lr_scheduler/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/lr_scheduler/_base.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/lr_scheduler/linear_warmup_cosine.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/metrics/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/metrics/_config.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/base.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/config.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/modules/callback.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/modules/debug.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/modules/distributed.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/modules/logger.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/modules/profiler.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/modules/rlp_sanity_checks.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/model/modules/shared_parameters.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/nn/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/nn/mlp.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/nn/module_dict.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/nn/module_list.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/optimizer.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/runner.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/scripts/find_packages.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/trainer/__init__.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/trainer/_runtime_callback.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/trainer/checkpoint_connector.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/trainer/signal_connector.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/trainer/trainer.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/util/_environment_info.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/util/_useful_types.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/util/environment.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/util/seed.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/util/slurm.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/util/typed.py +0 -0
- {nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/util/typing_utils.py +0 -0

{nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/_hf_hub.py

@@ -1,3 +1,4 @@
+import io
 import logging
 import os
 from pathlib import Path

@@ -299,7 +300,22 @@ def _save_checkpoint_files(
     # Resolve the repository name
     repo_name = _repo_name(api, root_config)

+    # Let's read all the files to memory right now,
+    # in case they get used/removed by other processes.
+    # Read all the files to memory
+    file_contents: list[bytes | None] = []
     for p in paths:
+        try:
+            with open(p, "rb") as f:
+                file_contents.append(f.read())
+        except IOError as e:
+            log.warning(f"Failed to read checkpoint file {p}: {str(e)}")
+            file_contents.append(None)
+
+    for p, contents in zip(paths, file_contents):
+        if contents is None:
+            continue
+
         try:
             relative_path = p.relative_to(checkpoint_dir)
         except ValueError:

@@ -314,7 +330,7 @@ def _save_checkpoint_files(
         # Upload the checkpoint file to the repository
         try:
             api.upload_file(
-                path_or_fileobj=
+                path_or_fileobj=io.BytesIO(contents),
                 path_in_repo=str(path_in_repo),
                 repo_id=repo_name,
                 repo_type="model",
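
The change above snapshots every checkpoint file into memory before any upload begins, so a concurrent process that rotates or deletes checkpoints cannot invalidate a path mid-upload; unreadable files are logged and skipped instead of failing the whole batch. A minimal standalone sketch of the same pattern (the `upload_snapshot` helper and its arguments are illustrative, not part of nshtrainer):

```python
import io
import logging
from pathlib import Path

from huggingface_hub import HfApi

log = logging.getLogger(__name__)


def upload_snapshot(api: HfApi, repo_id: str, paths: list[Path]) -> None:
    # Snapshot every file into memory first, so concurrent checkpoint
    # rotation cannot invalidate the paths mid-upload.
    contents: list[bytes | None] = []
    for p in paths:
        try:
            contents.append(p.read_bytes())
        except OSError as e:
            log.warning(f"Failed to read {p}: {e}")
            contents.append(None)

    for p, data in zip(paths, contents):
        if data is None:
            continue  # unreadable file: skip it rather than fail the batch
        api.upload_file(
            path_or_fileobj=io.BytesIO(data),  # upload from memory, not disk
            path_in_repo=p.name,
            repo_id=repo_id,
            repo_type="model",
        )
```

Because the upload works from an `io.BytesIO` buffer, `huggingface_hub` never touches the original path again after the initial read.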

{nshtrainer-0.18.1 → nshtrainer-0.19.0}/src/nshtrainer/nn/nonlinearity.py

@@ -4,15 +4,19 @@ from typing import Annotated, Literal
 import nshconfig as C
 import torch
 import torch.nn as nn
-
+import torch.nn.functional as F
+from typing_extensions import final, override


 class BaseNonlinearityConfig(C.Config, ABC):
     @abstractmethod
-    def create_module(self) -> nn.Module:
-
+    def create_module(self) -> nn.Module: ...
+
+    @abstractmethod
+    def __call__(self, x: torch.Tensor) -> torch.Tensor: ...


+@final
 class ReLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["relu"] = "relu"

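This hunk gives the base class a dual interface: `create_module()` builds an `nn.Module`, while `__call__` applies the nonlinearity functionally without allocating a module. Every concrete config in the hunks below follows the same shape; a hypothetical extra config (not in the package) would look like:

```python
from typing import Literal

import torch
import torch.nn as nn
import torch.nn.functional as F
from typing_extensions import final, override

from nshtrainer.nn.nonlinearity import BaseNonlinearityConfig


# Hypothetical config, mirroring the pattern the hunks below apply
# to the built-in nonlinearities.
@final
class HardtanhNonlinearityConfig(BaseNonlinearityConfig):
    name: Literal["hardtanh"] = "hardtanh"

    @override
    def create_module(self) -> nn.Module:
        # Stateful path: a module for use inside nn.Sequential etc.
        return nn.Hardtanh()

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        # Stateless path: apply the nonlinearity directly to a tensor.
        return F.hardtanh(x)
```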
@@ -20,7 +24,11 @@ class ReLUNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.ReLU()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.relu(x)

+
+@final
 class SigmoidNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["sigmoid"] = "sigmoid"

@@ -28,7 +36,11 @@ class SigmoidNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Sigmoid()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.sigmoid(x)
+

+@final
 class TanhNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["tanh"] = "tanh"

@@ -36,23 +48,44 @@ class TanhNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Tanh()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.tanh(x)
+

+@final
 class SoftmaxNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["softmax"] = "softmax"

+    dim: int = -1
+    """The dimension to apply the softmax function."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.Softmax(dim=
+        return nn.Softmax(dim=self.dim)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.softmax(x, dim=self.dim)


+@final
 class SoftplusNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["softplus"] = "softplus"

+    beta: float = 1.0
+    """The beta parameter in the softplus function."""
+
+    threshold: float = 20.0
+    """Values above this revert to a linear function."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.Softplus()
+        return nn.Softplus(beta=self.beta, threshold=self.threshold)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.softplus(x, beta=self.beta, threshold=self.threshold)


+@final
 class SoftsignNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["softsign"] = "softsign"

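Because the configs now carry their hyperparameters (`dim` for softmax; `beta` and `threshold` for softplus), the module built by `create_module()` and the direct `__call__` path should produce identical outputs. A quick consistency check, assuming the configs are importable from `nshtrainer.nn.nonlinearity` and accept keyword arguments (they are nshconfig/pydantic-style models):

```python
import torch

from nshtrainer.nn.nonlinearity import SoftplusNonlinearityConfig

config = SoftplusNonlinearityConfig(beta=2.0, threshold=10.0)
x = torch.randn(8, 16)

module = config.create_module()  # nn.Softplus(beta=2.0, threshold=10.0)
y_module = module(x)             # stateful path
y_direct = config(x)             # functional path, same parameters

assert torch.allclose(y_module, y_direct)
```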
@@ -60,44 +93,78 @@ class SoftsignNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Softsign()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.softsign(x)
+

+@final
 class ELUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["elu"] = "elu"

+    alpha: float = 1.0
+    """The alpha parameter in the ELU function."""
+
     @override
     def create_module(self) -> nn.Module:
         return nn.ELU()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.elu(x, alpha=self.alpha)
+

+@final
 class LeakyReLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["leaky_relu"] = "leaky_relu"

-    negative_slope: float
+    negative_slope: float = 1.0e-2
+    """The negative slope of the leaky ReLU function."""

     @override
     def create_module(self) -> nn.Module:
-
-
-
-        return
+        return nn.LeakyReLU(negative_slope=self.negative_slope)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.leaky_relu(x, negative_slope=self.negative_slope)


+@final
 class PReLUConfig(BaseNonlinearityConfig):
     name: Literal["prelu"] = "prelu"

+    num_parameters: int = 1
+    """The number of :math:`a` to learn.
+    Although it takes an int as input, there is only two values are legitimate:
+    1, or the number of channels at input."""
+
+    init: float = 0.25
+    """The initial value of :math:`a`."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.PReLU()
+        return nn.PReLU(num_parameters=self.num_parameters, init=self.init)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        raise NotImplementedError(
+            "PReLU requires learnable parameters and cannot be called directly."
+        )


+@final
 class GELUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["gelu"] = "gelu"

+    approximate: Literal["tanh", "none"] = "none"
+    """The gelu approximation algorithm to use."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.GELU()
+        return nn.GELU(approximate=self.approximate)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.gelu(x, approximate=self.approximate)


+@final
 class SwishNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["swish"] = "swish"

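PReLU is the one nonlinearity whose `__call__` deliberately raises: its slope :math:`a` is a learnable parameter that lives in the module, so there is no stateless functional form available from the config's state alone. Callers have to go through `create_module()`; a short sketch of the intended usage (import path assumed from the file list above):

```python
import torch

from nshtrainer.nn.nonlinearity import PReLUConfig

config = PReLUConfig(num_parameters=1, init=0.25)
prelu = config.create_module()  # nn.PReLU owning the learnable slope

x = torch.randn(4, 8)
y = prelu(x)  # fine: the module holds the parameter

# config(x) raises NotImplementedError, per the __call__ above.
```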
@@ -105,7 +172,11 @@ class SwishNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.SiLU()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.silu(x)
+

+@final
 class SiLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["silu"] = "silu"

@@ -113,7 +184,11 @@ class SiLUNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.SiLU()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.silu(x)
+

+@final
 class MishNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["mish"] = "mish"

@@ -121,6 +196,9 @@ class MishNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Mish()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.mish(x)
+

 class SwiGLU(nn.SiLU):
     @override

@@ -129,6 +207,7 @@ class SwiGLU(nn.SiLU):
         return input * super().forward(gate)


+@final
 class SwiGLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["swiglu"] = "swiglu"

@@ -136,6 +215,10 @@ class SwiGLUNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return SwiGLU()

+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        input, gate = x.chunk(2, dim=-1)
+        return input * F.silu(gate)
+

 NonlinearityConfig = Annotated[
     ReLUNonlinearityConfig
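Note the convention in `SwiGLUNonlinearityConfig.__call__`: the last dimension is split into value and gate halves, so the input's last dimension must be even and the output is half as wide. A small usage sketch (import path assumed; the final `allclose` additionally assumes the `SwiGLU` module's `forward` chunks its input the same way, which its `return input * super().forward(gate)` line above suggests):

```python
import torch

from nshtrainer.nn.nonlinearity import SwiGLUNonlinearityConfig

config = SwiGLUNonlinearityConfig()
x = torch.randn(4, 64)   # last dim must be even: split into 32 + 32

y = config(x)            # value half * silu(gate half)
assert y.shape == (4, 32)

module = config.create_module()
assert torch.allclose(module(x), y)
```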