nshtrainer 0.18.2__py3-none-any.whl → 0.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nshtrainer/_hf_hub.py CHANGED
@@ -1,6 +1,7 @@
 import io
 import logging
 import os
+import re
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, cast
 
@@ -150,7 +151,32 @@ def _repo_name(api: "HfApi", root_config: "BaseConfig"):
     elif (username := api.whoami().get("name", None)) is None:
         raise ValueError("Could not get username from Hugging Face Hub.")
 
-    return f"{username}/{root_config.project}-{root_config.run_name}-{root_config.id}"
+    # Sanitize the project (if it exists), run_name, and id
+    parts = []
+    if root_config.project:
+        parts.append(re.sub(r"[^a-zA-Z0-9-]", "-", root_config.project))
+    parts.append(re.sub(r"[^a-zA-Z0-9-]", "-", root_config.run_name))
+    parts.append(re.sub(r"[^a-zA-Z0-9-]", "-", root_config.id))
+
+    # Combine parts and ensure it starts and ends with alphanumeric characters
+    repo_name = "-".join(parts)
+    repo_name = repo_name.strip("-")
+    repo_name = re.sub(
+        r"-+", "-", repo_name
+    )  # Replace multiple dashes with a single dash
+
+    # Ensure the name is not longer than 96 characters (excluding username)
+    if len(repo_name) > 96:
+        repo_name = repo_name[:96].rstrip("-")
+
+    # Ensure the repo name starts with an alphanumeric character
+    repo_name = re.sub(r"^[^a-zA-Z0-9]+", "", repo_name)
+
+    # If the repo_name is empty after all sanitization, use a default name
+    if not repo_name:
+        repo_name = "default-repo-name"
+
+    return f"{username}/{repo_name}"
 
 
 def _init(*, trainer: "Trainer", root_config: "BaseConfig"):
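For context, the new `_repo_name` body reduces to a pure string transform over the config fields. A minimal standalone sketch of that transform (the `sanitize_repo_name` helper and the sample values below are illustrative, not part of the package API):

import re

def sanitize_repo_name(project, run_name, id):
    # Illustrative helper, not nshtrainer's API: replace every character
    # outside [a-zA-Z0-9-] with a dash, per part, skipping empty parts.
    parts = [re.sub(r"[^a-zA-Z0-9-]", "-", p) for p in (project, run_name, id) if p]
    name = "-".join(parts).strip("-")
    name = re.sub(r"-+", "-", name)  # collapse runs of dashes
    if len(name) > 96:  # length budget, excluding the username
        name = name[:96].rstrip("-")
    name = re.sub(r"^[^a-zA-Z0-9]+", "", name)  # must start alphanumeric
    return name or "default-repo-name"

print(sanitize_repo_name("my proj", "run/alpha 1", "abc_123"))
# -> my-proj-run-alpha-1-abc-123

The point of the change: the old f-string could emit spaces, slashes, or a literal "None" for a missing project, none of which are valid in a Hugging Face repo name.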
nshtrainer/model/modules/callback.py CHANGED
@@ -1,7 +1,6 @@
 import logging
-from collections import abc
-from collections.abc import Callable, Iterable
-from typing import Any, TypeAlias, cast, final
+from collections.abc import Callable, Iterable, Sequence
+from typing import Any, TypeAlias, cast, final, overload
 
 from lightning.pytorch import Callback, LightningModule
 from lightning.pytorch.callbacks import LambdaCallback
@@ -19,11 +18,61 @@ class CallbackRegistrarModuleMixin:
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-        self._ll_callbacks: list[CallbackFn] = []
+        self._nshtrainer_callbacks: list[CallbackFn] = []
+
+    @overload
+    def register_callback(
+        self, callback: Callback | Iterable[Callback] | CallbackFn | None = None, /
+    ): ...
+
+    @overload
+    def register_callback(
+        self,
+        /,
+        *,
+        setup: Callable | None = None,
+        teardown: Callable | None = None,
+        on_fit_start: Callable | None = None,
+        on_fit_end: Callable | None = None,
+        on_sanity_check_start: Callable | None = None,
+        on_sanity_check_end: Callable | None = None,
+        on_train_batch_start: Callable | None = None,
+        on_train_batch_end: Callable | None = None,
+        on_train_epoch_start: Callable | None = None,
+        on_train_epoch_end: Callable | None = None,
+        on_validation_epoch_start: Callable | None = None,
+        on_validation_epoch_end: Callable | None = None,
+        on_test_epoch_start: Callable | None = None,
+        on_test_epoch_end: Callable | None = None,
+        on_validation_batch_start: Callable | None = None,
+        on_validation_batch_end: Callable | None = None,
+        on_test_batch_start: Callable | None = None,
+        on_test_batch_end: Callable | None = None,
+        on_train_start: Callable | None = None,
+        on_train_end: Callable | None = None,
+        on_validation_start: Callable | None = None,
+        on_validation_end: Callable | None = None,
+        on_test_start: Callable | None = None,
+        on_test_end: Callable | None = None,
+        on_exception: Callable | None = None,
+        on_save_checkpoint: Callable | None = None,
+        on_load_checkpoint: Callable | None = None,
+        on_before_backward: Callable | None = None,
+        on_after_backward: Callable | None = None,
+        on_before_optimizer_step: Callable | None = None,
+        on_before_zero_grad: Callable | None = None,
+        on_predict_start: Callable | None = None,
+        on_predict_end: Callable | None = None,
+        on_predict_batch_start: Callable | None = None,
+        on_predict_batch_end: Callable | None = None,
+        on_predict_epoch_start: Callable | None = None,
+        on_predict_epoch_end: Callable | None = None,
+    ): ...
 
     def register_callback(
         self,
         callback: Callback | Iterable[Callback] | CallbackFn | None = None,
+        /,
         *,
         setup: Callable | None = None,
         teardown: Callable | None = None,
@@ -109,7 +158,7 @@ class CallbackRegistrarModuleMixin:
         else:
             callback_ = callback
 
-        self._ll_callbacks.append(callback_)
+        self._nshtrainer_callbacks.append(callback_)
 
 
 class CallbackModuleMixin(
@@ -136,7 +185,7 @@ class CallbackModuleMixin(
     @override
     def configure_callbacks(self):
        callbacks = super().configure_callbacks()
-        if not isinstance(callbacks, abc.Sequence):
+        if not isinstance(callbacks, Sequence):
            callbacks = [callbacks]
 
        callbacks = list(callbacks)
@@ -145,7 +194,7 @@ class CallbackModuleMixin(
            if callback_result is None:
                continue
 
-            if not isinstance(callback_result, abc.Iterable):
+            if not isinstance(callback_result, Iterable):
                callback_result = [callback_result]
 
            for callback in callback_result:
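With the positional-only `/` and the two `@overload`s, a call now supplies either a ready-made `Callback` (or an iterable or factory of callbacks) positionally, or individual hook functions by keyword, never both. A minimal sketch of the two call shapes, assuming the mixin can be instantiated standalone and imported from the module path shown in the RECORD below (keyword hooks appear to be wrapped via the imported `LambdaCallback`):

from lightning.pytorch.callbacks import LambdaCallback

from nshtrainer.model.modules.callback import CallbackRegistrarModuleMixin

class Demo(CallbackRegistrarModuleMixin):
    pass

demo = Demo()

# Positional-only form: pass a Callback instance directly.
demo.register_callback(LambdaCallback(on_train_start=lambda trainer, module: None))

# Keyword-only form: pass hook functions; the mixin wraps them itself.
demo.register_callback(on_train_epoch_end=lambda trainer, module: print("epoch done"))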
nshtrainer/nn/nonlinearity.py CHANGED
@@ -4,15 +4,19 @@ from typing import Annotated, Literal
 import nshconfig as C
 import torch
 import torch.nn as nn
-from typing_extensions import override
+import torch.nn.functional as F
+from typing_extensions import final, override
 
 
 class BaseNonlinearityConfig(C.Config, ABC):
     @abstractmethod
-    def create_module(self) -> nn.Module:
-        pass
+    def create_module(self) -> nn.Module: ...
+
+    @abstractmethod
+    def __call__(self, x: torch.Tensor) -> torch.Tensor: ...
 
 
+@final
 class ReLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["relu"] = "relu"
 
@@ -20,7 +24,11 @@ class ReLUNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.ReLU()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.relu(x)
 
+
+@final
 class SigmoidNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["sigmoid"] = "sigmoid"
 
@@ -28,7 +36,11 @@ class SigmoidNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Sigmoid()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.sigmoid(x)
+
 
+@final
 class TanhNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["tanh"] = "tanh"
 
@@ -36,23 +48,44 @@ class TanhNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Tanh()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.tanh(x)
+
 
+@final
 class SoftmaxNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["softmax"] = "softmax"
 
+    dim: int = -1
+    """The dimension to apply the softmax function."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.Softmax(dim=1)
+        return nn.Softmax(dim=self.dim)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.softmax(x, dim=self.dim)
 
 
+@final
 class SoftplusNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["softplus"] = "softplus"
 
+    beta: float = 1.0
+    """The beta parameter in the softplus function."""
+
+    threshold: float = 20.0
+    """Values above this revert to a linear function."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.Softplus()
+        return nn.Softplus(beta=self.beta, threshold=self.threshold)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.softplus(x, beta=self.beta, threshold=self.threshold)
 
 
+@final
 class SoftsignNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["softsign"] = "softsign"
 
@@ -60,44 +93,78 @@ class SoftsignNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Softsign()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.softsign(x)
+
 
+@final
 class ELUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["elu"] = "elu"
 
+    alpha: float = 1.0
+    """The alpha parameter in the ELU function."""
+
     @override
     def create_module(self) -> nn.Module:
         return nn.ELU()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.elu(x, alpha=self.alpha)
+
 
+@final
 class LeakyReLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["leaky_relu"] = "leaky_relu"
 
-    negative_slope: float | None = None
+    negative_slope: float = 1.0e-2
+    """The negative slope of the leaky ReLU function."""
 
     @override
     def create_module(self) -> nn.Module:
-        kwargs = {}
-        if self.negative_slope is not None:
-            kwargs["negative_slope"] = self.negative_slope
-        return nn.LeakyReLU(**kwargs)
+        return nn.LeakyReLU(negative_slope=self.negative_slope)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.leaky_relu(x, negative_slope=self.negative_slope)
 
 
+@final
 class PReLUConfig(BaseNonlinearityConfig):
     name: Literal["prelu"] = "prelu"
 
+    num_parameters: int = 1
+    """The number of :math:`a` to learn.
+    Although it takes an int as input, there is only two values are legitimate:
+    1, or the number of channels at input."""
+
+    init: float = 0.25
+    """The initial value of :math:`a`."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.PReLU()
+        return nn.PReLU(num_parameters=self.num_parameters, init=self.init)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        raise NotImplementedError(
+            "PReLU requires learnable parameters and cannot be called directly."
+        )
 
 
+@final
 class GELUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["gelu"] = "gelu"
 
+    approximate: Literal["tanh", "none"] = "none"
+    """The gelu approximation algorithm to use."""
+
     @override
     def create_module(self) -> nn.Module:
-        return nn.GELU()
+        return nn.GELU(approximate=self.approximate)
+
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.gelu(x, approximate=self.approximate)
 
 
+@final
 class SwishNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["swish"] = "swish"
 
@@ -105,7 +172,11 @@ class SwishNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.SiLU()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.silu(x)
+
 
+@final
 class SiLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["silu"] = "silu"
 
@@ -113,7 +184,11 @@ class SiLUNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.SiLU()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.silu(x)
 
+
+@final
 class MishNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["mish"] = "mish"
 
@@ -121,6 +196,9 @@ class MishNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return nn.Mish()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        return F.mish(x)
+
 
 class SwiGLU(nn.SiLU):
     @override
@@ -129,6 +207,7 @@ class SwiGLU(nn.SiLU):
         return input * super().forward(gate)
 
 
+@final
 class SwiGLUNonlinearityConfig(BaseNonlinearityConfig):
     name: Literal["swiglu"] = "swiglu"
 
@@ -136,6 +215,10 @@ class SwiGLUNonlinearityConfig(BaseNonlinearityConfig):
     def create_module(self) -> nn.Module:
         return SwiGLU()
 
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        input, gate = x.chunk(2, dim=-1)
+        return input * F.silu(gate)
+
 
 NonlinearityConfig = Annotated[
     ReLUNonlinearityConfig
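Each config class is now both a module factory and a plain function: `create_module()` still returns the stateful `nn.Module`, while the new `__call__` applies the matching `torch.nn.functional` op with the same hyperparameters. A sketch of the dual use (the import path follows the RECORD listing below; keyword construction assumes nshconfig configs behave like pydantic models):

import torch

from nshtrainer.nn.nonlinearity import GELUNonlinearityConfig

config = GELUNonlinearityConfig(approximate="tanh")
x = torch.randn(4, 8)

module_out = config.create_module()(x)  # module path: nn.GELU(approximate="tanh")
direct_out = config(x)                  # functional path: F.gelu(x, approximate="tanh")
assert torch.allclose(module_out, direct_out)

The one exception is `PReLUConfig`: its `__call__` raises `NotImplementedError` because the activation carries learnable parameters, so only `create_module()` is meaningful there.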
{nshtrainer-0.18.2.dist-info → nshtrainer-0.19.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nshtrainer
-Version: 0.18.2
+Version: 0.19.1
 Summary:
 Author: Nima Shoghi
 Author-email: nimashoghi@gmail.com
{nshtrainer-0.18.2.dist-info → nshtrainer-0.19.1.dist-info}/RECORD RENAMED
@@ -3,7 +3,7 @@ nshtrainer/_checkpoint/loader.py,sha256=myFObRsPdb8jBncMK73vjr5FDJIfKhF86Ec_kSjX
 nshtrainer/_checkpoint/metadata.py,sha256=p5e7dhVPpOGrXeuesq_7Y_RHi5lguzDAR_UXtMJXzWU,5175
 nshtrainer/_checkpoint/saver.py,sha256=DkbCH0YeOJ71m32vAARiQdGBf0hvwwdoAV8LOFGy-0Y,1428
 nshtrainer/_experimental/__init__.py,sha256=pEXPyI184UuDHvfh4p9Kg9nQZQZI41e4_HvNd4BK-yg,81
-nshtrainer/_hf_hub.py,sha256=Py9_8ADvMCFPaJzeE7bxm8Mgs3mEMkyWJ4pDEccTGt8,11230
+nshtrainer/_hf_hub.py,sha256=To3BnnGWbMNNMBdzVtgrNOcNU2fi1dQpwwuclusFAbI,12169
 nshtrainer/callbacks/__init__.py,sha256=4qocBDzQbLLhhbIEfvbA3SQB_Dy9ZJH7keMwPay-ZS8,2359
 nshtrainer/callbacks/_throughput_monitor_callback.py,sha256=aJo_11rc4lo0IYOd-kHmPDtzdC4ctgXyRudkRJqH4m4,23184
 nshtrainer/callbacks/actsave.py,sha256=qbnaKts4_dvjPeAaPtv7Ds12_vEWzaHUfg_--49NB9I,4041
@@ -58,7 +58,7 @@ nshtrainer/metrics/_config.py,sha256=jgRBfDAQLFTW7AiUY7CRtdfts6CR6keeuqm0FFMWCzQ
 nshtrainer/model/__init__.py,sha256=VyRziPT3YilP6xjLi_StsSqtlvn7N4LOMzgukRsOnF8,1380
 nshtrainer/model/base.py,sha256=oQVolDk81acy4OlckwQEBHuX2gCaVSYiIA0JaDIfhQ4,17517
 nshtrainer/model/config.py,sha256=147uV7IukvuYE4G_ZuQNxVjnlog1BdCrAVbcj_sx9Vs,43104
-nshtrainer/model/modules/callback.py,sha256=K0-cyEtBcQhI7Q2e-AGTE8T-GghUPY9DYmneU6ULV6g,6401
+nshtrainer/model/modules/callback.py,sha256=thhlJaqLRw2gwvb3Z6DJ8Kk8XUxKhinU_8ad30vne34,8541
 nshtrainer/model/modules/debug.py,sha256=Yy7XEdPou9BkCsD5hJchwJGmCVGrfUru5g9VjPM4uAw,1120
 nshtrainer/model/modules/distributed.py,sha256=ABpR9d-3uBS_fivfy_WYW-dExW6vp5BPaoPQnOudHng,1725
 nshtrainer/model/modules/logger.py,sha256=CJWSmNT8SV5GLtfml-qGYenqRPXcNOMsJRGEavAd8Hw,5464
@@ -69,7 +69,7 @@ nshtrainer/nn/__init__.py,sha256=0QPFl02a71WZQjLMGOlFNMmsYP5aa1q3eABHmnWH58Q,142
 nshtrainer/nn/mlp.py,sha256=V0FrScpIUdg_IgIO8GMtIsGEtmHjwF14i2IWxmZrsqg,5952
 nshtrainer/nn/module_dict.py,sha256=NOY0B6WDTnktyWH4GthsprMQo0bpehC-hCq9SfD8paE,2329
 nshtrainer/nn/module_list.py,sha256=fb2u5Rqdjff8Pekyr9hkCPkBorQ-fldzzFAjsgWAm30,1719
-nshtrainer/nn/nonlinearity.py,sha256=owtU4kh4G98psD0axOJWVfBhm-OtJVgFM-TXSHmbNPU,3625
+nshtrainer/nn/nonlinearity.py,sha256=4sYE4MN5zojc-go1k0PYtqssVRuXrM7D4tbpIXp5K-E,6078
 nshtrainer/optimizer.py,sha256=kuJEA1pvB3y1FcsfhAoOJujVqEZqFHlmYO8GW6JeA1g,1527
 nshtrainer/runner.py,sha256=USAjrExHkN5oVNVunsoPnLxfQrEHSaa54S3RipOe544,3605
 nshtrainer/scripts/find_packages.py,sha256=ixYivZobumyyGsf2B9oYMLyLTRcBzY_vUv-u3bNW-hs,1424
@@ -85,6 +85,6 @@ nshtrainer/util/seed.py,sha256=Or2wMPsnQxfnZ2xfBiyMcHFIUt3tGTNeMMyOEanCkqs,280
 nshtrainer/util/slurm.py,sha256=rofIU26z3SdL79SF45tNez6juou1cyDLz07oXEZb9Hg,1566
 nshtrainer/util/typed.py,sha256=NGuDkDzFlc1fAoaXjOFZVbmj0mRFjsQi1E_hPa7Bn5U,128
 nshtrainer/util/typing_utils.py,sha256=8ptjSSLZxlmy4FY6lzzkoGoF5fGNClo8-B_c0XHQaNU,385
-nshtrainer-0.18.2.dist-info/METADATA,sha256=vev96DaxCnqJOAvvGrGOJ37OpWNFLrCdtGPN-kpnvO4,935
-nshtrainer-0.18.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-nshtrainer-0.18.2.dist-info/RECORD,,
+nshtrainer-0.19.1.dist-info/METADATA,sha256=NMPSdeNqcMnyB9UiQ-4f-MdhBZ_RmCAPCYcYCCvjyYI,935
+nshtrainer-0.19.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+nshtrainer-0.19.1.dist-info/RECORD,,