hyper-connections 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
--- a/hyper_connections/__init__.py
+++ b/hyper_connections/__init__.py
@@ -1,5 +1,7 @@
 from hyper_connections.hyper_connections import (
     HyperConnections,
+    get_expand_reduce_stream_functions,
+    get_init_and_expand_reduce_stream_functions,
     Residual,
     StreamEmbed,
     AttentionPoolReduceStream
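With this change the stream helpers are importable directly from the package root, matching the README changes in the METADATA diff further down. A minimal sketch of what the new exports return, based on the function bodies shown in the `hyper_connections.py` hunks that follow (the stream count of 4 is just the value used in the README examples):

```python
# sketch: what the new top-level exports provide
from hyper_connections import (
    get_expand_reduce_stream_functions,
    get_init_and_expand_reduce_stream_functions,
)

# a partial constructor plus the expand/reduce helpers
init_hyper_conn, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(4)

# with disable = True the triple falls back to a plain Residual and two identity functions
init_residual, expand_noop, reduce_noop = get_init_and_expand_reduce_stream_functions(4, disable = True)
```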
--- a/hyper_connections/hyper_connections.py
+++ b/hyper_connections/hyper_connections.py
@@ -12,6 +12,8 @@ from torch.utils._pytree import tree_flatten, tree_unflatten
 
 from einops import rearrange, repeat, reduce, einsum
 
+from beartype import beartype
+
 """
 ein notation:
 b - batch
@@ -31,15 +33,48 @@ def default(v, d):
 def identity(t):
     return t
 
+# main functions
+
+def get_expand_reduce_stream_functions(num_streams, disable = False):
+
+    if disable:
+        return (identity, identity)
+
+    expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
+    reduce_fn = partial(reduce, pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)
+
+    return expand_fn, reduce_fn
+
+def get_init_and_expand_reduce_stream_functions(num_streams, disable = False):
+
+    hyper_conn_klass = HyperConnections if not disable else Residual
+
+    init_hyper_conn_fn = partial(hyper_conn_klass, num_streams)
+    expand_reduce_fns = get_expand_reduce_stream_functions(num_streams, disable = disable)
+
+    return (init_hyper_conn_fn, *expand_reduce_fns)
+
+# norms
+
+class RMSNorm(Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.scale = dim ** 0.5
+        self.gamma = nn.Parameter(torch.zeros(dim))
+
+    def forward(self, x):
+        return F.normalize(x, dim = -1) * self.scale * (self.gamma + 1)
+
 # main classes
 
 # residual base class
 
 class Residual(Module):
+    @beartype
     def __init__(
         self,
         *args,
-        branch = None,
+        branch: Module | None = None,
         **kwargs
     ):
         super().__init__()
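Two of the additions above are worth unpacking. `get_expand_reduce_stream_functions` builds its pair of helpers from `einops.repeat` and `einops.reduce`: expansion tiles each batch element across the stream axis, reduction sums the streams back. The new `RMSNorm` zero-initializes `gamma` and applies it as `(gamma + 1)`, so at initialization it is exactly an unscaled RMS norm (the reparameterization behind the Ohad Rubin reference added to the README later in this diff). A small sanity-check sketch, with illustrative tensor sizes:

```python
# sketch: behaviour of the new helpers, using illustrative sizes
import torch
import torch.nn.functional as F

from hyper_connections import get_expand_reduce_stream_functions
from hyper_connections.hyper_connections import RMSNorm

num_streams, dim = 4, 512

# expand copies the batch across streams, reduce sums them back
expand_stream, reduce_stream = get_expand_reduce_stream_functions(num_streams)

x = torch.randn(2, 16, dim)                        # (batch, seq, dim)
streams = expand_stream(x)                         # 'b ... -> (b s) ...' -> (8, 16, 512)
summed = reduce_stream(streams)                    # '(b s) ... -> b ...' -> (2, 16, 512)
assert torch.allclose(summed, x * num_streams)

# RMSNorm starts with gamma = 0, so (gamma + 1) leaves a plain RMS norm at init
norm = RMSNorm(dim)
reference = F.normalize(x, dim = -1) * dim ** 0.5  # x / ||x|| * sqrt(dim)
assert torch.allclose(norm(x), reference)
```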
@@ -86,6 +121,7 @@ class Residual(Module):
 # hyper connection residual streams
 
 class HyperConnections(Module):
+    @beartype
     def __init__(
         self,
         num_residual_streams,
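`@beartype` on the constructors (here and on `Residual` earlier), the new `branch: Module | None = None` annotation, and the `beartype` entry added to the package requirements in the METADATA diff below mean an invalid `branch` is now rejected at construction time. A hedged sketch; it assumes `Residual` simply ignores the extra positional stream count, as its `*args, **kwargs` signature shown earlier suggests:

```python
# sketch: runtime type checking introduced by @beartype
from torch import nn
from beartype.roar import BeartypeCallHintParamViolation

from hyper_connections import Residual

Residual(4, branch = nn.Identity())        # fine: branch is an nn.Module

try:
    Residual(4, branch = lambda t: t)      # a bare callable is not an nn.Module
except BeartypeCallHintParamViolation:
    print('beartype rejected a non-Module branch')
```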
@@ -108,7 +144,7 @@ class HyperConnections(Module):
 
         self.act = nn.Tanh() if tanh else nn.Identity()
 
-        self.norm = nn.RMSNorm(dim) # they used layernorm in paper, but rmsnorm is fine given what we know now
+        self.norm = RMSNorm(dim) # they used layernorm in paper, but rmsnorm is fine given what we know now
 
         assert num_residual_streams > 0, '`num_residual_streams` must be greater than 0'
 
@@ -135,27 +171,6 @@ class HyperConnections(Module):
 
         self.channel_first = channel_first
 
-    @classmethod
-    def get_expand_reduce_stream_functions(cls, num_streams, disable = False):
-
-        if disable:
-            return (identity, identity)
-
-        expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
-        reduce_fn = partial(reduce, pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)
-
-        return expand_fn, reduce_fn
-
-    @classmethod
-    def get_init_and_expand_reduce_stream_functions(cls, num_streams, disable = False):
-
-        hyper_conn_klass = cls if not disable else Residual
-
-        init_hyper_conn_fn = partial(hyper_conn_klass, num_streams)
-        expand_reduce_fns = cls.get_expand_reduce_stream_functions(num_streams, disable = disable)
-
-        return (init_hyper_conn_fn, *expand_reduce_fns)
-
     def width_connection(self, residuals):
         # width connection
 
@@ -233,6 +248,9 @@ class HyperConnections(Module):
 
         return add_residual_fn(branch_output)
 
+HyperConnections.get_expand_reduce_stream_functions = staticmethod(get_expand_reduce_stream_functions)
+HyperConnections.get_init_and_expand_reduce_stream_functions = staticmethod(get_init_and_expand_reduce_stream_functions)
+
 # stream embed
 
 class StreamEmbed(Module):
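Re-attaching the lifted functions to `HyperConnections` as staticmethods keeps the old class-level call sites working alongside the new module-level ones; both paths resolve to the same functions:

```python
# sketch: old and new call styles after the staticmethod re-attachment
from hyper_connections import HyperConnections, get_init_and_expand_reduce_stream_functions

new_style = get_init_and_expand_reduce_stream_functions(4)
old_style = HyperConnections.get_init_and_expand_reduce_stream_functions(4)   # still supported
```

One nuance: the removed classmethods built `cls`, so a subclass would construct itself, whereas the module-level function always constructs `HyperConnections` (or `Residual` when disabled).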
--- a/hyper_connections/hyper_connections_with_multi_branch_inputs.py
+++ b/hyper_connections/hyper_connections_with_multi_branch_inputs.py
@@ -12,6 +12,8 @@ from torch.utils._pytree import tree_flatten, tree_unflatten
 
 from einops import rearrange, repeat, reduce, einsum
 
+from beartype import beartype
+
 """
 ein notation:
 b - batch
@@ -22,7 +24,7 @@ br - branch functions
 t - residual streams + num branch inputs
 """
 
-from hyper_connections.hyper_connections import Residual, StreamEmbed
+from hyper_connections.hyper_connections import Residual, StreamEmbed, RMSNorm
 
 # helper functions
 
@@ -38,11 +40,32 @@ def divisible_by(num, den):
 def identity(t):
     return t
 
+# main functions
+
+def get_expand_reduce_stream_functions(cls, num_streams, disable = False):
+    if disable:
+        return (identity, identity)
+
+    expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
+    reduce_fn = partial(reduce, pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)
+
+    return expand_fn, reduce_fn
+
+def get_init_and_expand_reduce_stream_functions(cls, num_streams, disable = False):
+
+    hyper_conn_klass = HyperConnections if not disable else Residual
+
+    init_hyper_conn_fn = partial(hyper_conn_klass, num_streams)
+    expand_reduce_fns = get_expand_reduce_stream_functions(num_streams, disable = disable)
+
+    return (init_hyper_conn_fn, *expand_reduce_fns)
+
 # main classes
 
 # hyper connection residual streams
 
 class HyperConnections(Module):
+    @beartype
     def __init__(
         self,
         num_residual_streams,
@@ -74,7 +97,7 @@ class HyperConnections(Module):
 
         self.act = nn.Tanh() if tanh else nn.Identity()
 
-        self.norm = nn.RMSNorm(dim) # they used layernorm in paper, but rmsnorm is fine given what we know now
+        self.norm = RMSNorm(dim) # they used layernorm in paper, but rmsnorm is fine given what we know now
 
         self.num_residual_streams = num_residual_streams
         self.num_branch_inputs = num_branch_inputs
@@ -108,26 +131,6 @@ class HyperConnections(Module):
 
         self.channel_first = channel_first
 
-    @classmethod
-    def get_expand_reduce_stream_functions(cls, num_streams, disable = False):
-        if disable:
-            return (identity, identity)
-
-        expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
-        reduce_fn = partial(reduce, pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)
-
-        return expand_fn, reduce_fn
-
-    @classmethod
-    def get_init_and_expand_reduce_stream_functions(cls, num_streams, disable = False):
-
-        hyper_conn_klass = cls if not disable else Residual
-
-        init_hyper_conn_fn = partial(hyper_conn_klass, num_streams)
-        expand_reduce_fns = cls.get_expand_reduce_stream_functions(num_streams, disable = disable)
-
-        return (init_hyper_conn_fn, *expand_reduce_fns)
-
     def width_connection(self, residuals):
         num_streams, num_branch_inputs = self.num_residual_streams, self.num_branch_inputs
 
@@ -225,3 +228,6 @@ class HyperConnections(Module):
         branch_output = torch.cat(branch_outputs)
 
         return add_residual_fn(branch_output)
+
+HyperConnections.get_expand_reduce_stream_functions = staticmethod(get_expand_reduce_stream_functions)
+HyperConnections.get_init_and_expand_reduce_stream_functions = staticmethod(get_init_and_expand_reduce_stream_functions)
--- a/hyper_connections-0.0.20.dist-info/METADATA
+++ b/hyper_connections-0.0.22.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hyper-connections
-Version: 0.0.20
+Version: 0.0.22
 Summary: Hyper-Connections
 Project-URL: Homepage, https://pypi.org/project/hyper-connections/
 Project-URL: Repository, https://github.com/lucidrains/hyper-connections
@@ -34,6 +34,7 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
+Requires-Dist: beartype
 Requires-Dist: einops>=0.8.0
 Requires-Dist: torch>=2.3
 Provides-Extra: examples
@@ -71,9 +72,9 @@ residual = branch(residual) + residual
 
 # after, say 4 streams in paper
 
-from hyper_connections import HyperConnections
+from hyper_connections import get_init_and_expand_reduce_stream_functions
 
-init_hyper_conn, expand_stream, reduce_stream = HyperConnections.get_init_and_expand_reduce_stream_functions(4)
+init_hyper_conn, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(4)
 
 # 1. wrap your branch function
 
@@ -110,9 +111,9 @@ residual = branch(residual) + residual
 
 # after, say 4 streams in paper
 
-from hyper_connections import HyperConnections
+from hyper_connections import get_init_and_expand_reduce_stream_functions
 
-init_hyper_conn, expand_stream, reduce_stream = HyperConnections.get_init_and_expand_reduce_stream_functions(4)
+init_hyper_conn, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(4)
 
 # 1. instantiate hyper connection with correct number of streams (4 in this case) - or use the init function above
 
@@ -140,7 +141,7 @@ residual = reduce_stream(residual)
 To compare hyper connections to plain residual without changing the code, just pass `disable = True` when fetching the functions
 
 ```python
-HyperConnections.get_init_and_expand_reduce_stream_functions(4, disable = True)
+get_init_and_expand_reduce_stream_functions(4, disable = True)
 ```
 
 ## Citation
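The README fragments above show the pieces individually: fetch the triple, wrap a branch, expand before the trunk, reduce after it. Stitched together with the updated import, the flow is roughly as follows; the `dim = 512` feature size, the stand-in branch, and the exact `init_hyper_conn(dim = ..., branch = ...)` keywords are illustrative assumptions rather than quotes from the README:

```python
# sketch: end-to-end wiring with the new module-level helpers (names/sizes are illustrative)
import torch
from torch import nn
from hyper_connections import get_init_and_expand_reduce_stream_functions

init_hyper_conn, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(4)

branch = nn.Linear(512, 512)                 # stand-in for an attention / feedforward block
hyper_conn_branch = init_hyper_conn(dim = 512, branch = branch)

residual = torch.randn(2, 1024, 512)         # (batch, seq, dim)

residual = expand_stream(residual)           # 1. expand to 4 streams before the trunk
residual = hyper_conn_branch(residual)       # 2. wrapped branch handles the hyper connection residual
residual = reduce_stream(residual)           # 3. sum the streams back after the trunk
```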
@@ -155,3 +156,10 @@ HyperConnections.get_init_and_expand_reduce_stream_functions(4, disable = True)
     url = {https://api.semanticscholar.org/CorpusID:272987528}
 }
 ```
+
+```bibtex
+@misc{Rubin2024,
+    author = {Ohad Rubin},
+    url = {https://medium.com/@ohadrubin/exploring-weight-decay-in-layer-normalization-challenges-and-a-reparameterization-solution-ad4d12c24950}
+}
+```
--- /dev/null
+++ b/hyper_connections-0.0.22.dist-info/RECORD
@@ -0,0 +1,7 @@
+hyper_connections/__init__.py,sha256=d2zNTka0Gp9vINu4U-RhgTJFBhsVrs1fne_15Zl0oOs,224
+hyper_connections/hyper_connections.py,sha256=HyMz-jmICBC6L8QT-LA3EdY8djqG5XkOV7mi-i420mI,9993
+hyper_connections/hyper_connections_with_multi_branch_inputs.py,sha256=x4it5IGllpZGYank8PBHCRzFeozgZxUY7UYo6YkSkcg,7778
+hyper_connections-0.0.22.dist-info/METADATA,sha256=uMrTDUeNCoLpQs89yjMvadzz8r4JLQpky0zQ_Di2H7I,5315
+hyper_connections-0.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hyper_connections-0.0.22.dist-info/licenses/LICENSE,sha256=E7RGS7kpJIStk5za_-4DVhWEAamf65EU0CNML25mq4c,1066
+hyper_connections-0.0.22.dist-info/RECORD,,
--- a/hyper_connections-0.0.20.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
-hyper_connections/__init__.py,sha256=wJxbrEXRGmOIjPw8fWP-cUq6CE8bvx95mIlhWifNvYc,135
-hyper_connections/hyper_connections.py,sha256=ElPtieRLvVKaVg2Attx1k6esKq1SY2X4AVZbZmsQAOM,9486
-hyper_connections/hyper_connections_with_multi_branch_inputs.py,sha256=HbLpt79xcMv_os6brMvDd90t2GOPceliE1YFusR2eJI,7553
-hyper_connections-0.0.20.dist-info/METADATA,sha256=erA-d7KNNdzPY76x8IWKd2trv2WuBO9-C2DtH-SoQ_Y,5076
-hyper_connections-0.0.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hyper_connections-0.0.20.dist-info/licenses/LICENSE,sha256=E7RGS7kpJIStk5za_-4DVhWEAamf65EU0CNML25mq4c,1066
-hyper_connections-0.0.20.dist-info/RECORD,,