PyPI - hyper-connections - Versions diffs - 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl - Mend

hyper-connections: 0.0.1 (py3-none-any.whl) → 0.0.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

hyper_connections/hyper_connections.py CHANGED Viewed

@@ -6,6 +6,7 @@ import torch
 from torch import nn
 from torch.nn import Module
 import torch.nn.functional as F
+from torch.utils._pytree import tree_flatten, tree_unflatten
 from einops import rearrange, repeat, reduce, einsum
@@ -30,6 +31,7 @@ class HyperConnections(Module):
         branch: Module | None = None,
         layer_index = None,
         tanh = True,
+        channel_first = False
     ):
         """
         Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
@@ -56,6 +58,10 @@ class HyperConnections(Module):
         self.dynamic_beta_fn = nn.Parameter(torch.zeros(dim))
         self.dynamic_beta_scale = nn.Parameter(torch.ones(()) * 1e-2)
+        # channel first option
+        self.channel_first = channel_first
     @classmethod
     def get_expand_reduce_stream_functions(cls, num_streams):
         expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
@@ -66,31 +72,47 @@ class HyperConnections(Module):
     def width_connection(self, residuals):
         # width connection
+        if self.channel_first:
+            residuals = rearrange(residuals, 'b d ... -> b ... d')
         residuals = rearrange(residuals, '(b s) ... d -> b ... s d', s = self.num_residual_streams)
         normed = self.norm(residuals)
+        # alpha for weighted sum of residuals going into branch
         wc_weight = self.act(normed @ self.dynamic_alpha_fn)
         dynamic_alpha = wc_weight * self.dynamic_alpha_scale
         alpha = dynamic_alpha + self.static_alpha
+        # beta for weights from branch output back to residual streams
         dc_weight = self.act(normed @ self.dynamic_beta_fn)
         dynamic_beta = dc_weight * self.dynamic_beta_scale
         beta = dynamic_beta + self.static_beta
-        # width connection
         mix_h = einsum(alpha, residuals, '... s t, ... s d -> ... t d')
         branch_input, residuals = mix_h[..., 0, :], mix_h[..., 1:, :]
+        if self.channel_first:
+            branch_input = rearrange(branch_input, 'b ... d -> b d ...')
         return branch_input, residuals, beta
     def depth_connection(self, branch_output, residuals, beta):
         # 'depth' connection
+        if self.channel_first:
+            branch_output = rearrange(branch_output, 'b d ... -> b ... d')
         residuals = einsum(branch_output, beta, 'b ... d, b ... s -> b ... s d') + residuals
-        return rearrange(residuals, 'b ... s d -> (b s) ... d')
+        output = rearrange(residuals, 'b ... s d -> (b s) ... d')
+        if self.channel_first:
+            output = rearrange(output, 'b ... d -> b d ...')
+        return output
     def forward(self, residuals, **branch_kwargs):
@@ -104,4 +126,8 @@ class HyperConnections(Module):
         branch_output = self.branch(branch_input, **branch_kwargs)
-        return add_residual_fn(branch_output)
+        (branch_output, *rest), tree_spec = tree_flatten(branch_output)
+        branch_output = add_residual_fn(branch_output)
+        return tree_unflatten((branch_output, *rest), tree_spec)

{hyper_connections-0.0.1.dist-info → hyper_connections-0.0.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hyper-connections
-Version: 0.0.1
+Version: 0.0.3
 Summary: Hyper-Connections
 Project-URL: Homepage, https://pypi.org/project/hyper-connections/
 Project-URL: Repository, https://github.com/lucidrains/hyper-connections
@@ -45,7 +45,7 @@ Description-Content-Type: text/markdown
 ## Hyper Connections
-Attempt to make the multiple residual stream approach proposed by Hyper-Connections paper by Bytedance AI more accessible as a reusable library, and for following any new research in this direction.
+Attempt to make multiple residual streams, proposed in [Hyper-Connections paper](https://arxiv.org/abs/2409.19606) out of Bytedance AI lab, accessible as an easy to use library, as well as for following any new research in this direction.
 ## Install
@@ -92,6 +92,49 @@ residual = hyper_conn_branch(residual)
 residual = reduce_stream(residual)
 ```
+Or doing it manually, as in the paper
+```python
+import torch
+from torch import nn
+# a single branch layer
+branch = nn.Linear(512, 512)
+# before
+residual = torch.randn(2, 1024, 512)
+residual = branch(residual) + residual
+# after, say 4 streams in paper
+from hyper_connections import HyperConnections
+expand_stream, reduce_stream = HyperConnections.get_expand_reduce_stream_functions(4)
+# 1. instantiate hyper connection with correct number of streams (4 in this case)
+hyper_conn = HyperConnections(4, dim = 512)
+# 2. expand to 4 streams
+residual = expand_stream(residual)
+# 3. forward your residual into hyper connection for the branch input + add residual function (learned betas)
+branch_input, add_residual = hyper_conn(residual)
+branch_output = branch(branch_input)
+residual = add_residual(branch_output)
+# 4. reduce 4 streams with a summation, this has to be done after your for loop trunk
+residual = reduce_stream(residual)
+```
 ## Citation
 ```bibtex

hyper_connections-0.0.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+hyper_connections/__init__.py,sha256=xXx2Mb-dS1__UPzT-5VR1XZmyqKSSkT1DU6bAcK8jR0,73
+hyper_connections/hyper_connections.py,sha256=91QtTtnpffmErIZvrnTtosSf4JgBqcyGvxftmka-EOw,4303
+hyper_connections-0.0.3.dist-info/METADATA,sha256=8XKTmC6Ys10uOyotPtvL17v4uZyepkWoeMRVM4B_TSQ,4676
+hyper_connections-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hyper_connections-0.0.3.dist-info/licenses/LICENSE,sha256=E7RGS7kpJIStk5za_-4DVhWEAamf65EU0CNML25mq4c,1066
+hyper_connections-0.0.3.dist-info/RECORD,,

hyper_connections-0.0.1.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-hyper_connections/__init__.py,sha256=xXx2Mb-dS1__UPzT-5VR1XZmyqKSSkT1DU6bAcK8jR0,73
-hyper_connections/hyper_connections.py,sha256=Nbv7_OZ8FkdRG1WfmqdQHf46GZxWY0G_h_p4lT_JW38,3450
-hyper_connections-0.0.1.dist-info/METADATA,sha256=o_PVMP0Mm_sr7196WRYbh7O3wDp-nHVzAESgfWzi3FQ,3684
-hyper_connections-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hyper_connections-0.0.1.dist-info/licenses/LICENSE,sha256=E7RGS7kpJIStk5za_-4DVhWEAamf65EU0CNML25mq4c,1066
-hyper_connections-0.0.1.dist-info/RECORD,,

{hyper_connections-0.0.1.dist-info → hyper_connections-0.0.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{hyper_connections-0.0.1.dist-info → hyper_connections-0.0.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hyper-connections 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

hyper-connections: 0.0.1 (py3-none-any.whl) → 0.0.3 (py3-none-any.whl)