hyper-connections 0.1.5.tar.gz → 0.1.7.tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
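At a glance, 0.1.7 makes three changes visible in the hunks below: the stream expand/reduce helpers become nn.Module instances (einops Reduce layers, or nn.Identity when num_streams == 1 or disable = True) instead of functools.partial wrappers; Residual and HyperConnections gain a residual_transform argument so the residual path can be projected when the branch changes dimensionality, as in resnet shortcut convs; and HyperConnections.depth_connection is reordered so dropout applies to the updated residual streams rather than to the branch contribution alone. A new test, test_residual_transform, exercises the feature.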
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hyper-connections
- Version: 0.1.5
+ Version: 0.1.7
  Summary: Hyper-Connections
  Project-URL: Homepage, https://pypi.org/project/hyper-connections/
  Project-URL: Repository, https://github.com/lucidrains/hyper-connections
@@ -11,6 +11,7 @@ import torch.nn.functional as F
  from torch.utils._pytree import tree_flatten, tree_unflatten

  from einops import rearrange, repeat, reduce, einsum
+ from einops.layers.torch import Reduce

  """
  ein notation:
@@ -35,11 +36,11 @@ def identity(t):

  def get_expand_reduce_stream_functions(num_streams, disable = False):

-     if disable:
-         return (identity, identity)
+     if num_streams == 1 or disable:
+         return (nn.Identity(), nn.Identity())

-     expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
-     reduce_fn = partial(reduce, pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)
+     expand_fn = Reduce(pattern = 'b ... -> (b s) ...', reduction = 'repeat', s = num_streams)
+     reduce_fn = Reduce(pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)

      return expand_fn, reduce_fn

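Since the expand/reduce helpers are now nn.Module instances rather than partials, they compose with nn.Sequential and show up in a model's repr. A minimal sketch of the pair's behavior, assuming they are obtained via get_init_and_expand_reduce_stream_functions as in the package's tests (shapes are illustrative):

import torch
from hyper_connections import get_init_and_expand_reduce_stream_functions

# obtain the expand/reduce modules for 4 streams (init function discarded here)
_, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(4)

x = torch.randn(2, 1024, 512)

streams = expand_stream(x)       # (8, 1024, 512): 4 copies stacked along batch
summed = reduce_stream(streams)  # (2, 1024, 512): streams summed back out

assert torch.allclose(summed, 4 * x, atol = 1e-5)

# with a single stream (the new short-circuit) or disable = True,
# both helpers should be plain nn.Identity() per the hunk above
_, e, r = get_init_and_expand_reduce_stream_functions(1)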
@@ -72,16 +73,18 @@ class Residual(Module):
          self,
          *args,
          branch: Module | None = None,
+         residual_transform: Module | None = None,
          **kwargs
      ):
          super().__init__()
          self.branch = branch
+         self.residual_transform = default(residual_transform, nn.Identity())

      def width_connection(self, residuals):
          return residuals, residuals, dict()

      def depth_connection(self, branch_output, residuals):
-         return branch_output + residuals
+         return branch_output + self.residual_transform(residuals)

      def decorate_branch(self, branch: Callable):
          assert not exists(self.branch), 'branch was already wrapped on init'
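With residual_transform on the plain Residual wrapper, the disabled (single-stream) code path now handles branches whose output width differs from their input. A hedged sketch, assuming the wrapper is built through the package's init function with disable = True (the dim / branch / residual_transform keywords mirror the new test at the bottom of this diff):

import torch
from torch import nn
from hyper_connections import get_init_and_expand_reduce_stream_functions

init_residual, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(1, disable = True)

# a branch that changes dimensionality, 512 -> 256
branch = nn.Linear(512, 256)

# project the residual path to match, as a resnet shortcut would
block = init_residual(dim = 512, branch = branch, residual_transform = nn.Linear(512, 256))

x = torch.randn(2, 1024, 512)
out = block(x)  # branch(x) + residual_transform(x)

assert out.shape == (2, 1024, 256)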
@@ -127,7 +130,8 @@ class HyperConnections(Module):
          layer_index = None,
          tanh = True,
          channel_first = False,
-         dropout = 0.
+         dropout = 0.,
+         residual_transform: Module | None = None, # to support resnet blocks where dimension in is not equal to dimension out - usually a residual conv
      ):
          """
          Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
@@ -167,7 +171,14 @@ class HyperConnections(Module):

          self.channel_first = channel_first

+         # maybe residual transform
+
+         self.residual_transform = default(residual_transform, nn.Identity())
+
      def width_connection(self, residuals):
+
+         maybe_transformed_residuals = self.residual_transform(residuals)
+
          # width connection

          if self.channel_first:
@@ -196,7 +207,7 @@ class HyperConnections(Module):
          if self.channel_first:
              branch_input = rearrange(branch_input, 'b ... d -> b d ...')

-         return branch_input, residuals, dict(beta = beta)
+         return branch_input, maybe_transformed_residuals, dict(beta = beta)

      def depth_connection(self, branch_output, residuals, *, beta):
          # 'depth' connection
@@ -204,13 +215,15 @@ class HyperConnections(Module):
          if self.channel_first:
              branch_output = rearrange(branch_output, 'b d ... -> b ... d')

-         residuals = einsum(branch_output, beta, 'b ... d, b ... s -> b ... s d') + residuals
-         output = rearrange(residuals, 'b ... s d -> (b s) ... d')
+         output = einsum(branch_output, beta, 'b ... d, b ... s -> b ... s d')
+         output = rearrange(output, 'b ... s d -> (b s) ... d')

          if self.channel_first:
              output = rearrange(output, 'b ... d -> b d ...')

-         return self.dropout(output)
+         residuals = residuals + output
+
+         return self.dropout(residuals)

      def decorate_branch(self, branch: Callable):
          assert not exists(self.branch), 'branch was already wrapped on init'
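This reordering is what lets residual_transform work: the beta-weighted branch output is flattened back to the stream-stacked layout first and only then added to the residuals returned by width_connection, which already carry the transform, and dropout now wraps the resulting sum. A simplified, channel-last sketch of the 0.1.7 data flow (names and shapes inferred from the diff, not copied from the library):

import torch
from einops import einsum, rearrange

batch, streams, seq, dim = 2, 4, 16, 8

branch_output = torch.randn(batch, seq, dim)        # branch result, one per original sample
beta = torch.randn(batch, seq, streams)             # depth-connection weights
residuals = torch.randn(batch * streams, seq, dim)  # '(b s) ... d', post residual_transform

# distribute the branch output across the streams, weighted by beta
output = einsum(branch_output, beta, 'b ... d, b ... s -> b ... s d')
output = rearrange(output, 'b ... s d -> (b s) ... d')

# 0.1.7: add to the (possibly transformed) residuals; dropout is applied to this sum
new_residuals = residuals + output
assert new_residuals.shape == (batch * streams, seq, dim)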
@@ -11,6 +11,7 @@ from torch.nn import Module, ModuleList
  from torch.utils._pytree import tree_flatten, tree_unflatten

  from einops import rearrange, repeat, reduce, einsum
+ from einops.layers.torch import Reduce

  """
  ein notation:
@@ -41,11 +42,11 @@ def identity(t):
  # main functions

  def get_expand_reduce_stream_functions(cls, num_streams, disable = False):
-     if disable:
-         return (identity, identity)
+     if num_streams == 1 or disable:
+         return (nn.Identity(), nn.Identity())

-     expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
-     reduce_fn = partial(reduce, pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)
+     expand_fn = Reduce(pattern = 'b ... -> (b s) ...', reduction = 'repeat', s = num_streams)
+     reduce_fn = Reduce(pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)

      return expand_fn, reduce_fn

@@ -11,7 +11,7 @@ from torch.nn import Module, ModuleList
  from torch.utils._pytree import tree_flatten, tree_unflatten

  from einops import rearrange, repeat, reduce, einsum
- from einops.layers.torch import Rearrange
+ from einops.layers.torch import Rearrange, Reduce

  """
  ein notation:
@@ -29,18 +29,15 @@ def exists(v):
  def default(v, d):
      return v if exists(v) else d

- def identity(t):
-     return t
-
  # main functions

  def get_expand_reduce_stream_functions(num_streams, disable = False):

-     if disable:
-         return (identity, identity)
+     if num_streams == 1 or disable:
+         return (nn.Identity(), nn.Identity())

-     expand_fn = partial(repeat, pattern = 'b ... -> (b s) ...', s = num_streams)
-     reduce_fn = partial(reduce, pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)
+     expand_fn = Reduce(pattern = 'b ... -> (b s) ...', reduction = 'repeat', s = num_streams)
+     reduce_fn = Reduce(pattern = '(b s) ... -> b ...', reduction = 'sum', s = num_streams)

      return expand_fn, reduce_fn

@@ -1,6 +1,6 @@
  [project]
  name = "hyper-connections"
- version = "0.1.5"
+ version = "0.1.7"
  description = "Hyper-Connections"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -136,3 +136,46 @@ def test_multi_input_hyper_connections(disable):
      residual = reduce_stream(residual)

      assert residual.shape == (3, 1024, 512)
+
+ @pytest.mark.parametrize('disable', (False, True))
+ def test_residual_transform(disable):
+
+     # a single branch layer
+
+     branch = nn.Sequential(
+         nn.Linear(512, 512),
+         nn.SiLU(),
+         nn.Linear(512, 256)
+     )
+
+     residual_fn = nn.Linear(512, 256)
+
+     # before
+
+     residual = torch.randn(2, 1024, 512)
+
+     before_residual = branch(residual) + residual_fn(residual)
+
+     # after, say 4 streams in paper
+
+     from hyper_connections import get_init_and_expand_reduce_stream_functions
+
+     init_hyper_conn, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(4, disable = disable)
+
+     # 1. wrap your branch function
+
+     hyper_conn_branch = init_hyper_conn(dim = 512, branch = branch, residual_transform = residual_fn)
+
+     # 2. expand to 4 streams, this must be done before your trunk, typically a for-loop with many branch functions
+
+     residual = expand_stream(residual)
+
+     # 3. forward your residual as usual into the wrapped branch function(s)
+
+     residual = hyper_conn_branch(residual)
+
+     # 4. reduce 4 streams with a summation, this has to be done after your for-loop trunk. for transformer, unsure whether to do before or after final norm
+
+     after_residual = reduce_stream(residual)
+
+     assert before_residual.shape == after_residual.shape
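Note that the new test asserts only shape equality between the plain projected residual and the 4-stream hyper-connection output; with streams enabled the values are expected to differ, since the streams mix through learned width and depth connections before the final sum.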