hyper-connections 0.3.16.tar.gz → 0.4.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/PKG-INFO +1 -1
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/manifold_constrained_hyper_connections.py +23 -8
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/vit.py +4 -4
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/pyproject.toml +1 -1
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/tests/test_hyper_connections.py +11 -5
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/.github/workflows/python-publish.yml +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/.github/workflows/test.yml +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/.gitignore +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/LICENSE +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/README.md +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper-connections.png +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/__init__.py +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/hyper_connections.py +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/hyper_connections_channel_first.py +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/hyper_connections_with_multi_branch_inputs.py +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/hyper_connections_with_multi_input_streams.py +0 -0
- {hyper_connections-0.3.16 → hyper_connections-0.4.1}/hyper_connections/residuals.py +0 -0
hyper_connections/manifold_constrained_hyper_connections.py (+23 -8)

```diff
@@ -21,6 +21,7 @@ s - residual streams
 t - residual streams + num branch inputs
 f - number of fractions (division of feature dimension space)
 v - number of views for branch input
+p - proposals
 """
 
 # helper functions
```
```diff
@@ -212,7 +213,10 @@ class ManifoldConstrainedHyperConnections(Module):
         num_fracs = 1, # https://arxiv.org/abs/2503.14125
         sinkhorn_iters = 20,
         log_domain_sinkhorn = False,
+        residual_constraint_fn: Callable | None = None,
         forward_method_names: tuple[str, ...] = (),
+        num_dynamic_alpha_proposals = 1,
+
     ):
         """
         Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
```
```diff
@@ -256,6 +260,11 @@ class ManifoldConstrainedHyperConnections(Module):
         assert num_input_views >= 1
         self.num_input_views = num_input_views
 
+        # number of dynamic alpha proposals, for averaging Hres across proposals
+
+        self.has_dynamic_alpha_proposals = num_dynamic_alpha_proposals > 1
+        self.num_dynamic_alpha_proposals = num_dynamic_alpha_proposals
+
         # width connection
 
         init_alpha0 = torch.zeros((num_residual_streams_fracs, num_input_views_fracs))
```
```diff
@@ -263,7 +272,7 @@ class ManifoldConstrainedHyperConnections(Module):
 
         self.static_alpha = nn.Parameter(cat((init_alpha0, torch.eye(num_residual_streams_fracs)), dim = 1))
 
-        self.dynamic_alpha_fn = nn.Parameter(torch.zeros(dim, num_residual_streams_fracs + num_input_views_fracs))
+        self.dynamic_alpha_fn = nn.Parameter(torch.zeros(num_dynamic_alpha_proposals, dim, num_residual_streams_fracs + num_input_views_fracs))
 
         self.pre_branch_scale = nn.Parameter(torch.ones(1) * 1e-2)
         self.residual_scale = nn.Parameter(torch.ones(1) * 1e-2)
```
```diff
@@ -280,10 +289,13 @@ class ManifoldConstrainedHyperConnections(Module):
 
         self.h_post_scale = nn.Parameter(torch.ones(()) * 1e-2)
 
-        #
+        # Hres constraint related
+        # by default is sinkhorn
 
-        self.
-
+        self.residual_constraint_fn = default(
+            residual_constraint_fn,
+            partial(sinkhorn_knopps if not log_domain_sinkhorn else log_domain_sinkhorn_knopps, iters = sinkhorn_iters)
+        )
 
         # dropouts
 
```
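The hard-coded Sinkhorn attributes are replaced by a single `residual_constraint_fn` callable, defaulting to a `partial` over the package's Sinkhorn-Knopp routines. Any callable that maps the raw residual-mixing logits to a constrained matrix can now be injected. A minimal sketch of the hook, assuming only the new kwarg from the signature above; the softmax constraint here is an illustration, not part of the library:

```python
import torch

def softmax_constraint(alpha_residual: torch.Tensor) -> torch.Tensor:
    # illustrative alternative to the default Sinkhorn normalization:
    # softmax makes each row sum to one, but unlike Sinkhorn-Knopp it
    # does not also constrain the columns (no double stochasticity)
    return alpha_residual.softmax(dim = -1)

# hypothetical usage, other constructor arguments omitted:
# mhc = ManifoldConstrainedHyperConnections(..., residual_constraint_fn = softmax_constraint)
```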
```diff
@@ -346,7 +358,7 @@ class ManifoldConstrainedHyperConnections(Module):
 
         normed = normed.float()
 
-        wc_weight = normed
+        wc_weight = einsum(normed, self.dynamic_alpha_fn.float(), '... d, p d e -> p ... e')
 
         pre_branch_scale = repeat(self.pre_branch_scale.float(), '1 -> s', s = self.num_fracs)
         residual_scale = repeat(self.residual_scale.float(), '1 -> s', s = self.num_fracs * streams)
```
```diff
@@ -366,12 +378,15 @@
 
         alpha_pre = alpha_pre.sigmoid()
 
-
-
-        alpha_residual = sinkhorn_fn(alpha_residual, self.sinkhorn_iters)
+        alpha_residual = self.residual_constraint_fn(alpha_residual)
 
         alpha = cat((alpha_pre, alpha_residual), dim = -1)
 
+        if self.has_dynamic_alpha_proposals:
+            alpha = reduce(alpha, 'p ... -> ...', 'mean')
+        else:
+            alpha = rearrange(alpha, '1 ... -> ...')
+
         alpha = self.split_fracs(alpha) # (batch, seq, fracs1, streams, fracs2, input + residual streams)
 
         # beta for weights from branch output back to residual streams
```
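Taken together, the forward-pass changes thread a proposals dimension `p` through the dynamic alpha computation and collapse it again after the constraint is applied. A toy shape walk-through with einops, using arbitrary sizes (nothing here is taken from the package beyond the two patterns visible in the diff):

```python
import torch
from einops import einsum, reduce, rearrange

p, d, e = 2, 8, 6                        # proposals, feature dim, streams + input views (toy sizes)
normed = torch.randn(3, 5, d)            # (batch, seq, dim)
dynamic_alpha_fn = torch.randn(p, d, e)  # note the new leading proposals dimension

# '... d, p d e -> p ... e' produces one projection per proposal
wc_weight = einsum(normed, dynamic_alpha_fn, '... d, p d e -> p ... e')
assert wc_weight.shape == (p, 3, 5, e)

# after the residual constraint runs per proposal, alpha is either averaged
# over proposals (p > 1) or the singleton proposal dim is simply dropped
alpha = reduce(wc_weight, 'p ... -> ...', 'mean') if p > 1 else rearrange(wc_weight, '1 ... -> ...')
assert alpha.shape == (3, 5, e)
```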
hyper_connections/vit.py (+4 -4)

```diff
@@ -66,12 +66,12 @@ class Attention(Module):
         return self.to_out(out)
 
 class Transformer(Module):
-    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0., num_residual_streams = 4):
+    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0., num_residual_streams = 4, num_dynamic_alpha_proposals = 1):
         super().__init__()
         self.norm = nn.LayerNorm(dim)
         self.layers = ModuleList([])
 
-        init_hyper_conn, self.expand_streams, self.reduce_streams = mHC.get_init_and_expand_reduce_stream_functions(num_residual_streams)
+        init_hyper_conn, self.expand_streams, self.reduce_streams = mHC.get_init_and_expand_reduce_stream_functions(num_residual_streams, num_dynamic_alpha_proposals = num_dynamic_alpha_proposals)
 
         for _ in range(depth):
             self.layers.append(ModuleList([
```
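`get_init_and_expand_reduce_stream_functions` now forwards the extra kwarg, so every hyper-connection created through the factory shares the same proposal count. A minimal sketch of the pattern, assuming `ManifoldConstrainedHyperConnections` is importable from the package root as `vit.py`'s `mHC`, and following the usual hyper-connections factory idiom from the README:

```python
import torch
from torch import nn
from hyper_connections import ManifoldConstrainedHyperConnections as mHC

init_hyper_conn, expand_streams, reduce_streams = mHC.get_init_and_expand_reduce_stream_functions(
    4,                               # number of residual streams
    num_dynamic_alpha_proposals = 2  # new in 0.4.1; > 1 averages Hres across proposals
)

branch = nn.Linear(64, 64)                        # any branch module works here
hyper_conn = init_hyper_conn(dim = 64, branch = branch)

x = torch.randn(1, 16, 64)
x = expand_streams(x)   # fan out to 4 residual streams
x = hyper_conn(x)       # mix streams, run the branch, add back
x = reduce_streams(x)   # fold the streams back down
```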
```diff
@@ -92,7 +92,7 @@ class Transformer(Module):
         return self.norm(x)
 
 class ViT(Module):
-    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0., num_residual_streams = 4):
+    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0., num_residual_streams = 4, num_dynamic_alpha_proposals = 1):
         super().__init__()
         image_height, image_width = pair(image_size)
         patch_height, patch_width = pair(patch_size)
```
```diff
@@ -117,7 +117,7 @@ class ViT(Module):
 
         self.dropout = nn.Dropout(emb_dropout)
 
-        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)
+        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout, num_residual_streams, num_dynamic_alpha_proposals)
 
         self.pool = pool
         self.to_latent = nn.Identity()
```
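Note that before this change `ViT` never forwarded `num_residual_streams` to `Transformer`, so the constructor argument was silently ignored; 0.4.1 passes both it and the new kwarg through. A minimal end-to-end sketch mirroring the updated test below, with illustrative hyperparameter values (only the last two kwargs and the input shape come from this diff):

```python
import torch
from hyper_connections.vit import ViT

v = ViT(
    image_size = 256,
    patch_size = 32,
    num_classes = 1000,
    dim = 512,
    depth = 6,
    heads = 8,
    mlp_dim = 2048,
    dropout = 0.1,
    emb_dropout = 0.1,
    num_residual_streams = 4,
    num_dynamic_alpha_proposals = 2
)

img = torch.randn(1, 3, 256, 256)
preds = v(img)  # expected shape (1, 1000): one logit per class
```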
tests/test_hyper_connections.py (+11 -5)

```diff
@@ -1,11 +1,12 @@
 import pytest
+param = pytest.mark.parametrize
 
 import torch
 from torch import nn
 
-@
-@
-@
+@param('num_fracs', (1, 4))
+@param('disable', (False, True))
+@param('manifold_constrained', (False, True))
 def test_readme(
     num_fracs,
     disable,
```
```diff
@@ -208,7 +209,11 @@ def test_mhc_dtype_restoration():
 
     assert residual.dtype == torch.half
 
-def test_mhc_vit():
+@param('num_dynamic_alpha_proposals', (1, 2))
+def test_mhc_vit(
+    num_dynamic_alpha_proposals
+):
+
     from hyper_connections.vit import ViT
 
     v = ViT(
```
```diff
@@ -221,7 +226,8 @@ def test_mhc_vit():
         mlp_dim = 2048,
         dropout = 0.1,
         emb_dropout = 0.1,
-        num_residual_streams = 4
+        num_residual_streams = 4,
+        num_dynamic_alpha_proposals = num_dynamic_alpha_proposals
    )
 
     img = torch.randn(1, 3, 256, 256)
```