PyPI - hyper-connections - Versions diffs - 0.3.6__tar.gz → 0.3.8__tar.gz - Mend

hyper-connections 0.3.6.tar.gz → 0.3.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

{hyper_connections-0.3.6 → hyper_connections-0.3.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hyper-connections
-Version: 0.3.6
+Version: 0.3.8
 Summary: Hyper-Connections
 Project-URL: Homepage, https://pypi.org/project/hyper-connections/
 Project-URL: Repository, https://github.com/lucidrains/hyper-connections

{hyper_connections-0.3.6 → hyper_connections-0.3.8}/hyper_connections/manifold_constrained_hyper_connections.py RENAMED Viewed

@@ -46,6 +46,9 @@ def l1norm(t, dim):
     return F.normalize(t, p = 1, dim = dim)
 def sinkhorn_knopps(log_alpha, iters = 20):
+    dtype = log_alpha.dtype
+    log_alpha = log_alpha.float()
     log_alpha = log_alpha - log_alpha.amax(dim = -2, keepdim = True).detach()
     alpha = log_alpha.exp()
@@ -54,7 +57,7 @@ def sinkhorn_knopps(log_alpha, iters = 20):
         alpha = l1norm(alpha, dim = -2)
         alpha = l1norm(alpha, dim = -1)
-    return alpha
+    return alpha.to(dtype)
 # main functions
@@ -197,7 +200,8 @@ class ManifoldConstrainedHyperConnections(Module):
         num_input_views = 1,                # allow for the branch module to receive multiple input views, dimension placed on the very left (before batch)
         depth_residual_fn = add,
         num_fracs = 1,                      # https://arxiv.org/abs/2503.14125
-        sinkhorn_iters = 20
+        sinkhorn_iters = 20,
+        forward_method_names: tuple[str, ...] = (),
     ):
         """
         Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
@@ -287,6 +291,16 @@ class ManifoldConstrainedHyperConnections(Module):
         self.depth_residual_fn = depth_residual_fn
+        # forwarding method names
+        self.forward_method_names = forward_method_names
+        for forward_method_name in self.forward_method_names:
+            assert not hasattr(self, forward_method_name)
+            fn = getattr(self.branch, forward_method_name)
+            setattr(self, forward_method_name, fn)
     def width_connection(
         self,
         residuals
@@ -316,17 +330,21 @@ class ManifoldConstrainedHyperConnections(Module):
         # alpha for weighted sum of residuals going into branch
-        wc_weight = normed @ self.dynamic_alpha_fn
+        dtype = residuals.dtype
-        pre_branch_scale = repeat(self.pre_branch_scale, '1 -> s', s = self.num_fracs)
-        residual_scale = repeat(self.residual_scale, '1 -> s', s = self.num_fracs * streams)
+        normed = normed.float()
+        wc_weight = normed @ self.dynamic_alpha_fn.float()
+        pre_branch_scale = repeat(self.pre_branch_scale.float(), '1 -> s', s = self.num_fracs)
+        residual_scale = repeat(self.residual_scale.float(), '1 -> s', s = self.num_fracs * streams)
         alpha_scale = cat((pre_branch_scale, residual_scale))
         alpha_scale = repeat(alpha_scale, 'n -> (v n)', v = self.num_input_views)
         dynamic_alpha = wc_weight * alpha_scale
-        static_alpha = rearrange(self.static_alpha, '(f s) d -> f s d', s = streams)
+        static_alpha = rearrange(self.static_alpha.float(), '(f s) d -> f s d', s = streams)
         alpha = dynamic_alpha + static_alpha
@@ -351,20 +369,20 @@ class ManifoldConstrainedHyperConnections(Module):
         beta = None
         if self.add_branch_out_to_residual:
-            dc_weight = normed @ self.dynamic_beta_fn
+            dc_weight = normed @ self.dynamic_beta_fn.float()
             dc_weight = dc_weight.sigmoid() * 2 # sigmoid * 2 for "H_post", corresponding to dc weight in original paper
             if not self.has_fracs:
                 dc_weight = rearrange(dc_weight, '... -> ... 1')
-            dynamic_beta = dc_weight * self.h_post_scale
+            dynamic_beta = dc_weight * self.h_post_scale.float()
-            static_beta = rearrange(self.static_beta, '... (s f) -> ... s f', s = streams)
+            static_beta = rearrange(self.static_beta.float(), '... (s f) -> ... s f', s = streams)
             beta = dynamic_beta + static_beta
-        mix_h = einsum(alpha, residuals, '... f1 s f2 t, ... f1 s d -> ... f2 t d')
+        mix_h = einsum(alpha, residuals.float(), '... f1 s f2 t, ... f1 s d -> ... f2 t d')
         if self.num_input_views == 1:
             branch_input, residuals = mix_h[..., 0, :], mix_h[..., 1:, :]
@@ -379,6 +397,12 @@ class ManifoldConstrainedHyperConnections(Module):
         branch_input = self.merge_fracs(branch_input)
+        branch_input = branch_input.to(dtype)
+        residuals = residuals.to(dtype)
+        if exists(beta):
+            beta = beta.to(dtype)
         return branch_input, maybe_transformed_residuals, dict(beta = beta)
     def depth_connection(
@@ -399,7 +423,9 @@ class ManifoldConstrainedHyperConnections(Module):
         if self.channel_first:
             branch_output = rearrange(branch_output, 'b d ... -> b ... d')
-        output = einsum(branch_output, beta, 'b ... f1 d, b ... f1 s f2 -> b ... f2 s d')
+        dtype = residuals.dtype
+        output = einsum(branch_output.float(), beta.float(), 'b ... f1 d, b ... f1 s f2 -> b ... f2 s d')
         output = rearrange(output, 'b ... s d -> (b s) ... d')
@@ -412,7 +438,7 @@ class ManifoldConstrainedHyperConnections(Module):
         if self.channel_first:
             output = rearrange(output, 'b ... d -> b d ...')
-        residuals = self.depth_residual_fn(output, residuals)
+        residuals = self.depth_residual_fn(output.to(dtype), residuals)
         return self.dropout(residuals)

{hyper_connections-0.3.6 → hyper_connections-0.3.8}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "hyper-connections"
-version = "0.3.6"
+version = "0.3.8"
 description = "Hyper-Connections"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

{hyper_connections-0.3.6 → hyper_connections-0.3.8}/tests/test_hyper_connections.py RENAMED Viewed

@@ -231,3 +231,24 @@ def test_channel_first_hyper_connection(disable):
     after_residual = reduce_stream(residual)
     assert before_residual.shape == after_residual.shape
+def test_mhc_dtype_restoration():
+    from hyper_connections.manifold_constrained_hyper_connections import ManifoldConstrainedHyperConnections
+    mhc = ManifoldConstrainedHyperConnections(
+        num_residual_streams = 4,
+        dim = 64,
+        add_branch_out_to_residual = True
+    )
+    residual = torch.randn(4, 1, 64).half()
+    branch_input, _, residual_kwargs = mhc.width_connection(residual)
+    assert branch_input.dtype == torch.half
+    assert residual_kwargs['beta'].dtype == torch.half
+    branch_output = torch.randn_like(branch_input).half()
+    residual = mhc.depth_connection(branch_output, residual, **residual_kwargs)
+    assert residual.dtype == torch.half