PyPI - hyper-connections - Versions diffs - 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl - Mend

hyper-connections 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

hyper_connections/mHCv2.py CHANGED Viewed

@@ -13,6 +13,8 @@ from torch.utils._pytree import tree_flatten, tree_unflatten
 from einops import rearrange, repeat, reduce, einsum
 from einops.layers.torch import Rearrange, Reduce
+from torch_einops_utils import pack_with_inverse
 """
 ein notation:
 b - batch
@@ -124,8 +126,8 @@ def get_init_and_expand_reduce_stream_functions(
     hyper_conn_klass = ManifoldConstrainedHyperConnections if not disable else Residual
-    kwargs.pop('add_attn_pool_reduce_stream', None)
     init_hyper_conn_fn = partial(hyper_conn_klass, num_streams, num_fracs = num_fracs, sinkhorn_iters = sinkhorn_iters, use_triton_sinkhorn = use_triton_sinkhorn, **kwargs)
     expand_reduce_fns = get_expand_reduce_stream_functions(
         num_streams,
         add_stream_embed = add_stream_embed,
@@ -241,6 +243,7 @@ class ManifoldConstrainedHyperConnections(Module):
         forward_method_names: tuple[str, ...] = (),
         num_dynamic_alpha_proposals = 1,
         use_triton_sinkhorn = False,
+        mix_streams_before_norm = False, # whether to mix the residual streams before the norm (that then projects to Hpre, Hpost, Hresidual)
     ):
         """
         Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
@@ -263,6 +266,16 @@ class ManifoldConstrainedHyperConnections(Module):
         dim //= num_fracs # effective dim handled in dimension is feature dimension divided by num fractions
+        # whether to mix the streams before the norm below
+        # this would be equivalent to separable depthwise convs from yesteryears (with a norm in between) - parameter efficient improv
+        self.maybe_mix_streams = None
+        if mix_streams_before_norm:
+            self.maybe_mix_streams = nn.Conv2d(num_residual_streams, num_residual_streams, 1, bias = False)
+            nn.init.dirac_(self.maybe_mix_streams.weight)
         # they used layernorm in paper, but rmsnorm is fine given what we know now
         self.norm = RMSNorm(dim)
@@ -370,6 +383,14 @@ class ManifoldConstrainedHyperConnections(Module):
         residuals = self.split_fracs(residuals)
+        # maybe mix streams
+        if exists(self.maybe_mix_streams):
+            residuals, inverse_pack_lead_dims = pack_with_inverse(residuals, '* c h w')
+            residuals = self.maybe_mix_streams(residuals)
+            residuals = inverse_pack_lead_dims(residuals)
         # norm
         normed = self.norm(residuals)

hyper_connections/vit.py CHANGED Viewed

@@ -5,7 +5,7 @@ from torch.nn import Module, ModuleList
 from einops import rearrange, repeat
 from einops.layers.torch import Rearrange
-from hyper_connections.manifold_constrained_hyper_connections import mHC
+from hyper_connections.mHCv2 import mHC
 # helpers
@@ -66,12 +66,12 @@ class Attention(Module):
         return self.to_out(out)
 class Transformer(Module):
-    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0., num_residual_streams = 4, num_dynamic_alpha_proposals = 1):
+    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0., num_residual_streams = 4, mhc_kwargs = dict()):
         super().__init__()
         self.norm = nn.LayerNorm(dim)
         self.layers = ModuleList([])
-        init_hyper_conn, self.expand_streams, self.reduce_streams = mHC.get_init_and_expand_reduce_stream_functions(num_residual_streams, num_dynamic_alpha_proposals = num_dynamic_alpha_proposals)
+        init_hyper_conn, self.expand_streams, self.reduce_streams = mHC.get_init_and_expand_reduce_stream_functions(num_residual_streams, **mhc_kwargs)
         for _ in range(depth):
             self.layers.append(ModuleList([
@@ -92,7 +92,7 @@ class Transformer(Module):
         return self.norm(x)
 class ViT(Module):
-    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0., num_residual_streams = 4, num_dynamic_alpha_proposals = 1):
+    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0., num_residual_streams = 4, mhc_kwargs = dict(num_dynamic_alpha_proposals = 1)):
         super().__init__()
         image_height, image_width = pair(image_size)
         patch_height, patch_width = pair(patch_size)
@@ -117,7 +117,7 @@ class ViT(Module):
         self.dropout = nn.Dropout(emb_dropout)
-        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout, num_residual_streams, num_dynamic_alpha_proposals)
+        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout, num_residual_streams, mhc_kwargs)
         self.pool = pool
         self.to_latent = nn.Identity()
@@ -154,7 +154,10 @@ if __name__ == '__main__':
         mlp_dim = 2048,
         dropout = 0.1,
         emb_dropout = 0.1,
-        num_residual_streams = 4
+        num_residual_streams = 4,
+        mhc_kwargs = dict(
+            use_triton_sinkhorn = False
+        )
     )
     img = torch.randn(1, 3, 256, 256)

{hyper_connections-0.4.6.dist-info → hyper_connections-0.4.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hyper-connections
-Version: 0.4.6
+Version: 0.4.8
 Summary: Hyper-Connections
 Project-URL: Homepage, https://pypi.org/project/hyper-connections/
 Project-URL: Repository, https://github.com/lucidrains/hyper-connections
@@ -35,6 +35,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: einops>=0.8.1
+Requires-Dist: torch-einops-utils>=0.0.20
 Requires-Dist: torch>=2.5
 Provides-Extra: examples
 Description-Content-Type: text/markdown

{hyper_connections-0.4.6.dist-info → hyper_connections-0.4.8.dist-info}/RECORD RENAMED Viewed

@@ -3,12 +3,12 @@ hyper_connections/hyper_connections.py,sha256=2F-104cGE82KCK0KeC07NSOJNPT-0PCtvX
 hyper_connections/hyper_connections_channel_first.py,sha256=5vAen4WXxNI9K07ndLBQJwdJv-OjoXznta5EIQTaQNA,6512
 hyper_connections/hyper_connections_with_multi_branch_inputs.py,sha256=yn2AlFB6qCYQeRhhhaMlCM3mxxLEdWCYwU2p9TsMwWI,7835
 hyper_connections/hyper_connections_with_multi_input_streams.py,sha256=iFPw5pgCRHTo16nBJ2PExKSrvTyCh7ba7Py14P1oSPE,11311
-hyper_connections/mHCv2.py,sha256=k-qOt-lnDR-jnwJLTVxlNFMkJZQGT55ExpE1QxUEPco,17503
+hyper_connections/mHCv2.py,sha256=LpMtlrb7Vfi2qq_cqPl9fajA5SxkMTl5QGpmvBJyD1M,18360
 hyper_connections/manifold_constrained_hyper_connections.py,sha256=E4os-6q_SMjJO1JD0EG8rFTCXA7MQoy-aqUlM7KVS5Q,18269
 hyper_connections/residuals.py,sha256=JVSFJj_H7xQ3_Fd-pZH5Hdv9SveAQu29jQNvMyom5ek,921
 hyper_connections/triton_sinkhorn.py,sha256=n2WyQcUemtv5T5Sk2nljnSpV2hEED4I3HaPsIUy4638,5905
-hyper_connections/vit.py,sha256=BOWVfCAIzDQdnTq8OBzNUyiKGGILYZkIQ6mr1GKJVB0,5225
-hyper_connections-0.4.6.dist-info/METADATA,sha256=ZU6BE9Y90LRK2Fg3WXg2Y8dKDg_qaUyyYELsqaPGD6c,6704
-hyper_connections-0.4.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-hyper_connections-0.4.6.dist-info/licenses/LICENSE,sha256=E7RGS7kpJIStk5za_-4DVhWEAamf65EU0CNML25mq4c,1066
-hyper_connections-0.4.6.dist-info/RECORD,,
+hyper_connections/vit.py,sha256=dh8AVMUPaUHuWxXJEHoMW_G5nj-EQQjDmgbPwwhiq5g,5215
+hyper_connections-0.4.8.dist-info/METADATA,sha256=vevhBHad-7ffu1KBFcazUqU5C2XVRy1LlZkIxJUNDIs,6746
+hyper_connections-0.4.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+hyper_connections-0.4.8.dist-info/licenses/LICENSE,sha256=E7RGS7kpJIStk5za_-4DVhWEAamf65EU0CNML25mq4c,1066
+hyper_connections-0.4.8.dist-info/RECORD,,

{hyper_connections-0.4.6.dist-info → hyper_connections-0.4.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{hyper_connections-0.4.6.dist-info → hyper_connections-0.4.8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hyper-connections 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

hyper-connections 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl