hyper-connections 0.4.3-py3-none-any.whl → 0.4.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hyper_connections/mHCv2.py +21 -8
- hyper_connections/manifold_constrained_hyper_connections.py +2 -2
- hyper_connections/triton_sinkhorn.py +160 -0
- {hyper_connections-0.4.3.dist-info → hyper_connections-0.4.5.dist-info}/METADATA +1 -1
- {hyper_connections-0.4.3.dist-info → hyper_connections-0.4.5.dist-info}/RECORD +7 -6
- {hyper_connections-0.4.3.dist-info → hyper_connections-0.4.5.dist-info}/WHEEL +0 -0
- {hyper_connections-0.4.3.dist-info → hyper_connections-0.4.5.dist-info}/licenses/LICENSE +0 -0
hyper_connections/mHCv2.py
CHANGED
@@ -69,8 +69,8 @@ def log_domain_sinkhorn_knopps(log_alpha, iters = 20):
     log_alpha = log_alpha.float()

     for _ in range(iters):
-        log_alpha = log_alpha
-        log_alpha = log_alpha
+        log_alpha = F.log_softmax(log_alpha, dim = -2)
+        log_alpha = F.log_softmax(log_alpha, dim = -1)

     return log_alpha.exp().to(dtype)

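The removed loop body (shown truncated above as bare `log_alpha = log_alpha` assignments) is replaced by an explicit alternating column/row log-softmax, which is what makes the output approximately doubly stochastic. For reference, the patched function reassembles to the sketch below; everything is taken from the hunk and its visible context except the `dtype = log_alpha.dtype` capture, which is assumed from the trailing `.to(dtype)`:

import torch
import torch.nn.functional as F

def log_domain_sinkhorn_knopps(log_alpha, iters = 20):
    dtype = log_alpha.dtype  # assumed: captured before the float() cast, per the .to(dtype) below
    log_alpha = log_alpha.float()

    for _ in range(iters):
        # each log_softmax subtracts a logsumexp, normalizing columns (dim = -2)
        # and then rows (dim = -1) in log space
        log_alpha = F.log_softmax(log_alpha, dim = -2)
        log_alpha = F.log_softmax(log_alpha, dim = -1)

    return log_alpha.exp().to(dtype)

# rows sum to exactly 1 (the last pass normalized dim = -1);
# column sums converge toward 1 as iters grows
alpha = log_domain_sinkhorn_knopps(torch.randn(4, 4))
print(alpha.sum(dim = -1))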
@@ -109,6 +109,7 @@ get_init_and_expand_reduce_stream_functions(
     add_attn_pool_reduce_stream = False,
     disable = None,
     sinkhorn_iters = 20,
+    use_triton_sinkhorn = False,
     **kwargs
 ):
     disable = default(disable, num_streams == 1 and num_fracs == 1)

@@ -116,7 +117,7 @@ get_init_and_expand_reduce_stream_functions(
     hyper_conn_klass = ManifoldConstrainedHyperConnections if not disable else Residual

     kwargs.pop('add_attn_pool_reduce_stream', None)
-    init_hyper_conn_fn = partial(hyper_conn_klass, num_streams, num_fracs = num_fracs, sinkhorn_iters = sinkhorn_iters, **kwargs)
+    init_hyper_conn_fn = partial(hyper_conn_klass, num_streams, num_fracs = num_fracs, sinkhorn_iters = sinkhorn_iters, use_triton_sinkhorn = use_triton_sinkhorn, **kwargs)
     expand_reduce_fns = get_expand_reduce_stream_functions(
         num_streams,
         add_stream_embed = add_stream_embed,

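With the plumbing above, opting in is a single keyword argument. A hypothetical usage sketch follows; the function and argument names come from this diff, while the three-value return is assumed from the library's analogous stream helpers:

from hyper_connections.mHCv2 import get_init_and_expand_reduce_stream_functions

init_hyper_conn, expand_stream, reduce_stream = get_init_and_expand_reduce_stream_functions(
    4,                           # num_streams (positional, as in the partial above)
    sinkhorn_iters = 20,
    use_triton_sinkhorn = True,  # request the fused Triton kernel when available
)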
@@ -231,7 +232,7 @@ class ManifoldConstrainedHyperConnections(Module):
         residual_mix_constraint_fn: Callable | None = None,
         forward_method_names: tuple[str, ...] = (),
         num_dynamic_alpha_proposals = 1,
-
+        use_triton_sinkhorn = False,
     ):
         """
         Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606

@@ -306,10 +307,22 @@ class ManifoldConstrainedHyperConnections(Module):
         # Hres constraint related
         # by default is sinkhorn

-
-
-
-
+        use_triton_sinkhorn_and_available = False
+
+        if use_triton_sinkhorn:
+            try:
+                from hyper_connections.triton_sinkhorn import triton_sinkhorn, is_triton_available
+                use_triton_sinkhorn_and_available = is_triton_available()
+            except ImportError:
+                use_triton_sinkhorn_and_available = False
+
+        if use_triton_sinkhorn_and_available:
+            self.residual_mix_constraint_fn = partial(triton_sinkhorn, iters = sinkhorn_iters)
+        else:
+            self.residual_mix_constraint_fn = default(
+                residual_mix_constraint_fn,
+                partial(sinkhorn_knopps if not log_domain_sinkhorn else log_domain_sinkhorn_knopps, iters = sinkhorn_iters)
+            )

         # dropouts

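Note the selection order in the constructor above: the Triton path wins whenever the import succeeds and `is_triton_available()` returns True, and a user-supplied `residual_mix_constraint_fn` is only consulted on the fallback branch. A quick, hypothetical way to check which branch a given machine will take:

from hyper_connections.triton_sinkhorn import is_triton_available

# False on CPU-only machines or when triton is not installed, in which case
# use_triton_sinkhorn = True silently keeps the pure-PyTorch sinkhorn path
print(is_triton_available())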
hyper_connections/manifold_constrained_hyper_connections.py
CHANGED

@@ -65,8 +65,8 @@ def log_domain_sinkhorn_knopps(log_alpha, iters = 20):
     log_alpha = log_alpha.float()

     for _ in range(iters):
-        log_alpha = log_alpha
-        log_alpha = log_alpha
+        log_alpha = F.log_softmax(log_alpha, dim = -2)
+        log_alpha = F.log_softmax(log_alpha, dim = -1)

     return log_alpha.exp().to(dtype)

hyper_connections/triton_sinkhorn.py
ADDED

@@ -0,0 +1,160 @@
+import torch
+import triton
+import triton.language as tl
+from torch.autograd import Function
+
+@triton.jit
+def sinkhorn_kernel_forward_log(
+    input_ptr,
+    output_ptr,
+    M, N,
+    stride_b, stride_m, stride_n,
+    iters: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+):
+    pid_b = tl.program_id(0)
+    offs_m = tl.arange(0, BLOCK_SIZE)
+    offs_n = tl.arange(0, BLOCK_SIZE)
+    mask = (offs_m[:, None] < M) & (offs_n[None, :] < N)
+
+    curr_input_ptr = input_ptr + pid_b * stride_b
+    # Use a large negative value for log-space padding to avoid interference
+    log_alpha = tl.load(curr_input_ptr + offs_m[:, None] * stride_m + offs_n[None, :] * stride_n, mask=mask, other=-1e10)
+
+    # Use static_range to force unrolling and avoid compiler bugs with dynamic loops in this environment
+    for _ in tl.static_range(iters):
+        # Column-wise Log-Softmax (dim=-2)
+        col_max = tl.max(tl.where(mask, log_alpha, -1e10), axis=0)
+        exp_weights_col = tl.exp(log_alpha - col_max[None, :])
+        exp_weights_col = tl.where(mask, exp_weights_col, 0.0)
+        col_lse = col_max + tl.log(tl.sum(exp_weights_col, axis=0))
+        log_alpha = log_alpha - col_lse[None, :]
+        log_alpha = tl.where(mask, log_alpha, -1e10)
+
+        # Row-wise Log-Softmax (dim=-1)
+        row_max = tl.max(tl.where(mask, log_alpha, -1e10), axis=1)
+        exp_weights_row = tl.exp(log_alpha - row_max[:, None])
+        exp_weights_row = tl.where(mask, exp_weights_row, 0.0)
+        row_lse = row_max + tl.log(tl.sum(exp_weights_row, axis=1))
+        log_alpha = log_alpha - row_lse[:, None]
+        log_alpha = tl.where(mask, log_alpha, -1e10)
+
+    result_alpha = tl.exp(log_alpha)
+    result_alpha = tl.where(mask, result_alpha, 0.0)
+
+    curr_output_ptr = output_ptr + pid_b * stride_b
+    tl.store(curr_output_ptr + offs_m[:, None] * stride_m + offs_n[None, :] * stride_n, result_alpha, mask=mask)
+
+@triton.jit
+def sinkhorn_kernel_backward_log(
+    grad_output_ptr,
+    output_ptr,
+    grad_input_ptr,
+    M, N,
+    stride_b, stride_m, stride_n,
+    iters: tl.constexpr,
+    BLOCK_SIZE: tl.constexpr,
+):
+    pid_b = tl.program_id(0)
+    offs_m = tl.arange(0, BLOCK_SIZE)
+    offs_n = tl.arange(0, BLOCK_SIZE)
+    mask = (offs_m[:, None] < M) & (offs_n[None, :] < N)
+
+    curr_output_ptr = output_ptr + pid_b * stride_b
+    curr_grad_output_ptr = grad_output_ptr + pid_b * stride_b
+
+    alpha = tl.load(curr_output_ptr + offs_m[:, None] * stride_m + offs_n[None, :] * stride_n, mask=mask, other=0.0)
+    grad_alpha = tl.load(curr_grad_output_ptr + offs_m[:, None] * stride_m + offs_n[None, :] * stride_n, mask=mask, other=0.0)
+
+    # Ensure they are truly zeroed in padded areas for sum robustness
+    alpha = tl.where(mask, alpha, 0.0)
+    grad_alpha = tl.where(mask, grad_alpha, 0.0)
+
+    for _ in tl.static_range(iters):
+        # Backward of Row-wise Normalization
+        # Sum only over valid elements
+        row_sum_grad_alpha = tl.sum(tl.where(mask, grad_alpha * alpha, 0.0), axis=1)
+        grad_alpha = grad_alpha - row_sum_grad_alpha[:, None]
+        grad_alpha = tl.where(mask, grad_alpha, 0.0)
+
+        # Backward of Column-wise Normalization
+        col_sum_grad_alpha = tl.sum(tl.where(mask, grad_alpha * alpha, 0.0), axis=0)
+        grad_alpha = grad_alpha - col_sum_grad_alpha[None, :]
+        grad_alpha = tl.where(mask, grad_alpha, 0.0)
+
+    grad_input = alpha * grad_alpha
+
+    curr_grad_input_ptr = grad_input_ptr + pid_b * stride_b
+    tl.store(curr_grad_input_ptr + offs_m[:, None] * stride_m + offs_n[None, :] * stride_n, grad_input, mask=mask)
+
+class TritonSinkhornFunction(Function):
+    @staticmethod
+    def forward(ctx, log_alpha, iters=20):
+        # Handle matrix size limits to avoid register spilling/SRAM overflow
+        M, N = log_alpha.shape[-2:]
+        if max(M, N) > 256:
+            from hyper_connections.mHCv2 import log_domain_sinkhorn_knopps
+            return log_domain_sinkhorn_knopps(log_alpha, iters)
+
+        batch_shape = log_alpha.shape[:-2]
+        log_alpha_flat = log_alpha.view(-1, M, N).contiguous()
+        B = log_alpha_flat.shape[0]
+
+        output = torch.empty_like(log_alpha_flat)
+        BLOCK_SIZE = max(32, triton.next_power_of_2(max(M, N)))
+
+        sinkhorn_kernel_forward_log[(B,)](
+            log_alpha_flat,
+            output,
+            M, N,
+            log_alpha_flat.stride(0), log_alpha_flat.stride(1), log_alpha_flat.stride(2),
+            iters=iters,
+            BLOCK_SIZE=BLOCK_SIZE,
+            num_warps=4
+        )
+
+        ctx.save_for_backward(output)
+        ctx.iters = iters
+        return output.view(*batch_shape, M, N)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        output, = ctx.saved_tensors
+        iters = ctx.iters
+        B, M, N = output.shape
+        BLOCK_SIZE = max(32, triton.next_power_of_2(max(M, N)))
+
+        # Explicit contiguity for grad_output
+        grad_output = grad_output.contiguous()
+        grad_input = torch.empty_like(output)
+
+        sinkhorn_kernel_backward_log[(B,)](
+            grad_output.view(B, M, N),
+            output,
+            grad_input,
+            M, N,
+            grad_input.stride(0), grad_input.stride(1), grad_input.stride(2),
+            iters=iters,
+            BLOCK_SIZE=BLOCK_SIZE,
+            num_warps=4
+        )
+
+        return grad_input.view_as(grad_output), None
+
+def triton_sinkhorn(log_alpha, iters=20):
+    if log_alpha.is_cuda:
+        try:
+            return TritonSinkhornFunction.apply(log_alpha, iters)
+        except Exception:
+            pass
+
+    # fallback
+    from hyper_connections.mHCv2 import sinkhorn_knopps
+    return sinkhorn_knopps(log_alpha, iters = iters)
+
+def is_triton_available():
+    try:
+        import triton
+        return torch.cuda.is_available()
+    except ImportError:
+        return False

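The backward kernel above does not replay the forward: at each iteration it subtracts the row-wise and then column-wise sums of `grad_alpha * alpha` (the Jacobian correction of a normalization step), reusing the saved final output `alpha` at every step, and scales by `alpha` at the end. A hypothetical sanity check against the pure-PyTorch reference; the device, shapes, and printed tolerances are illustrative only:

import torch
from hyper_connections.mHCv2 import log_domain_sinkhorn_knopps
from hyper_connections.triton_sinkhorn import triton_sinkhorn, is_triton_available

if is_triton_available():
    log_alpha = torch.randn(8, 16, 16, device = 'cuda')

    # forward parity: fused kernel vs. the log-domain reference
    out_kernel = triton_sinkhorn(log_alpha, iters = 20)
    out_ref = log_domain_sinkhorn_knopps(log_alpha, iters = 20)
    print((out_kernel - out_ref).abs().max())  # expect a small float32 discrepancy

    # gradient comparison, weighting the output so the gradient is non-trivial
    # (the plain sum is nearly constant once rows are normalized to 1)
    w = torch.randn_like(log_alpha)
    x = log_alpha.clone().requires_grad_()
    y = log_alpha.clone().requires_grad_()
    (triton_sinkhorn(x, iters = 20) * w).sum().backward()
    (log_domain_sinkhorn_knopps(y, iters = 20) * w).sum().backward()
    print((x.grad - y.grad).abs().max())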
{hyper_connections-0.4.3.dist-info → hyper_connections-0.4.5.dist-info}/RECORD
CHANGED

@@ -3,11 +3,12 @@ hyper_connections/hyper_connections.py,sha256=2F-104cGE82KCK0KeC07NSOJNPT-0PCtvX
 hyper_connections/hyper_connections_channel_first.py,sha256=5vAen4WXxNI9K07ndLBQJwdJv-OjoXznta5EIQTaQNA,6512
 hyper_connections/hyper_connections_with_multi_branch_inputs.py,sha256=yn2AlFB6qCYQeRhhhaMlCM3mxxLEdWCYwU2p9TsMwWI,7835
 hyper_connections/hyper_connections_with_multi_input_streams.py,sha256=iFPw5pgCRHTo16nBJ2PExKSrvTyCh7ba7Py14P1oSPE,11311
-hyper_connections/mHCv2.py,sha256=
-hyper_connections/manifold_constrained_hyper_connections.py,sha256=
+hyper_connections/mHCv2.py,sha256=wCtp87OFI3QfosdSL-1qwsiQN9f8gX32_0r8GQGO7P0,17411
+hyper_connections/manifold_constrained_hyper_connections.py,sha256=E4os-6q_SMjJO1JD0EG8rFTCXA7MQoy-aqUlM7KVS5Q,18269
 hyper_connections/residuals.py,sha256=JVSFJj_H7xQ3_Fd-pZH5Hdv9SveAQu29jQNvMyom5ek,921
+hyper_connections/triton_sinkhorn.py,sha256=n2WyQcUemtv5T5Sk2nljnSpV2hEED4I3HaPsIUy4638,5905
 hyper_connections/vit.py,sha256=BOWVfCAIzDQdnTq8OBzNUyiKGGILYZkIQ6mr1GKJVB0,5225
-hyper_connections-0.4.
-hyper_connections-0.4.
-hyper_connections-0.4.
-hyper_connections-0.4.
+hyper_connections-0.4.5.dist-info/METADATA,sha256=sWVb_-yVRmxL8AsAPsk0VdRXOa25uG9zKNc8S_oAXg8,6704
+hyper_connections-0.4.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+hyper_connections-0.4.5.dist-info/licenses/LICENSE,sha256=E7RGS7kpJIStk5za_-4DVhWEAamf65EU0CNML25mq4c,1066
+hyper_connections-0.4.5.dist-info/RECORD,,

{hyper_connections-0.4.3.dist-info → hyper_connections-0.4.5.dist-info}/WHEEL
File without changes

{hyper_connections-0.4.3.dist-info → hyper_connections-0.4.5.dist-info}/licenses/LICENSE
File without changes